MuckRock · duckduckgrayduck · Dec 11, 2023 · Dec 2, 2023 · Dec 2, 2023 · Dec 2, 2023
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -45,13 +45,12 @@ jobs:
 
       - name: Install dependencies for imports
         run: |
-           pip install python-dateutil requests urllib3 fastjsonschema ratelimit listcrunch pyyaml
+           pip install python-dateutil requests urllib3 fastjsonschema ratelimit listcrunch pyyaml pytest vcrpy
 
       - name: Install pylint and black
         run: |
           pip install pylint black
 
-      - name: Run pylint and black
+      - name: Run pylint and black on ./documentcloud and ./tests
         run: |
-          pylint ./documentcloud
-          black  ./documentcloud
+          pylint ./documentcloud ./tests; black ./documentcloud ./tests
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -1,5 +1,9 @@
 Changelog
 ---------
+4.0.1
+~~~~~
+* Reformats some strings in tests to conform to pylint standards.
+
 
 4.0.0
 ~~~~~

diff --git a/docs/conf.py b/docs/conf.py
@@ -57,7 +57,7 @@
 # The short X.Y version.
 version = "4.0"
 # The full version, including alpha/beta/rc tags.
-release = "4.0.0"
+release = "4.0.1"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/docs/documents.rst b/docs/documents.rst
@@ -155,11 +155,11 @@ Document
 
            >>> # Grab a document
            >>> obj = client.documents.get('71072')
-           >>> print obj.title
+           >>> print(obj.title)
            Draft OIR Report
            >>> # Change its title
            >>> obj.title = "Brand new title"
-           >>> print obj.title
+           >>> print(obj.title)
            Brand New Title
            >>> # Save those changes
            >>> obj.put()
@@ -282,7 +282,7 @@ Document
        >>> obj = client.documents.get('1088501-adventuretime-alta')
        >>> txt = obj.get_page_text(1)
        # Let's print just the first line
-       >>> print txt.decode().split("\n")[0]
+       >>> print(txt.split("\n")[0])
        STATE OF CALIFORNIA- HEALTH AND HUMAN SERVICES AGENCY
 
    .. method:: get_page_position_json(page)

diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst
@@ -26,7 +26,7 @@ You can also specify a custom uri if you have installed your own version of Docu
 
     >>> client = DocumentCloud(USERNAME, PASSWORD, base_uri="https://your.documentcloud.domain/api/", auth_uri="https://your.account.server.domain/api/")
 
-If you need to debug, you can pass a logging level as a parameter to the client when you instantiate. You will need to import logging first. There are several `logging levels <https://docs.python.org/3/library/logging.html#logging-levels>`_ depending on your needs. For this example, we will use the DEBUG level. 
+If you need to debug, you can pass a logging level as a parameter to the client when you instantiate. You will need to import logging first. There are several `logging levels <https://docs.python.org/3/library/logging.html#logging-levels>`_ depending on your needs. For this example, we will use the DEBUG level. ::
 
     >>> import logging
     >>> client = DocumentCloud(USERNAME, PASSWORD, loglevel=logging.DEBUG)
@@ -47,13 +47,13 @@ Interacting with a document
 
 Once you have you hands on a document object, you can interact with the metadata stored at documentcloud.org. Here's a sample: ::
 
-    >>> print obj.title
+    >>> print(obj.title)
     Final OIR Report
-    >>> print obj.id
+    >>> print(obj.id)
     71072
-    >>> print obj.contributor_organization
+    >>> print(obj.contributor_organization)
     Los Angeles Times
-    >>> print obj.canonical_url
+    >>> print(obj.canonical_url)
     http://www.documentcloud.org/documents/71072-oir-final-report.html
 
 You can even download the PDF, page images and full text. ::
@@ -92,7 +92,7 @@ Uploading a document that is not a PDF
 
 You can upload a document whose file extension is one of the seventy supported filetypes by including the original_extension parameter 
  (See https://www.documentcloud.org/help/api#supported-file-types for supported filetypes)
- Example: Uploading a JPG file that is stored in your home directory. 
+ Example: Uploading a JPG file that is stored in your home directory. ::
 
     >>> obj = self.client.documents.upload("~/test.jpg", original_extension='jpg')
 
@@ -108,7 +108,7 @@ First upload the document as normal. ::
     >>> from documentcloud import DocumentCloud
     >>> client = DocumentCloud(DOCUMENTCLOUD_USERNAME, DOCUMENTCLOUD_PASSWORD)
     >>> obj = client.documents.upload("/home/ben/pdfs/myfile.pdf", access='public')
-    
+
 Then refresh your local document object from the server. If it is does not show up as public, then it is still processing, and you'll have to check again. ::
 
     >>> obj = client.documents.get(obj.id)
@@ -119,7 +119,7 @@ Then refresh your local document object from the server. If it is does not show
 Uploading a directory of documents as a project
 -----------------------------------------------
 
-Here's how to upload a directory full of documents and add them all to a new project. Be warned, this will upload any documents in directories inside the path you specify. ::
+Here's how to upload a directory full of PDFs and add them all to a new project. Be warned, this will upload any documents in directories inside the path you specify. ::
 
     >>> # Connect to documentcloud
     >>> from documentcloud import DocumentCloud
@@ -133,10 +133,21 @@ Here's how to upload a directory full of documents and add them all to a new pro
     >>> # Save the changes to the project
     >>> project.put()
 
+If you want to upload a directory of other file types, you can specify the extensions you want.
+For example, the following will upload all .txt and .jpg files in the groucho_marx directory. ::
+
+    >>> obj_list = client.documents.upload_directory('/home/ben/pdfs/groucho_marx/', extensions=['.txt', '.jpg'])
+
+If you pass extensions=None, it will upload all files that DocumentCloud supports, regardless of extension type.
+For example, the following will upload all files that are supported by DocumentCloud in the groucho_marx directory. ::
+
+    >>> obj_list = client.documents.upload_directory('/home/ben/pdfs/groucho_marx/', extensions=None)
+
+
 Uploading a PDF from a URL
 --------------------------
 
-How to read a PDF document from a URL on the World Wide Web and upload it to DocumentCloud without saving it to your local hard drive.
+You can upload a PDF from a remote URL in the following way. ::
 
     >>> from documentcloud import DocumentCloud
     >>> url = "http://myhost.org/interesting-doc.pdf"
@@ -146,11 +155,23 @@ How to read a PDF document from a URL on the World Wide Web and upload it to Doc
 
 
 Uploading a document with a different supported file type from URL
---------------------------
-Here is an example of how to read a document with another supported file type from a URL and upload it to DocumentCloud without saving it to your local hard drive. 
+------------------------------------------------------------------
+
+You can specify the original_extension on upload to handle other extension types. ::
 
     >>> from documentcloud import DocumentCloud
     >>> url = "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png"
     >>> client = DocumentCloud(DOCUMENTCLOUD_USERNAME, DOCUMENTCLOUD_PASSWORD)
     >>> # Upload the specified URL to the given client
     >>> obj = client.documents.upload(url, original_extension='png')
+
+Uploading a list of URLs as documents to DocumentCloud
+------------------------------------------------------
+
+If you are trying to upload a lot of URLs regularly, there is a bulk method to upload them 25 at a time - upload_urls(). ::
+
+    >>> urls = ["https://www.chicago.gov/content/dam/city/depts/dcd/tif/22reports/T_072_24thMichiganAR22.pdf", "https://www.chicago.gov/content/dam/city/depts/dcd/tif/22reports/T_063_CanalCongressAR22.pdf"]
+    >>> new = client.documents.upload_urls(urls)
+    >>> new
+    [<Document: 23932356 - T_072_24thMichiganAR22>, <Document: 23932357 - T_063_CanalCongressAR22>]
+
diff --git a/docs/projects.rst b/docs/projects.rst
@@ -26,7 +26,7 @@ ProjectClient
    .. method:: create(title, description="", private=True, document_ids=None)
 
       Create a new project on DocumentCloud. You must be authorized to do this.
-      Returns the object representing the new record you've created.
+      Returns the object representing the new record you've created. ::
 
            >>> from documentcloud import DocumentCloud
            >>> client = DocumentCloud(USERNAME, PASSWORD)
@@ -56,7 +56,7 @@ ProjectClient
       must be authorized to do this. Returns a tuple. An object representing the
       record comes first. A boolean that reports whether or not the objects was
       created fresh comes second. It is true when the record was created, false
-      when it was found on the site already.
+      when it was found on the site already.  ::
 
            >>> from documentcloud import DocumentCloud
            >>> client = DocumentCloud(USERNAME, PASSWORD)
@@ -119,15 +119,15 @@ Project
 
    .. method:: clear_documents()
 
-      Removes all documents from a project. 
+      Removes all documents from a project. ::
 
          >>> obj = client.projects.get('816')
          >>> obj.clear_documents()
 
    .. method:: add_documents()
 
       Efficiently adds a lot of documents to a project. 
-      Adds the documents 25 at a time using bulk API calls. 
+      Adds the documents 25 at a time using bulk API calls. ::
 
          >>> documents_to_add = [client.documents.get('23745990'), client.documents.get('23745988')]
          >>> obj = client.projects.get('816')
@@ -145,7 +145,7 @@ Project
    .. attribute:: document_ids
 
        A list that contains the unique identifier of the documents assigned to
-       this project. Cannot be edited. Edit the document_list instead.
+       this project. Cannot be edited. Edit the document_list instead. ::
 
            >>> obj = client.projects.get('816')
            >>> obj.document_ids
@@ -155,7 +155,7 @@ Project
 
        A list that documents assigned to this project. Can be expanded by
        appending new documents to the list or cleared by reassigning it as an
-       empty list and then issuing the put command.
+       empty list and then issuing the put command. ::
 
            >>> obj = client.projects.get('816')
            >>> obj.document_list

diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
 
 setup(
     name="python-documentcloud",
-    version="4.0.0",
+    version="4.0.1",
     description="A simple Python wrapper for the DocumentCloud API",
     author="Mitchell Kotler",
     author_email="[email protected]",

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -142,7 +142,7 @@ def make_document(pdf=DEFAULT_DOCUMENT_URI, **kwargs):
 def project(client, document_factory):
     with vcr.use_cassette("tests/cassettes/fixtures/project.yaml"):
         document = document_factory()
-        title = "This is a project for testing {}".format(uuid4())
+        title = f"This is a project for testing {uuid4()}"
         project = client.projects.create(
             title, "This is a project for testing", document_ids=[document.id]
         )

diff --git a/tests/test_documents.py b/tests/test_documents.py
@@ -75,7 +75,7 @@ def test_dir(self, document, attr):
 
     def test_mentions(self, client, document):
         document = client.documents.search(
-            "document:{} text".format(document.id), mentions="true"
+            f"document:{document.id} text", mentions="true"
         )[0]
         assert document.mentions
         mention = document.mentions[0]
@@ -158,7 +158,9 @@ def test_section(self, document_factory):
 
 class TestDocumentClient:
     def test_search(self, client, document):
-        documents = client.documents.search("document:{} simple".format(document.id))
+        documents = client.documents.search(
+            f"document:{document.id} simple"
+        )
         assert documents
 
     def test_list(self, client):
@@ -176,10 +178,11 @@ def test_public_upload(self, public_client):
             public_client.documents.upload("tests/test.pdf")
 
     def test_upload_file(self, document_factory):
-        pdf = open("tests/test.pdf", "rb")
-        document = document_factory(pdf)
+        with open("tests/test.pdf", "rb") as pdf:
+            document = document_factory(pdf)
         assert document.status == "success"
 
+
     def test_upload_file_path(self, document_factory):
         document = document_factory("tests/test.pdf")
         assert document.status == "success"