Formatting and documentation updates

DynamicGenetics · Apr 12, 2021 · 134986f · 134986f
1 parent e48b2d9
commit 134986f
Show file tree

Hide file tree

Showing 4 changed files with 94 additions and 59 deletions.
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -49,4 +49,3 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
-
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -14,29 +14,35 @@ Before you use the rehyrdator, please make sure to read the Disclaimers to get a
 User Guide
 =====================
 The *Spotify Rehydrator* primarily operates through the ``Rehydrator`` class. The required inputs for this class are an input folder,
-an output folder and a `ClientCredentials <https://spotipy.readthedocs.io/en/2.16.1/#client-credentials-flow>`_ 
-object, which is used by `Spotipy` for authenticating the API calls. You can then call the ``run()`` method.
+an output folder and a Client ID and Client Secret from the Spotify Developer Portal. These are used for authenticating the API calls. You can then call the ``run()`` method.
 
 .. note::  To request developer credentials go to `Spotify's developer portal <https://developer.spotify.com/dashboard/>`_.
             You will need to 'create an app', which will have credentials associated with it.
             Your app dashboard will give you access to your ``Client ID`` and a ``Client Secret``. 
 
 Assuming you have set your Client ID and Client Secret as environment variables then this is an example of how you could run the Rehydrator::
     
+    import os
+    import pathlib
     from spotifyrehydrator import Rehydrator
-    from spotipy.oauth2 import SpotifyClientCredentials
-
-    auth = SpotifyClientCredentials(
-        client_id=CLIENT_ID, 
-        client_secret=CLIENT_SECRET
-        )
 
     Rehydrator(
         input_path=os.path.join(pathlib.Path(__file__).parent.absolute(), "input"),
         output_path=os.path.join(pathlib.Path(__file__).parent.absolute(), "output"),
-        sp_creds=auth,
+        client_id=os.getenv("SPOTIFY_CLIENT_ID"),
+        client_secret=os.getenv("SPOTIFY_CLIENT_SECRET"),
     ).run(return_all=True)
 
+
+The ``.run()`` method will by default return the following information as columns: the Spotify track ID of the returned track, the name of the artist of the returned track,
+and the name of the returned track. This will be joined with the searched artist and track, the person ID where relevant, and the time metadata in the original ``.json`` file.
+There are then three optional arguments:
+
+* ``artist_info = True`` will return the popularity of the artist returned and a list of genres attributed to that artist, provided by `the Artists API endpoint <https://developer.spotify.com/documentation/web-api/reference/#category-artists>`_.
+* ``audio_features = True`` will return a column for each of the audio features provided by the `Tracks API <https://developer.spotify.com/documentation/web-api/reference/#category-tracks>`_.
+* ``return_all = True`` will return both of the above.
+
+Be aware that extra arguments involve more API calls and so may take longer. 
+
+
 Expected formats
 ------------------
 
@@ -81,9 +87,7 @@ Useful information
 --------------------
 * If the output directory does not exist then it will be created. 
 * Rehydration for one individual can take 15 minutes or more depending on how many songs there are.
-* If a file for the next individual's data to be rehydrated already exists in the output directory
-    then that person will be skipped. You will need to delete or remove their file from the output
-    folder for the rehydrator to process their data. 
+* If a file for the next individual's data to be rehydrated already exists in the output directory then that person will be skipped. You will need to delete or remove their file from the output folder for the rehydrator to process their data. 
 
 Disclaimers
 ================

diff --git a/src/spotifyrehydrator/utils.py b/src/spotifyrehydrator/utils.py
@@ -1,5 +1,5 @@
 """
-Main module for the `spotifyrehydrator` package, containing three dataclasses.
+The main module for the `spotifyrehydrator` package contains three dataclasses.
 
 `Track` operates on a single Track instance, starting from just a `name` and an `artist`,
 as would be provided in self-requested data. It is possible to use `Track` to get information
@@ -44,15 +44,21 @@ class Rehydrator:
 
     Attributes
     ----------
-    input_path: path to the directory (folder) where the input json files are stored.
-    output_path: path to the directory (folder) where the output .tsv files are saved.
-    client_id: Spotify API client ID Credentials
-    client_secret: Spotify API client secret Credentials
-    _person_ids: A list of each of the unique 'people' files identified for, or None.
+    input_path: str
+        path to the directory (folder) where the input json files are stored.
+    output_path: str
+        path to the directory (folder) where the output .tsv files are saved.
+    client_id: str
+        Spotify API client ID Credentials
+    client_secret: str
+        Spotify API client secret Credentials
+    _person_ids: list or None
+        A list of the unique 'people' for whom input files were identified, or None.
 
     Example
     -------
-        ``Rehydrator(input_path, output_path, sp).run()``
+
+        >>> Rehydrator(input_path, output_path, client_id, client_secret).run()
     """
 
     input_path: str
@@ -144,15 +150,19 @@ def rehydrate(
 
         """
         For a single person's set of data, use the Tracks class to get all of
-        the track IDs and features, then join these on the full listening 
-        history data. Save out the complete data, and return it too. 
+        the track IDs and features, then join these on the full listening
+        history data. Save out the complete data, and return it too.
 
         Parameters
         -----------
         person_id: str = None
-        return_all: bool = False
-        audio_features: bool = False
-        artist_info: bool = False
+            Unique ID for the person this set of data belongs to.
+        return_all: bool, default = False
+            Return both audio_features and artist_info
+        audio_features: bool, default = False
+            Return each `track's audio features. <https://developer.spotify.com/documentation/web-api/reference/#object-audiofeaturesobject>`_
+        artist_info: bool, default = False
+            Return the `popularity and genre list for each track's artist <https://developer.spotify.com/documentation/web-api/reference/#object-artistobject>`_
         """
 
         if person_id is not None:
@@ -194,14 +204,12 @@ def run(
 
         Parameters
         -----------
-        return_all: bool = False
+        return_all: bool, default = False
             Return both audio_features and artist_info
-        audio_features: bool = False
-            Return each track's audio features. 
-            `Spotify documentation available here. <https://developer.spotify.com/documentation/web-api/reference/#object-audiofeaturesobject>`_
-        artist_info: bool = False
-            Return the popularity and genre list for each track's artist. 
-            `Spotify documentation available here. <https://developer.spotify.com/documentation/web-api/reference/#object-artistobject>`_
+        audio_features: bool, default = False
+            Return each `track's audio features. <https://developer.spotify.com/documentation/web-api/reference/#object-audiofeaturesobject>`_
+        artist_info: bool, default = False
+            Return the `popularity and genre list for each track's artist <https://developer.spotify.com/documentation/web-api/reference/#object-artistobject>`_
         """
 
         try:
@@ -220,11 +228,15 @@ def run(
             logging.warn(
                 "---> No unique identifiers found. Rehydrating all files together."
             )
-            self.rehydrate(return_all=return_all, audio_features=audio_features, artist_info=artist_info)
+            self.rehydrate(
+                return_all=return_all,
+                audio_features=audio_features,
+                artist_info=artist_info,
+            )
 
     def _save(self, data: pd.DataFrame, person_id: str = None):
 
-        """Function to save the rehydrated data out to .tsv. person_id is optional for file naming."""
+        """Function to save the rehydrated data out to ``.tsv``. ``person_id`` is optional for file naming."""
 
         # Create an output folder if it doesn't already exist
         if not os.path.exists(self.output_path):
@@ -247,9 +259,7 @@ def _save(self, data: pd.DataFrame, person_id: str = None):
             )
 
         logger.info(
-            "---> Rehydrated data has been saved to the output folder".format(
-                person_id
-            )
+            "---> Rehydrated data has been saved to the output folder".format(person_id)
         )
 
 
@@ -269,10 +279,18 @@ class Tracks:
 
     Example
     -------
-        ``Tracks(data, client_id, client_secret).get(return_all=True)``
 
-    This will return a pd.Dataframe with feature columns filled for each unique track
+        >>> Tracks(data, client_id, client_secret).get(return_all=True)
+
+    This will return a ``pd.DataFrame`` with feature columns filled for each unique track
     in the original data.
+
+
+    Raises
+    -------
+    KeyError
+        If the input data provided does not contain ``artistName`` and ``trackName`` columns.
+
     """
 
     data: pd.DataFrame
@@ -435,17 +453,16 @@ def get(
     ) -> pd.DataFrame:
 
         """
-        Get the requested data for each track. Returns a dataframe of unique tracks. 
+        Get the requested data for each track. Returns a dataframe of unique tracks.
 
         Parameters
         ------------
         return_all: bool, default = False
-            Run with all optional data arguments as True
-        artist_info: bool, default = False
-            Include keys of 'artist_genres' and 'artist_pop' with list of artist's genres and popularity rating
-            given by the Spotify API artist end point.
+            Return both audio_features and artist_info
         audio_features: bool, default = False
-            Include key of 'audio_features' with value as a dict of the results from the audio_features endpoint.
+            Return each `track's audio features. <https://developer.spotify.com/documentation/web-api/reference/#object-audiofeaturesobject>`_
+        artist_info: bool, default = False
+            Return the `popularity and genre list for each track's artist <https://developer.spotify.com/documentation/web-api/reference/#object-artistobject>`_
         """
 
         # Get the basic track information
@@ -487,23 +504,30 @@ class Track:
 
     """
     A class that searches for and returns a spotify ID and other optional information for a track,
-    given a trackName and and artistName.
+    given a ``trackName`` and an ``artistName``.
 
     Attributes
     ----------
-    name: The name of the track (str).
-    artist: The name of the artist (str).
-    client_id: Spotify API client ID Credentials
-    client_secret: Spotify API client secret Credentials
+    name: str
+        The name of the track.
+    artist: str
+        The name of the artist.
+    client_id: str
+        Spotify API client ID Credentials
+    client_secret: str
+        Spotify API client secret Credentials
+
     Example
     -------
+
     .. code-block:: python
 
         heroes = Track(name="Heroes", artist="David Bowie", client_id=client_id, client_secret=client_secret)
         # Returns dict with just the SpotifyID
         heroes.get()
         # Returns dict with all requested information
         heroes.get(return_all=True)
 
+
     """
 
     name: str
@@ -536,7 +560,8 @@ def search_results(self, remove_char=None) -> dict:
             track = self.name
 
         results = self.sp_auth.search(
-            q="artist:" + artist + " track:" + track, type="track",
+            q="artist:" + artist + " track:" + track,
+            type="track",
         )
         # Return the first result from this search
         return results
@@ -623,12 +648,11 @@ def get(
         Parameters
         ------------
         return_all: bool, default = False
-            Run with all optional data arguments as True
-        artist_info: bool, default = False
-            Include keys of 'artist_genres' and 'artist_pop' with list of artist's genres and popularity rating
-            given by the Spotify API artist end point.
+            Return both audio_features and artist_info
         audio_features: bool, default = False
-            Include key of 'audio_features' with value as a dict of the results from the audio_features endpoint.
+            Return each `track's audio features. <https://developer.spotify.com/documentation/web-api/reference/#object-audiofeaturesobject>`_
+        artist_info: bool, default = False
+            Return the `popularity and genre list for each track's artist <https://developer.spotify.com/documentation/web-api/reference/#object-artistobject>`_
         """
 
         try:
@@ -651,4 +675,3 @@ def get(
 
         # Assuming we successfully got some results, extract requested info and return
         return self._extract_results(results, return_all, artist_info, audio_features)
-
diff --git a/tests/test_functions.py b/tests/test_functions.py
@@ -79,7 +79,12 @@ def setup_method(self):
 
     def test_incorrect_input_columns(self):
         """Try to give Tracks obj a df with incorrect columns."""
-        df = pd.DataFrame({"col1": [2, 1, 9, 8, 7, 4], "col2": [0, 1, 9, 4, 2, 3],})
+        df = pd.DataFrame(
+            {
+                "col1": [2, 1, 9, 8, 7, 4],
+                "col2": [0, 1, 9, 4, 2, 3],
+            }
+        )
         with pytest.raises(KeyError):
             tracks = Tracks(
                 df,
@@ -175,7 +180,10 @@ def test_read_data(self, person, input, expected):
 
     @pytest.mark.parametrize(
         "person, input, expected",
-        [("Person002", INPUT_PEOPLE, 9), (None, INPUT_NO_PEOPLE, 65),],
+        [
+            ("Person002", INPUT_PEOPLE, 9),
+            (None, INPUT_NO_PEOPLE, 65),
+        ],
     )
     def test_rehydrate(self, person, input, expected):
         data = Rehydrator(
@@ -203,6 +211,7 @@ def test_existing_data(self, caplog):
         # Assert we get the correct warning message in the logger.
         assert "Output file for Person002 already exists." in caplog.text
 
+
 class TestIntegrationPeople:
     """Class to check the whole rehydrator behaves as expected when there are multiple people."""