From 6282720afe1b9d4235b2ddd673f21ff6f4e06b05 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 16 Feb 2024 11:43:05 +0100 Subject: [PATCH 01/33] Start the process of updating troi in lb-server --- troi/patches/lb_radio_classes/artist.py | 1 + troi/patches/playlist_from_listenbrainz.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index 6fa778ce..83505698 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -79,6 +79,7 @@ def fetch_artist_names(self, artist_mbids): Fetch artists names for a given list of artist_mbids """ + # TODO: Use mb artist cache data = [{"[artist_mbid]": mbid} for mbid in artist_mbids] r = requests.post("https://datasets.listenbrainz.org/artist-lookup/json", json=data) if r.status_code != 200: diff --git a/troi/patches/playlist_from_listenbrainz.py b/troi/patches/playlist_from_listenbrainz.py index 34eab385..3b2841f8 100755 --- a/troi/patches/playlist_from_listenbrainz.py +++ b/troi/patches/playlist_from_listenbrainz.py @@ -1,11 +1,12 @@ import json from troi import Playlist +from troi.patch import Patch from troi.playlist import PlaylistFromJSPFElement import troi.musicbrainz.recording_lookup -class TransferPlaylistPatch(troi.patch.Patch): +class TransferPlaylistPatch(Patch): @staticmethod def inputs(): From 55556b7b8e6154661418eddc339a26aa07321bce Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 19 Feb 2024 11:17:25 +0100 Subject: [PATCH 02/33] Interim check in --- troi/patches/lb_radio_classes/artist.py | 33 +++---------------------- troi/recording_search_service.py | 19 ++++++++++++++ 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index 83505698..736a5fb5 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -33,9 +33,6 @@ class 
LBRadioArtistRecordingElement(troi.Element): def __init__(self, artist_mbid, mode="easy", include_similar_artists=True): troi.Element.__init__(self) - self.artist_mbid = str(artist_mbid) - self.artist_name = None - self.similar_artists = [] self.mode = mode self.include_similar_artists = include_similar_artists if include_similar_artists: @@ -49,31 +46,6 @@ def inputs(self): def outputs(self): return [Recording] - def get_similar_artists(self, artist_mbid): - """ Fetch similar artists, given an artist_mbid. Returns a sored plist of artists. """ - - r = requests.post("https://labs.api.listenbrainz.org/similar-artists/json", - json=[{ - 'artist_mbid': - artist_mbid, - 'algorithm': - "session_based_days_7500_session_300_contribution_5_threshold_10_limit_100_filter_True_skip_30" - }]) - if r.status_code != 200: - raise RuntimeError(f"Cannot fetch similar artists: {r.status_code} ({r.text})") - - try: - artists = r.json()[3]["data"] - except IndexError: - return [] - - # Knock down super hyped artists - for artist in artists: - if artist["artist_mbid"] in OVERHYPED_SIMILAR_ARTISTS: - artist["score"] /= 3 # Chop! 
- - return plist(sorted(artists, key=lambda a: a["score"], reverse=True)) - def fetch_artist_names(self, artist_mbids): """ Fetch artists names for a given list of artist_mbids @@ -92,8 +64,9 @@ def read(self, entities): # Fetch our mode ranges start, stop = self.local_storage["modes"][self.mode] - self.data_cache = self.local_storage["data_cache"] - artists = [{"mbid": self.artist_mbid}] + # Search artist and fetch MBID if not given.save to artist_mbid + + # TODO: Work out what to do about overhyped artists self.recording_search_by_artist = self.patch.get_service( "recording-search-by-artist") diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index 52db1154..b24cf0b0 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -67,6 +67,25 @@ def search(self, artists, begin_percent, end_percent, num_recordings): NOTE: This search is poor -- it should span all recordings by an artist not, just the top ones! """ +# TODO: Finish this + self.data_cache = self.local_storage["data_cache"] + params = { + "max_similar_artists": + "max_recordings_per_artist": + "begin_percent": + "end_percent": + } + url = f"https://test-api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" + + r = requests.post(url, params=params) + if r.status_code != 200: + raise RuntimeError(f"Cannot lb_radio artists: {r.status_code} ({r.text})") + + try: + artists = r.json() + except IndexError: + return [] + artists_recordings = {} for artist_mbid in artists: params={"artist_mbid": artist_mbid} From 6d6ce419912928f8cfc5733b211079b0b3e7aaef Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 20 Feb 2024 16:55:57 +0100 Subject: [PATCH 03/33] Start fixing the artist search to be compatible to the new endpoint --- troi/content_resolver/artist_search.py | 68 ++++++++++++++++++++----- troi/patches/lb_radio_classes/artist.py | 14 ----- troi/recording_search_service.py | 35 +++++-------- 3 files changed, 67 insertions(+), 50 deletions(-) diff --git 
a/troi/content_resolver/artist_search.py b/troi/content_resolver/artist_search.py index ca986738..24f1350c 100755 --- a/troi/content_resolver/artist_search.py +++ b/troi/content_resolver/artist_search.py @@ -12,6 +12,19 @@ from troi.recording_search_service import RecordingSearchByArtistService from troi.splitter import plist +OVERHYPED_SIMILAR_ARTISTS = [ + "b10bbbfc-cf9e-42e0-be17-e2c3e1d2600d", # The Beatles + "83d91898-7763-47d7-b03b-b92132375c47", # Pink Floyd + "a74b1b7f-71a5-4011-9441-d0b5e4122711", # Radiohead + "8bfac288-ccc5-448d-9573-c33ea2aa5c30", # Red Hot Chili Peppers + "9c9f1380-2516-4fc9-a3e6-f9f61941d090", # Muse + "cc197bad-dc9c-440d-a5b5-d52ba2e14234", # Coldplay + "65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab", # Metallica + "5b11f4ce-a62d-471e-81fc-a69a8278c7da", # Nirvana + "f59c5520-5f46-4d2c-b2c4-822eabf53419", # Linkin Park + "cc0b7089-c08d-4c10-b6b0-873582c17fd6", # System of a Down + "ebfc1398-8d96-47e3-82c3-f782abcdb13d", # Beach boys +] class LocalRecordingSearchByArtistService(RecordingSearchByArtistService): ''' @@ -21,23 +34,50 @@ class LocalRecordingSearchByArtistService(RecordingSearchByArtistService): def __init__(self): RecordingSearchByArtistService.__init__(self) - def search(self, artist_mbids, begin_percent, end_percent, num_recordings): + def get_similar_artists(self, artist_mbid): + """ Fetch similar artists, given an artist_mbid. Returns a sored plist of artists. """ + + r = requests.post("https://labs.api.listenbrainz.org/similar-artists/json", + json=[{ + 'artist_mbid': + artist_mbid, + 'algorithm': + "session_based_days_7500_session_300_contribution_5_threshold_10_limit_100_filter_True_skip_30" + }]) + if r.status_code != 200: + raise RuntimeError(f"Cannot fetch similar artists: {r.status_code} ({r.text})") + + try: + artists = r.json()[3]["data"] + except IndexError: + return [] + + # Knock down super hyped artists + for artist in artists: + if artist["artist_mbid"] in OVERHYPED_SIMILAR_ARTISTS: + artist["score"] /= 3 # Chop! 
+ + return plist(sorted(artists, key=lambda a: a["score"], reverse=True)) + + def search(self, artist_mbid, begin_percent, end_percent, max_recordings_per_artist, max_similar_artists): + """ Perform an artist search. Parameters: - tags - a list of artist_mbids for which to search recordings - begin_percent - if many recordings match the above parameters, return only - recordings that have a minimum popularity percent score - of begin_percent. - end_percent - if many recordings match the above parameters, return only - recordings that have a maximum popularity percent score - of end_percent. - num_recordings - ideally return these many recordings - - If only few recordings match, the begin_percent and end_percent are - ignored. + begin_percent: if many recordings match the above parameters, return only + recordings that have a minimum popularity percent score + of begin_percent. + end_percent: if many recordings match the above parameters, return only + recordings that have a maximum popularity percent score + of end_percent. + max_recordings_per_artist: The number of recordings to collect for each artist. + max_similar_artists: The maximum number of similar artists to select. + + If only few recordings match, the begin_percent and end_percent are ignored. 
""" + #TODO: Finish reviewing/updating this function + similar_artists = self.get_similar_artists(artist_mbid) query = """SELECT popularity , recording_mbid , artist_mbid @@ -50,8 +90,8 @@ def search(self, artist_mbids, begin_percent, end_percent, num_recordings): ORDER BY artist_mbid , popularity""" - placeholders = ",".join(("?", ) * len(artist_mbids)) - cursor = db.execute_sql(query % placeholders, params=tuple(artist_mbids)) + placeholders = ",".join(("?", ) * len(similar_artists)) + cursor = db.execute_sql(query % placeholders, params=tuple(similar_artists)) artists = defaultdict(list) for rec in cursor.fetchall(): diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index 736a5fb5..739f4d22 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -7,20 +7,6 @@ from troi.utils import interleave from troi.recording_search_service import RecordingSearchByArtistService -OVERHYPED_SIMILAR_ARTISTS = [ - "b10bbbfc-cf9e-42e0-be17-e2c3e1d2600d", # The Beatles - "83d91898-7763-47d7-b03b-b92132375c47", # Pink Floyd - "a74b1b7f-71a5-4011-9441-d0b5e4122711", # Radiohead - "8bfac288-ccc5-448d-9573-c33ea2aa5c30", # Red Hot Chili Peppers - "9c9f1380-2516-4fc9-a3e6-f9f61941d090", # Muse - "cc197bad-dc9c-440d-a5b5-d52ba2e14234", # Coldplay - "65f4f0c5-ef9e-490c-aee3-909e7ae6b2ab", # Metallica - "5b11f4ce-a62d-471e-81fc-a69a8278c7da", # Nirvana - "f59c5520-5f46-4d2c-b2c4-822eabf53419", # Linkin Park - "cc0b7089-c08d-4c10-b6b0-873582c17fd6", # System of a Down - "ebfc1398-8d96-47e3-82c3-f782abcdb13d", # Beach boys -] - class LBRadioArtistRecordingElement(troi.Element): """ diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index b24cf0b0..b6e4efb1 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -60,14 +60,16 @@ class RecordingSearchByArtistService(Service): def __init__(self): super().__init__(self.SLUG) - def search(self, 
artists, begin_percent, end_percent, num_recordings): + def search(self, artist_mbid, begin_percent, end_percent, max_recordings_per_artist, max_similar_artists): """ - Fetch the artist data from the LB API and return it as a dict. + Given a seed artist mbid, find and select similar artists (via LB similar artists data). - NOTE: This search is poor -- it should span all recordings by an artist not, just the top ones! + begin_percent: The lower bound on recording popularity + end_percent: The upper bound on recording popularity + max_recordings_per_artist: The number of recordings to collect for each artist. + max_similar_artists: The maximum number of similar artists to select. """ -# TODO: Finish this self.data_cache = self.local_storage["data_cache"] params = { "max_similar_artists": @@ -86,22 +88,11 @@ def search(self, artists, begin_percent, end_percent, num_recordings): except IndexError: return [] - artists_recordings = {} for artist_mbid in artists: - params={"artist_mbid": artist_mbid} - r = requests.get("https://api.listenbrainz.org/1/popularity/top-recordings-for-artist", params={"artist_mbid": artist_mbid}) - if r.status_code != 200: - raise RuntimeError(f"Cannot fetch top artist recordings: {r.status_code} ({r.text})") - - recordings = plist() - for recording in r.json(): - artist = Artist(mbids=recording["artist_mbids"], name=recording["artist_name"]) - recordings.append( - Recording(mbid=recording["recording_mbid"], - name=recording["recording_name"], - duration=recording["length"], - artist=artist)) - - artists_recordings[artist_mbid] = recordings.random_item(begin_percent, end_percent, num_recordings) - - return artists_recordings + recordings = artists[artist_mbid] + updated = [] + for rec in recordings: + updated.append(Recording(mbid=rec["recording_mbid"], musicbrainz={"total_listen_count": rec["total_listen_count"]})) + artists[artist_mbid] = updated + + return artists From 010c4c1c81d9adad16705b22450b85267503d9c0 Mon Sep 17 00:00:00 2001 From: 
Robert Kaye Date: Wed, 21 Feb 2024 15:14:30 +0100 Subject: [PATCH 04/33] Interim checkin --- troi/content_resolver/artist_search.py | 1 - troi/patches/lb_radio_classes/artist.py | 29 ++++--------------------- troi/recording_search_service.py | 9 ++++---- 3 files changed, 8 insertions(+), 31 deletions(-) diff --git a/troi/content_resolver/artist_search.py b/troi/content_resolver/artist_search.py index 24f1350c..aaa368a8 100755 --- a/troi/content_resolver/artist_search.py +++ b/troi/content_resolver/artist_search.py @@ -76,7 +76,6 @@ def search(self, artist_mbid, begin_percent, end_percent, max_recordings_per_art If only few recordings match, the begin_percent and end_percent are ignored. """ - #TODO: Finish reviewing/updating this function similar_artists = self.get_similar_artists(artist_mbid) query = """SELECT popularity , recording_mbid diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index 739f4d22..44b735bf 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -19,6 +19,7 @@ class LBRadioArtistRecordingElement(troi.Element): def __init__(self, artist_mbid, mode="easy", include_similar_artists=True): troi.Element.__init__(self) + self.artist_mbid = artist_mbid self.mode = mode self.include_similar_artists = include_similar_artists if include_similar_artists: @@ -37,7 +38,7 @@ def fetch_artist_names(self, artist_mbids): Fetch artists names for a given list of artist_mbids """ - # TODO: Use mb artist cache + # TODO: Use the artist cache data data = [{"[artist_mbid]": mbid} for mbid in artist_mbids] r = requests.post("https://datasets.listenbrainz.org/artist-lookup/json", json=data) if r.status_code != 200: @@ -50,28 +51,14 @@ def read(self, entities): # Fetch our mode ranges start, stop = self.local_storage["modes"][self.mode] - # Search artist and fetch MBID if not given.save to artist_mbid - # TODO: Work out what to do about overhyped artists - 
self.recording_search_by_artist = self.patch.get_service( "recording-search-by-artist") - # First, fetch similar artists if the user didn't override that. - if self.include_similar_artists: - # Fetch similar artists for original artist - similar_artists = self.get_similar_artists(self.artist_mbid) - # if len(similar_artists) == 0: - # raise RuntimeError(f"Not enough similar artist data available for artist {self.artist_name}. Please choose a different artist.") - - # select artists - for artist in similar_artists[start:stop]: - artists.append({"mbid": artist["artist_mbid"]}) - if len(artists) >= self.MAX_NUM_SIMILAR_ARTISTS: - break + artist_recordings = self.recording_search_by_artist.search(self.artist_mbid, start, stop, self.max_top_recordings_per_artist, self.MAX_NUM_SIMILAR_ARTISTS) # For all fetched artists, fetch their names - artist_names = self.fetch_artist_names([i["mbid"] for i in artists]) + artist_names = self.fetch_artist_names(list(artist_recordings)) for artist in artists: if artist["mbid"] not in artist_names: raise RuntimeError("Artist %s could not be found. Is this MBID valid?" % artist["artist_mbid"]) @@ -99,19 +86,11 @@ def read(self, entities): self.local_storage["user_feedback"].append(msg) self.data_cache["element-descriptions"].append("artist %s" % artists[0]["name"]) - artist_mbids = [ artist["mbid"] for artist in artists ] - artist_mbids = list(set(artist_mbids)) - artist_recordings = self.recording_search_by_artist.search(artist_mbids, start, stop, self.max_top_recordings_per_artist) # Now collect recordings from the artist and similar artists and return an interleaved # stream of recordings. 
for i, artist in enumerate(artists): - # TODO: This disables top recordings caching, which needs to be re-thought given the new approach - #if artist["mbid"] + "_top_recordings" in self.data_cache: - # artist["recordings"] = self.data_cache[artist["mbid"] + "_top_recordings"] - # continue - recs_plist = plist(artist_recordings[artist["mbid"]]) if len(recs_plist) < 20: self.local_storage["user_feedback"].append( diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index b6e4efb1..bc537b22 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -70,12 +70,11 @@ def search(self, artist_mbid, begin_percent, end_percent, max_recordings_per_art max_similar_artists: The maximum number of similar artists to select. """ - self.data_cache = self.local_storage["data_cache"] params = { - "max_similar_artists": - "max_recordings_per_artist": - "begin_percent": - "end_percent": + "max_similar_artists": max_similar_artists, + "max_recordings_per_artist": max_recordings_per_artist, + "begin_percent": begin_percent, + "end_percent": end_percent } url = f"https://test-api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" From 021128d798df55d17cdbc78caa4f109aac2affb3 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 21 Feb 2024 16:51:42 +0100 Subject: [PATCH 05/33] New artist element is finally workin, but the server side needs some improvements before we get good results. 
--- troi/patches/lb_radio_classes/artist.py | 38 +++++++++++-------------- troi/recording_search_service.py | 4 +-- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index 44b735bf..d0625291 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -19,7 +19,7 @@ class LBRadioArtistRecordingElement(troi.Element): def __init__(self, artist_mbid, mode="easy", include_similar_artists=True): troi.Element.__init__(self) - self.artist_mbid = artist_mbid + self.artist_mbid = str(artist_mbid) self.mode = mode self.include_similar_artists = include_similar_artists if include_similar_artists: @@ -48,6 +48,8 @@ def fetch_artist_names(self, artist_mbids): def read(self, entities): + self.data_cache = self.local_storage["data_cache"] + # Fetch our mode ranges start, stop = self.local_storage["modes"][self.mode] @@ -59,49 +61,43 @@ def read(self, entities): # For all fetched artists, fetch their names artist_names = self.fetch_artist_names(list(artist_recordings)) - for artist in artists: - if artist["mbid"] not in artist_names: + for artist_mbid in artist_recordings: + if artist_mbid not in artist_names: raise RuntimeError("Artist %s could not be found. Is this MBID valid?" 
% artist["artist_mbid"]) - artist["name"] = artist_names[artist["mbid"]] - # Store data in cache, so the post processor can create decent descriptions, title - self.data_cache[artist["mbid"]] = artist["name"] + self.data_cache[artist_mbid] = artist_names[artist_mbid] # start crafting user feedback messages msgs = [] - if self.include_similar_artists and len(artists) == 1: + if self.include_similar_artists and len(artist_recordings) == 1: msgs.append(f"Seed artist {artist_names[self.artist_mbid]} no similar artists.") else: - if self.include_similar_artists and len(artists) < 4: + if self.include_similar_artists and len(artist_recordings) < 4: msgs.append(f"Seed artist {artist_names[self.artist_mbid]} few similar artists.") - msg = "artist: using seed artist %s" % artists[0]["name"] + msg = "artist: using seed artist %s" % artist_names[self.artist_mbid] if self.include_similar_artists: - msg += " and similar artists: " + ", ".join([a["name"] for a in artists[1:]]) + mbids = list(artist_recordings) + del mbids[mbids.index(self.artist_mbid)] + msg += " and similar artists: " + ", ".join([artist_names[mbid] for mbid in mbids]) else: msg += " only" msgs.append(msg) for msg in msgs: self.local_storage["user_feedback"].append(msg) - self.data_cache["element-descriptions"].append("artist %s" % artists[0]["name"]) + self.data_cache["element-descriptions"].append("artist %s" % artist_names[self.artist_mbid]) # Now collect recordings from the artist and similar artists and return an interleaved # stream of recordings. 
- for i, artist in enumerate(artists): + for i, artist_mbid in enumerate(artist_recordings): - recs_plist = plist(artist_recordings[artist["mbid"]]) + recs_plist = plist(artist_recordings[artist_mbid]) if len(recs_plist) < 20: self.local_storage["user_feedback"].append( - f"Artist {artist['name']} only has {'no' if len(recs_plist) == 0 else 'few'} top recordings.") + f"Artist {artist_names[artist_mbid]} only has {'no' if len(recs_plist) == 0 else 'few'} top recordings.") recordings = recs_plist.random_item(start, stop, self.max_top_recordings_per_artist) - # Now tuck away the data for caching and interleaving - # The whole artist caching concept hasn't worked very well, and with future changes, it will likely go away. - # For now, ignore. - #self.data_cache[artist["mbid"] + "_top_recordings"] = recordings - artist["recordings"] = recordings - - return interleave([a["recordings"] for a in artists]) + return interleave([artist_recordings[mbid] for mbid in artist_recordings]) diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index bc537b22..ce7717b2 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -76,9 +76,9 @@ def search(self, artist_mbid, begin_percent, end_percent, max_recordings_per_art "begin_percent": begin_percent, "end_percent": end_percent } - url = f"https://test-api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" + url = f"https://beta-api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" - r = requests.post(url, params=params) + r = requests.get(url, params=params) if r.status_code != 200: raise RuntimeError(f"Cannot lb_radio artists: {r.status_code} ({r.text})") From de7b842784093b159698a7f229ad139726be3df9 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 14 Mar 2024 14:04:49 +0100 Subject: [PATCH 06/33] Interim check in --- troi/content_resolver/artist_search.py | 3 ++- troi/patches/lb_radio_classes/artist.py | 11 ++++------- troi/recording_search_service.py | 7 +++++-- 3 files 
changed, 11 insertions(+), 10 deletions(-) diff --git a/troi/content_resolver/artist_search.py b/troi/content_resolver/artist_search.py index aaa368a8..a7b521fb 100755 --- a/troi/content_resolver/artist_search.py +++ b/troi/content_resolver/artist_search.py @@ -59,11 +59,12 @@ def get_similar_artists(self, artist_mbid): return plist(sorted(artists, key=lambda a: a["score"], reverse=True)) - def search(self, artist_mbid, begin_percent, end_percent, max_recordings_per_artist, max_similar_artists): + def search(self, mode, artist_mbid, begin_percent, end_percent, max_recordings_per_artist, max_similar_artists): """ Perform an artist search. Parameters: + mode: the mode used for this artist search begin_percent: if many recordings match the above parameters, return only recordings that have a minimum popularity percent score of begin_percent. diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index d0625291..badfa866 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -52,12 +52,10 @@ def read(self, entities): # Fetch our mode ranges start, stop = self.local_storage["modes"][self.mode] - - # TODO: Work out what to do about overhyped artists - self.recording_search_by_artist = self.patch.get_service( - "recording-search-by-artist") - - artist_recordings = self.recording_search_by_artist.search(self.artist_mbid, start, stop, self.max_top_recordings_per_artist, self.MAX_NUM_SIMILAR_ARTISTS) + self.recording_search_by_artist = self.patch.get_service("recording-search-by-artist") + artist_recordings = self.recording_search_by_artist.search(self.mode, self.artist_mbid, start, stop, + self.max_top_recordings_per_artist, + self.MAX_NUM_SIMILAR_ARTISTS) # For all fetched artists, fetch their names artist_names = self.fetch_artist_names(list(artist_recordings)) @@ -88,7 +86,6 @@ def read(self, entities): self.local_storage["user_feedback"].append(msg) 
self.data_cache["element-descriptions"].append("artist %s" % artist_names[self.artist_mbid]) - # Now collect recordings from the artist and similar artists and return an interleaved # stream of recordings. for i, artist_mbid in enumerate(artist_recordings): diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index ce7717b2..903710e5 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -60,7 +60,7 @@ class RecordingSearchByArtistService(Service): def __init__(self): super().__init__(self.SLUG) - def search(self, artist_mbid, begin_percent, end_percent, max_recordings_per_artist, max_similar_artists): + def search(self, mode, artist_mbid, begin_percent, end_percent, max_recordings_per_artist, max_similar_artists): """ Given a seed artist mbid, find and select similar artists (via LB similar artists data). @@ -71,12 +71,15 @@ def search(self, artist_mbid, begin_percent, end_percent, max_recordings_per_art """ params = { + "mode": mode, "max_similar_artists": max_similar_artists, "max_recordings_per_artist": max_recordings_per_artist, "begin_percent": begin_percent, "end_percent": end_percent } - url = f"https://beta-api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" + # TODO: Update for production +# url = f"https://beta-api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" + url = f"http://localhost:8100/1/lb-radio/artist/{artist_mbid}" r = requests.get(url, params=params) if r.status_code != 200: From 141ba33da828fb00279bc00dbad3466b7ce9835c Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 15 Mar 2024 16:45:00 +0100 Subject: [PATCH 07/33] Change begin/end percent to pop_begin/end --- troi/content_resolver/artist_search.py | 14 +++++++------- troi/content_resolver/tag_search.py | 14 +++++++------- troi/content_resolver/utils.py | 12 ++++++------ troi/recording_search_service.py | 20 +++++++++----------- 4 files changed, 29 insertions(+), 31 deletions(-) diff --git 
a/troi/content_resolver/artist_search.py b/troi/content_resolver/artist_search.py index a7b521fb..3f9a4add 100755 --- a/troi/content_resolver/artist_search.py +++ b/troi/content_resolver/artist_search.py @@ -59,22 +59,22 @@ def get_similar_artists(self, artist_mbid): return plist(sorted(artists, key=lambda a: a["score"], reverse=True)) - def search(self, mode, artist_mbid, begin_percent, end_percent, max_recordings_per_artist, max_similar_artists): + def search(self, mode, artist_mbid, pop_begin, pop_end, max_recordings_per_artist, max_similar_artists): """ Perform an artist search. Parameters: mode: the mode used for this artist search - begin_percent: if many recordings match the above parameters, return only + pop_begin: if many recordings match the above parameters, return only recordings that have a minimum popularity percent score - of begin_percent. - end_percent: if many recordings match the above parameters, return only + of pop_begin. + pop_end: if many recordings match the above parameters, return only recordings that have a maximum popularity percent score - of end_percent. + of pop_end. max_recordings_per_artist: The number of recordings to collect for each artist. max_similar_artists: The maximum number of similar artists to select. - If only few recordings match, the begin_percent and end_percent are ignored. + If only few recordings match, the pop_begin and pop_end are ignored. 
""" similar_artists = self.get_similar_artists(artist_mbid) @@ -104,6 +104,6 @@ def search(self, mode, artist_mbid, begin_percent, end_percent, max_recordings_p }) for artist in artists: - artists[artist] = select_recordings_on_popularity(artists[artist], begin_percent, end_percent, num_recordings) + artists[artist] = select_recordings_on_popularity(artists[artist], pop_begin, pop_end, num_recordings) return artists diff --git a/troi/content_resolver/tag_search.py b/troi/content_resolver/tag_search.py index 0dc9d0bd..4889993f 100755 --- a/troi/content_resolver/tag_search.py +++ b/troi/content_resolver/tag_search.py @@ -21,20 +21,20 @@ class LocalRecordingSearchByTagService(RecordingSearchByTagService): def __init__(self): RecordingSearchByTagService.__init__(self) - def search(self, tags, operator, begin_percent, end_percent, num_recordings): + def search(self, tags, operator, pop_begin, pop_end, num_recordings): """ Perform a tag search. Parameters: tags - a list of string tags to search for operator - a string specifying "or" or "and" - begin_percent - if many recordings match the above parameters, return only + pop_begin - if many recordings match the above parameters, return only recordings that have a minimum popularity percent score - of begin_percent. - end_percent - if many recordings match the above parameters, return only + of pop_begin. + pop_end - if many recordings match the above parameters, return only recordings that have a maximum popularity percent score - of end_percent. + of pop_end. - If only few recordings match, the begin_percent and end_percent are + If only few recordings match, the pop_begin and pop_end are ignored. 
""" @@ -51,7 +51,7 @@ def search(self, tags, operator, begin_percent, end_percent, num_recordings): for rec in cursor.fetchall(): recordings.append({"recording_mbid": rec[0], "popularity": rec[1], "file_id": rec[2], "file_id_type": rec[3]}) - return select_recordings_on_popularity(recordings, begin_percent, end_percent, num_recordings) + return select_recordings_on_popularity(recordings, pop_begin, pop_end, num_recordings) def or_search(self, tags, min_popularity=None, max_popularity=None): """ diff --git a/troi/content_resolver/utils.py b/troi/content_resolver/utils.py index 8f4e31e5..5f377910 100755 --- a/troi/content_resolver/utils.py +++ b/troi/content_resolver/utils.py @@ -21,10 +21,10 @@ def ask_yes_no_question(prompt): print("eh? try again.") -def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_recordings): +def select_recordings_on_popularity(recordings, pop_begin, pop_end, num_recordings): """ Given dicts of recording data, select up to num_recordings recordings randomly - from the recordings that ideally lie in popularity between begin_percent and end_percent. + from the recordings that ideally lie in popularity between pop_begin and pop_end. If too little data is found in the percent range, select recordings that are the closest to the disired range. 
@@ -34,8 +34,8 @@ def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_ over_recordings = [] under_recordings = [] for rec in recordings: - if rec["popularity"] >= begin_percent: - if rec["popularity"] < end_percent: + if rec["popularity"] >= pop_begin: + if rec["popularity"] < pop_end: matching_recordings.append(rec) else: over_recordings.append(rec) @@ -49,12 +49,12 @@ def select_recordings_on_popularity(recordings, begin_percent, end_percent, num_ # Keep adding the best matches until we (hopefully) get our desired number of recordings while len(matching_recordings) < num_recordings: if under_recordings: - under_diff = begin_percent - under_recordings[-1]["popularity"] + under_diff = pop_begin - under_recordings[-1]["popularity"] else: under_diff = None if over_recordings: - over_diff = over_recordings[-1]["popularity"] - end_percent + over_diff = over_recordings[-1]["popularity"] - pop_end else: over_diff = None diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index 903710e5..0bbe656d 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -10,8 +10,6 @@ # Tags: # - flatten tag data for simplicity # - use series of tighter random spots to speed up or searches for popular tags -# Artist: -# - Support percent based popular track lookups and move logic to server class RecordingSearchByTagService(Service): @@ -21,7 +19,7 @@ class RecordingSearchByTagService(Service): def __init__(self): super().__init__(self.SLUG) - def search(self, tags, operator, begin_percent, end_percent, num_recordings): + def search(self, tags, operator, pop_begin, pop_end, num_recordings): """ Fetch the tag data from the LB API and return it as a dict. 
""" @@ -29,8 +27,8 @@ def search(self, tags, operator, begin_percent, end_percent, num_recordings): data = { "condition": operator, "count": num_recordings, - "begin_percent": begin_percent, - "end_percent": end_percent, + "begin_percent": pop_begin, + "end_percent": pop_end, "tag": tags, "min_tag_count": 1 } @@ -60,12 +58,12 @@ class RecordingSearchByArtistService(Service): def __init__(self): super().__init__(self.SLUG) - def search(self, mode, artist_mbid, begin_percent, end_percent, max_recordings_per_artist, max_similar_artists): + def search(self, mode, artist_mbid, pop_begin, pop_end, max_recordings_per_artist, max_similar_artists): """ Given a seed artist mbid, find and select similar artists (via LB similar artists data). - begin_percent: The lower bound on recording popularity - end_percent: The upper bound on recording popularity + pop_begin: The lower bound on recording popularity + pop_end: The upper bound on recording popularity max_recordings_per_artist: The number of recordings to collect for each artist. max_similar_artists: The maximum number of similar artists to select. 
""" @@ -74,8 +72,8 @@ def search(self, mode, artist_mbid, begin_percent, end_percent, max_recordings_p "mode": mode, "max_similar_artists": max_similar_artists, "max_recordings_per_artist": max_recordings_per_artist, - "begin_percent": begin_percent, - "end_percent": end_percent + "pop_begin": pop_begin, + "pop_end": pop_end } # TODO: Update for production # url = f"https://beta-api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" @@ -83,7 +81,7 @@ def search(self, mode, artist_mbid, begin_percent, end_percent, max_recordings_p r = requests.get(url, params=params) if r.status_code != 200: - raise RuntimeError(f"Cannot lb_radio artists: {r.status_code} ({r.text})") + raise RuntimeError(f"Cannot fetch lb_radio artists: {r.status_code} ({r.text})") try: artists = r.json() From 735969c49b55dd9059b410b9f00305eb9b02c37c Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 8 Apr 2024 21:34:36 +0200 Subject: [PATCH 08/33] Since artist search is now merged, update this. --- troi/patches/lb_radio_classes/tag.py | 3 +-- troi/recording_search_service.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/troi/patches/lb_radio_classes/tag.py b/troi/patches/lb_radio_classes/tag.py index 37e1aa97..de0e0ae9 100755 --- a/troi/patches/lb_radio_classes/tag.py +++ b/troi/patches/lb_radio_classes/tag.py @@ -12,8 +12,7 @@ # release of LB Radio for lb-server: # - Review use of ranges. 
# - Review having to invert the tag ranges -# - Fix artist search to not just pick the bottom of the top, but really fetch the bottom -# - Review or remove artist caching from artist element +# - Remove artists caching and use new artist endpoint that moves all the work to the server class LBRadioTagRecordingElement(troi.Element): diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index 0bbe656d..d53d79eb 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -75,9 +75,7 @@ def search(self, mode, artist_mbid, pop_begin, pop_end, max_recordings_per_artis "pop_begin": pop_begin, "pop_end": pop_end } - # TODO: Update for production -# url = f"https://beta-api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" - url = f"http://localhost:8100/1/lb-radio/artist/{artist_mbid}" + url = f"https://api.listenbrainz.org/1/lb-radio/artist/{artist_mbid}" r = requests.get(url, params=params) if r.status_code != 200: From cb3923139cfc71d0d2ca337cdeee1580cfb07b76 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 8 Apr 2024 22:32:37 +0200 Subject: [PATCH 09/33] Update for using beta's tag search --- troi/playlist.py | 2 +- troi/recording_search_service.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/troi/playlist.py b/troi/playlist.py index 6781b77f..98480b58 100755 --- a/troi/playlist.py +++ b/troi/playlist.py @@ -260,7 +260,7 @@ def submit(self, token, created_for=None): if len(playlist.recordings) == 0: continue - info("submit %d tracks" % len(playlist.recordings)) + logger.info("submit %d tracks" % len(playlist.recordings)) if playlist.patch_slug is not None: playlist.add_metadata({"algorithm_metadata": {"source_patch": playlist.patch_slug}}) r = requests.post(LISTENBRAINZ_PLAYLIST_CREATE_URL, diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index d53d79eb..c03fa2c4 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py 
@@ -25,14 +25,14 @@ def search(self, tags, operator, pop_begin, pop_end, num_recordings): """ data = { - "condition": operator, + "operator": operator, "count": num_recordings, - "begin_percent": pop_begin, - "end_percent": pop_end, + "pop_begin": pop_begin, + "pop_end": pop_end, "tag": tags, - "min_tag_count": 1 + "min_tag_count": 1, } - r = requests.get("https://api.listenbrainz.org/1/lb-radio/tags", params=data) + r = requests.get("https://beta-api.listenbrainz.org/1/lb-radio/tags", params=data) if r.status_code != 200: raise RuntimeError(f"Cannot fetch recordings for tags. {r.text}") From 88918a1fb6d0c441242eb7ef72312ab2322c2df8 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 12 Apr 2024 18:53:03 +0200 Subject: [PATCH 10/33] LB artist has gotten pretty trivial now that most of the logic is in PG! --- troi/patches/lb_radio.py | 12 +++-- troi/patches/lb_radio_classes/artist.py | 64 +++++++------------------ troi/recording_search_service.py | 36 +++++++------- 3 files changed, 42 insertions(+), 70 deletions(-) diff --git a/troi/patches/lb_radio.py b/troi/patches/lb_radio.py index e98f2b00..ec370f79 100755 --- a/troi/patches/lb_radio.py +++ b/troi/patches/lb_radio.py @@ -99,7 +99,7 @@ def lookup_artist_name(self, artist_name): raise RuntimeError(err_msg) if fetched_name.lower() == artist_name.lower(): - return mbid + return fetched_name, mbid raise RuntimeError(err_msg) @@ -119,11 +119,12 @@ def create(self, inputs): "Argument mode must be one one easy, medium or hard.") # Lookup artist names embedded in the prompt + artist_names = {} for element in prompt_elements: - if element["entity"] == "artist" and isinstance( - element["values"][0], str): - element["values"][0] = UUID( - self.lookup_artist_name(element["values"][0])) + if element["entity"] == "artist" and isinstance( element["values"][0], str): + name, mbid = self.lookup_artist_name(element["values"][0]) + element["values"][0] = mbid + artist_names[mbid] = name # Save descriptions to local storage 
self.local_storage["data_cache"] = { @@ -162,6 +163,7 @@ def create(self, inputs): include_sim = False if "nosim" in element["opts"] else True source = LBRadioArtistRecordingElement( element["values"][0], + artist_name=artist_names[element["values"][0]], mode=mode, include_similar_artists=include_sim) diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index badfa866..5672f944 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -17,9 +17,10 @@ class LBRadioArtistRecordingElement(troi.Element): MAX_TOP_RECORDINGS_PER_ARTIST = 35 # should lower this when other sources of data get added MAX_NUM_SIMILAR_ARTISTS = 8 - def __init__(self, artist_mbid, mode="easy", include_similar_artists=True): + def __init__(self, artist_mbid, artist_name, mode="easy", include_similar_artists=True): troi.Element.__init__(self) self.artist_mbid = str(artist_mbid) + self.artist_name = artist_name self.mode = mode self.include_similar_artists = include_similar_artists if include_similar_artists: @@ -33,19 +34,6 @@ def inputs(self): def outputs(self): return [Recording] - def fetch_artist_names(self, artist_mbids): - """ - Fetch artists names for a given list of artist_mbids - """ - - # TODO: Use the artist cache data - data = [{"[artist_mbid]": mbid} for mbid in artist_mbids] - r = requests.post("https://datasets.listenbrainz.org/artist-lookup/json", json=data) - if r.status_code != 200: - raise RuntimeError(f"Cannot artist names: {r.status_code} ({r.text})") - - return {result["artist_mbid"]: result["artist_name"] for result in r.json()} - def read(self, entities): self.data_cache = self.local_storage["data_cache"] @@ -53,48 +41,32 @@ def read(self, entities): # Fetch our mode ranges start, stop = self.local_storage["modes"][self.mode] self.recording_search_by_artist = self.patch.get_service("recording-search-by-artist") - artist_recordings = self.recording_search_by_artist.search(self.mode, 
self.artist_mbid, start, stop, - self.max_top_recordings_per_artist, - self.MAX_NUM_SIMILAR_ARTISTS) - - # For all fetched artists, fetch their names - artist_names = self.fetch_artist_names(list(artist_recordings)) - for artist_mbid in artist_recordings: - if artist_mbid not in artist_names: - raise RuntimeError("Artist %s could not be found. Is this MBID valid?" % artist["artist_mbid"]) - - # Store data in cache, so the post processor can create decent descriptions, title - self.data_cache[artist_mbid] = artist_names[artist_mbid] + (artist_recordings, msgs) = self.recording_search_by_artist.search(self.mode, self.artist_mbid, start, stop, + self.max_top_recordings_per_artist, + self.MAX_NUM_SIMILAR_ARTISTS) + # Collect the names of the similar artists + similar_artist_names = [] + for mbid in artist_recordings: + try: + similar_artist_names.append(artist_recordings[mbid][0].artist_credit.name) + except IndexError: + pass # start crafting user feedback messages - msgs = [] - if self.include_similar_artists and len(artist_recordings) == 1: - msgs.append(f"Seed artist {artist_names[self.artist_mbid]} no similar artists.") + if not artist_recordings: + msgs.append(f"The seed artist %s has no similar artists, nor top recordings. Too niche?" 
% self.artist_name) else: - if self.include_similar_artists and len(artist_recordings) < 4: - msgs.append(f"Seed artist {artist_names[self.artist_mbid]} few similar artists.") - msg = "artist: using seed artist %s" % artist_names[self.artist_mbid] + msg = "Using seed artist %s" % self.artist_name if self.include_similar_artists: mbids = list(artist_recordings) del mbids[mbids.index(self.artist_mbid)] - msg += " and similar artists: " + ", ".join([artist_names[mbid] for mbid in mbids]) + msg += " and similar artists: " + ", ".join(similar_artist_names) else: msg += " only" - msgs.append(msg) + msgs.insert(0, msg) for msg in msgs: self.local_storage["user_feedback"].append(msg) - self.data_cache["element-descriptions"].append("artist %s" % artist_names[self.artist_mbid]) - - # Now collect recordings from the artist and similar artists and return an interleaved - # stream of recordings. - for i, artist_mbid in enumerate(artist_recordings): - - recs_plist = plist(artist_recordings[artist_mbid]) - if len(recs_plist) < 20: - self.local_storage["user_feedback"].append( - f"Artist {artist_names[artist_mbid]} only has {'no' if len(recs_plist) == 0 else 'few'} top recordings.") - - recordings = recs_plist.random_item(start, stop, self.max_top_recordings_per_artist) + self.data_cache["element-descriptions"].append("artist %s" % self.artist_name) return interleave([artist_recordings[mbid] for mbid in artist_recordings]) diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index 40e8c52a..df7b4ac6 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -84,25 +84,23 @@ def search(self, mode, artist_mbid, pop_begin, pop_end, max_recordings_per_artis try: artists = r.json() except IndexError: - return [] + return {}, [] + artist_recordings = {} + msgs = [] for artist_mbid in artists: - recordings = artists[artist_mbid] - updated = [] - for rec in recordings: - updated.append(Recording(mbid=rec["recording_mbid"], 
musicbrainz={"total_listen_count": rec["total_listen_count"]})) - artists[artist_mbid] = updated - recordings = plist() - for recording in r.json(): - artists = [ Artist(mbid=mbid) for mbid in recording["artist_mbids"] ] - artist_credit = ArtistCredit(artists=artists, name=recording["artist_name"]) - recordings.append( - Recording(mbid=recording["recording_mbid"], - name=recording["recording_name"], - duration=recording["length"], - artist_credit=artist_credit)) - - artists_recordings[artist_mbid] = recordings.random_item(begin_percent, end_percent, num_recordings) - - return artists_recordings + for recording in artists[artist_mbid]: + artist_credit = ArtistCredit(artists=[Artist(mbid=recording["similar_artist_mbid"])], + name=recording["similar_artist_name"]) + recordings.append(Recording(mbid=recording["recording_mbid"], + artist_credit=artist_credit, + musicbrainz={"total_listen_count": recording["total_listen_count"]})) + + print(len(recordings), max_recordings_per_artist) + if len(recordings) < max_recordings_per_artist: + msgs.append("Artist %s has only few top recordings." 
% recordings[0].artist_credit.name) + + artist_recordings[artist_mbid] = recordings.random_item(pop_begin, pop_end, max_recordings_per_artist) + + return artist_recordings, msgs From 240de7469afee3a7b3f181b42322c5a546568c7a Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 15 Apr 2024 18:15:14 +0200 Subject: [PATCH 11/33] Interim check-in --- troi/patches/lb_radio.py | 30 ++++++++++++++++++++++--- troi/patches/lb_radio_classes/artist.py | 15 ++++++++----- troi/recording_search_service.py | 4 ++-- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/troi/patches/lb_radio.py b/troi/patches/lb_radio.py index ec370f79..fe08560e 100755 --- a/troi/patches/lb_radio.py +++ b/troi/patches/lb_radio.py @@ -1,3 +1,4 @@ +from time import sleep from random import randint, shuffle from uuid import UUID @@ -75,9 +76,12 @@ def slug(): def description(): return "Given an LB radio prompt, generate a playlist for that prompt." - def lookup_artist_name(self, artist_name): + def lookup_artist(self, artist_name): """ Fetch artist names for validation purposes """ + if isinstance(artist_name, UUID): + return self.lookup_artist_from_mbid(artist_name) + err_msg = f"Artist {artist_name} could not be looked up. Please use exact spelling." r = requests.get( @@ -103,6 +107,25 @@ def lookup_artist_name(self, artist_name): raise RuntimeError(err_msg) + def lookup_artist_from_mbid(self, artist_mbid): + """ Fetch artist names for validation purposes """ + + while True: + r = requests.get(f"https://musicbrainz.org/ws/2/artist/%s&fmt=json" % str(artist_mbid)) + if r.status_code == 404: + raise RuntimeError(f"Could not resolve artist mbid {artist_mbid}. Error {r.status_code}") + + if r.status_code == 429: + sleep(2) + continue + + if r.status_code != 200: + raise RuntimeError(f"Could not resolve artist name {artist_mbid}. 
Error {r.status_code}") + + break + + return r.json()["name"], artist_mbid + def create(self, inputs): self.prompt = inputs["prompt"] self.mode = inputs["mode"] @@ -121,8 +144,9 @@ def create(self, inputs): # Lookup artist names embedded in the prompt artist_names = {} for element in prompt_elements: - if element["entity"] == "artist" and isinstance( element["values"][0], str): - name, mbid = self.lookup_artist_name(element["values"][0]) + if element["entity"] == "artist": + name, mbid = self.lookup_artist(element["values"][0]) + print(name, mbid) element["values"][0] = mbid artist_names[mbid] = name diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index 5672f944..abf44547 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -24,9 +24,9 @@ def __init__(self, artist_mbid, artist_name, mode="easy", include_similar_artist self.mode = mode self.include_similar_artists = include_similar_artists if include_similar_artists: - self.max_top_recordings_per_artist = self.MAX_TOP_RECORDINGS_PER_ARTIST - else: self.max_top_recordings_per_artist = self.MAX_TOP_RECORDINGS_PER_ARTIST * 2 + else: + self.max_top_recordings_per_artist = self.MAX_TOP_RECORDINGS_PER_ARTIST * 3 def inputs(self): return [] @@ -47,6 +47,9 @@ def read(self, entities): # Collect the names of the similar artists similar_artist_names = [] for mbid in artist_recordings: + if mbid == self.artist_mbid: + continue + try: similar_artist_names.append(artist_recordings[mbid][0].artist_credit.name) except IndexError: @@ -58,11 +61,13 @@ def read(self, entities): else: msg = "Using seed artist %s" % self.artist_name if self.include_similar_artists: - mbids = list(artist_recordings) - del mbids[mbids.index(self.artist_mbid)] - msg += " and similar artists: " + ", ".join(similar_artist_names) + if similar_artist_names: + msg += " and similar artists: " + ", ".join(similar_artist_names) + else: + msg += " only, since this 
artist has no similar artists (yet)." else: msg += " only" + msgs.insert(0, msg) for msg in msgs: diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index df7b4ac6..8634e239 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -97,8 +97,8 @@ def search(self, mode, artist_mbid, pop_begin, pop_end, max_recordings_per_artis artist_credit=artist_credit, musicbrainz={"total_listen_count": recording["total_listen_count"]})) - print(len(recordings), max_recordings_per_artist) - if len(recordings) < max_recordings_per_artist: + # Below is a hack, since the endpoint seems to return one track too few + if len(recordings) < max_recordings_per_artist - 1: msgs.append("Artist %s has only few top recordings." % recordings[0].artist_credit.name) artist_recordings[artist_mbid] = recordings.random_item(pop_begin, pop_end, max_recordings_per_artist) From e441dd0b213c80b5b93ae85693a4b9f754cda267 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 16 Apr 2024 12:21:07 +0200 Subject: [PATCH 12/33] Fix specifying artist mbid for an artist --- troi/patches/lb_radio.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/troi/patches/lb_radio.py b/troi/patches/lb_radio.py index fe08560e..f21bd95d 100755 --- a/troi/patches/lb_radio.py +++ b/troi/patches/lb_radio.py @@ -84,16 +84,19 @@ def lookup_artist(self, artist_name): err_msg = f"Artist {artist_name} could not be looked up. Please use exact spelling." - r = requests.get( - f"https://musicbrainz.org/ws/2/artist?query={quote(artist_name)}&fmt=json" - ) - if r.status_code == 404: - raise RuntimeError(err_msg) + while True: + r = requests.get( f"https://musicbrainz.org/ws/2/artist?query={quote(artist_name)}&fmt=json") + if r.status_code == 404: + raise RuntimeError(err_msg) - if r.status_code != 200: - raise RuntimeError( - f"Could not resolve artist name {artist_name}. 
Error {r.status_code}" - ) + if r.status_code == 429: + sleep(2) + continue + + if r.status_code != 200: + raise RuntimeError( f"Could not resolve artist name {artist_name}. Error {r.status_code} {r.text}") + + break data = r.json() try: @@ -111,16 +114,16 @@ def lookup_artist_from_mbid(self, artist_mbid): """ Fetch artist names for validation purposes """ while True: - r = requests.get(f"https://musicbrainz.org/ws/2/artist/%s&fmt=json" % str(artist_mbid)) + r = requests.get(f"https://musicbrainz.org/ws/2/artist/%s?fmt=json" % str(artist_mbid)) if r.status_code == 404: - raise RuntimeError(f"Could not resolve artist mbid {artist_mbid}. Error {r.status_code}") + raise RuntimeError(f"Could not resolve artist mbid {artist_mbid}. Error {r.status_code} {r.text}") if r.status_code == 429: sleep(2) continue if r.status_code != 200: - raise RuntimeError(f"Could not resolve artist name {artist_mbid}. Error {r.status_code}") + raise RuntimeError(f"Could not resolve artist name {artist_mbid}. Error {r.status_code} {r.text}") break From 7b773154b92e2a9c3be0ae0107259fd6a187097e Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 16 Apr 2024 14:55:42 +0200 Subject: [PATCH 13/33] Rename splitter to plist --- troi/content_resolver/artist_search.py | 2 +- troi/content_resolver/tag_search.py | 2 +- troi/content_resolver/utils.py | 2 +- troi/patches/lb_radio_classes/artist.py | 2 +- troi/patches/lb_radio_classes/tag.py | 22 ++++++++++++++-------- troi/recording_search_service.py | 4 ++-- 6 files changed, 20 insertions(+), 14 deletions(-) diff --git a/troi/content_resolver/artist_search.py b/troi/content_resolver/artist_search.py index 3f9a4add..81091582 100755 --- a/troi/content_resolver/artist_search.py +++ b/troi/content_resolver/artist_search.py @@ -10,7 +10,7 @@ from troi.content_resolver.model.recording import Recording, RecordingMetadata from troi.content_resolver.utils import select_recordings_on_popularity from troi.recording_search_service import 
RecordingSearchByArtistService -from troi.splitter import plist +from troi.plist import plist OVERHYPED_SIMILAR_ARTISTS = [ "b10bbbfc-cf9e-42e0-be17-e2c3e1d2600d", # The Beatles diff --git a/troi/content_resolver/tag_search.py b/troi/content_resolver/tag_search.py index 4889993f..d1e878d9 100755 --- a/troi/content_resolver/tag_search.py +++ b/troi/content_resolver/tag_search.py @@ -10,7 +10,7 @@ from troi.content_resolver.model.recording import Recording, RecordingMetadata from troi.content_resolver.utils import select_recordings_on_popularity from troi.recording_search_service import RecordingSearchByTagService -from troi.splitter import plist +from troi.plist import plist class LocalRecordingSearchByTagService(RecordingSearchByTagService): diff --git a/troi/content_resolver/utils.py b/troi/content_resolver/utils.py index 760994ba..7fd9738e 100755 --- a/troi/content_resolver/utils.py +++ b/troi/content_resolver/utils.py @@ -1,7 +1,7 @@ import logging import os -from troi.splitter import plist +from troi.plist import plist from troi import Recording as TroiRecording from troi.content_resolver.model.recording import FileIdType diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index abf44547..12447df4 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -2,7 +2,7 @@ import troi from troi import Recording, Artist -from troi.splitter import plist +from troi.plist import plist from troi import TARGET_NUMBER_OF_RECORDINGS from troi.utils import interleave from troi.recording_search_service import RecordingSearchByArtistService diff --git a/troi/patches/lb_radio_classes/tag.py b/troi/patches/lb_radio_classes/tag.py index 72c00545..87a41278 100755 --- a/troi/patches/lb_radio_classes/tag.py +++ b/troi/patches/lb_radio_classes/tag.py @@ -4,7 +4,7 @@ import requests from troi import Recording -from troi.splitter import plist +from troi.plist import plist from troi import 
TARGET_NUMBER_OF_RECORDINGS from troi.utils import interleave @@ -46,13 +46,19 @@ def fetch_similar_tags(self, tag): Fetch similar tags from LB """ - r = requests.post( - "https://labs.api.listenbrainz.org/tag-similarity/json", - json=[{ - "tag": tag - }]) - if r.status_code != 200: - raise RuntimeError(f"Cannot fetch similar tags. {r.text}") + while True: + r = requests.post( "https://labs.api.listenbrainz.org/tag-similarity/json", json=[{ "tag": tag }]) + if r.status_code == 429: + sleep(2) + continue + + if r.status_code == 404: + return plist() + + if r.status_code != 200: + raise RuntimeError(f"Cannot fetch similar tags. {r.text}") + + break return plist(r.json()) diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index 8634e239..7def8d73 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -4,7 +4,7 @@ from troi import Recording, Artist, ArtistCredit from troi.service import Service -from troi.splitter import plist +from troi.plist import plist # NOTES FOR LB API improvements: # Tags: @@ -99,7 +99,7 @@ def search(self, mode, artist_mbid, pop_begin, pop_end, max_recordings_per_artis # Below is a hack, since the endpoint seems to return one track too few if len(recordings) < max_recordings_per_artist - 1: - msgs.append("Artist %s has only few top recordings." 
% recordings[0].artist_credit.name) + msgs.append("Artist %s has only few top recordings in %s mode" % (recordings[0].artist_credit.name, mode)) artist_recordings[artist_mbid] = recordings.random_item(pop_begin, pop_end, max_recordings_per_artist) From 7c67ffcfdc05e49ad611450a9a9864d8907b32aa Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 16 Apr 2024 14:56:46 +0200 Subject: [PATCH 14/33] Meh --- troi/{splitter.py => plist.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename troi/{splitter.py => plist.py} (100%) diff --git a/troi/splitter.py b/troi/plist.py similarity index 100% rename from troi/splitter.py rename to troi/plist.py From b80c37f4177b15625258cfb9adbd31ae6349013e Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 16 Apr 2024 14:58:16 +0200 Subject: [PATCH 15/33] Finish plist cleanup --- tests/test_splitter.py | 58 +-------------- troi/plist.py | 157 ----------------------------------------- 2 files changed, 1 insertion(+), 214 deletions(-) diff --git a/tests/test_splitter.py b/tests/test_splitter.py index dba3a5e2..bfce784f 100644 --- a/tests/test_splitter.py +++ b/tests/test_splitter.py @@ -1,66 +1,10 @@ import unittest -from troi.splitter import DataSetSplitter, plist +from troi.plist import plist class TestSplitter(unittest.TestCase): - def test_splitter_basic(self): - - # Make invalid test set - data = [{"score": i} for i in range(100)] - with self.assertRaises(ValueError): - DataSetSplitter(data, 3) - - # Now make it legal and test that - data.reverse() - dss = DataSetSplitter(data, 3) - assert dss[0][0] == {'score': 99} - assert dss[0][-1] == {'score': 66} - assert dss[1][0] == {'score': 65} - assert dss[1][-1] == {'score': 33} - assert dss[2][0] == {'score': 32} - assert dss[2][-1] == {'score': 0} - assert len(dss[0]) == 34 - assert len(dss[1]) == 33 - assert len(dss[2]) == 33 - - - def test_splitter_short(self): - - # Split and deal with empty set - dss = DataSetSplitter([], 3) - assert dss[0] == [] - assert dss[1] == [] - 
assert dss[2] == [] - - dss = DataSetSplitter([{"score": 4}], 3) - assert dss[0] == [{'score': 4}] - assert dss[1] == [] - assert dss[2] == [] - - dss = DataSetSplitter([{"score": 4}, {"score": 3}], 3) - assert dss[0] == [{'score': 4}] - assert dss[1] == [{'score': 3}] - assert dss[2] == [] - - dss = DataSetSplitter([{"score": 4}, {"score": 3}, {"score": 2}], 3) - assert dss[0] == [{'score': 4}] - assert dss[1] == [{'score': 3}] - assert dss[2] == [{'score': 2}] - - dss = DataSetSplitter([{"score": 4}, {"score": 3}, {"score": 2}, {"score": 1}], 3) - assert dss[0] == [{'score': 4}, {'score': 3}] - assert dss[1] == [{'score': 2}] - assert dss[2] == [{'score': 1}] - - def test_splitter_mod_operation(self): - dss = DataSetSplitter([{"score": 4}, {"score": 3}, {"score": 2}], 3) - # Test the % (random item from segment) - assert dss % 0 == {'score': 4} - assert dss % 1 == {'score': 3} - assert dss % 2 == {'score': 2} - def test_plist(self): pl = plist([0,1,2,3,4,5,6,7,8,9]) diff --git a/troi/plist.py b/troi/plist.py index e84755c8..1942d228 100755 --- a/troi/plist.py +++ b/troi/plist.py @@ -3,163 +3,6 @@ from random import randint -class DataSetSplitter: - """ - The data set splitter is a tool that can take descendingly - ordered lists of dicts. Given a number of segments, it - provides easy to use accessor functions for quick access - to the various sections of the data. - - This tool is useful for taking data sets that are - ordered by some key and breaking them into chunks based - on the field used to sort the data. By default it is - "score", but can be overriden with the field argument. - - The class will scan all the rows in the data and examine - the key field (e.g. "score") and determine the splits - based on the score, so that the segments split equally - according to score. This very likely means that the - different segments will contain a different number of - items, unless you data scores are perfectly linear. 
- - This class keeps a reference to your data set, but it - never modifies the underlying data, it only ever returns - sections of the data. - - Array indexing is possible to access each slice: - - dss = DataSetSplitter(data, 3) - first_segment = dss[0] - - % for returning a random item from a segment is possible: - - random_item = dss % 1 - - This will return one random item from segment 1. - - """ - - def __init__(self, data, segment_count, field="score"): - """ - Pass in the descendingly sorted data, keyed by the - field (default "score") and a segment count for - the number of segments to break this dataset into. - """ - self.segment_count = segment_count - self.field = field - - if len(data) == 0: - self.segments = [] - return - self.data = data - - high_score = data[0][self.field] - low_score = data[-1][self.field] - - # Calculate where the segments breaks should be from how to hi - segment_width = (high_score - low_score) / self.segment_count - self.segments = [] - for segment in range(self.segment_count): - self.segments.append({self.field: low_score + ((self.segment_count - 1) - segment) * segment_width}) - - # translate the breaks into actual indexes in the data - segment_index = 0 - count = 0 - for i, d in enumerate(data): - - # Test to ensure that the data is in descending order - if i > 0: - if d[self.field] > data[i - 1][self.field]: - raise ValueError("Data set is not in descending order!") - if d[self.field] < self.segments[segment_index][self.field]: - self.segments[segment_index]["index"] = i - 1 - self.segments[segment_index]["count"] = count - segment_index += 1 - count = 0 - - count += 1 - - # Finish off the data set and accont for special (short) data sets - for i in range(segment_index, segment_count): - if count > 0: - self.segments[i]["index"] = len(data) - 1 - self.segments[i]["count"] = count - count = 0 - else: - self.segments[i]["index"] = None - self.segments[i]["count"] = 0 - - def get_segment_count(self): - return self.segment_count 
- - def __getitem__(self, segment): - """ - Array indexing for access to the segments. See method items(). - """ - return self.items(segment) - - def items(self, segment): - """ - Return a list of all of the items in the specified segment. - If an invalid segment is specified, a ValueError will be thrown. - """ - if segment < 0 or segment >= self.segment_count: - raise ValueError("Invalid segment") - - if len(self.segments) == 0: - return [] - - try: - if segment == 0: - return self.data[0:self.segments[0]["index"] + 1] - else: - return self.data[self.segments[segment - 1]["index"] + 1:self.segments[segment]["index"] + 1] - except TypeError: - # Type error catches when a None value is being used in the index math - return [] - - def __mod__(self, segment): - """ - Use % operator to return a random item from segment. See random_item() - - """ - return self.random_item(segment) - - def random_item(self, segment, count=1): - """ - Return a random item from the specified segment. - - count specifies the number of items to return, default 1. - - Return value is a data item, unless count is > 1, then a list is returned - """ - if segment < 0 or segment >= self.segment_count: - raise ValueError("Invalid segment") - - if len(self.segments) == 0: - return [] - - data = self.items(segment) - items = [data[randint(0, len(data) - 1)] for i in range(min(count, len(data)))] - items = [dict(t) for t in {tuple(d.items()) for d in items}] - - if count > 1: - return items - - return items[0] if len(items) > 0 else [] - - def random(self): - """ - Return the data from a random segment. - NOTE: To return a random item, use random_item or the % operator. 
- """ - if len(self.segments) == 0: - return [] - - segment = randint(0, len(self.segments)) - return self.items(segment) - - class plist(list): """ This class implements a list object with percent based indexing and slicing: From fa5c1a1273bff1325300336016aba8d4858cf34e Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 16 Apr 2024 15:23:56 +0200 Subject: [PATCH 16/33] Make sure that plist random_items does not return duplicates --- tests/{test_splitter.py => test_plist.py} | 5 +++++ troi/plist.py | 13 ++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) rename tests/{test_splitter.py => test_plist.py} (75%) diff --git a/tests/test_splitter.py b/tests/test_plist.py similarity index 75% rename from tests/test_splitter.py rename to tests/test_plist.py index bfce784f..743b7912 100644 --- a/tests/test_splitter.py +++ b/tests/test_plist.py @@ -18,3 +18,8 @@ def test_plist(self): assert pl.dslice(0, 2) == [0,1] assert pl.random_item(50, 100) in [5,6,7,8,9] + + def test_plist_unique(self): + pl = plist([0,1,2,3,4,5,6,7,8,9]) + rlist = pl.random_item(count=9) + assert len(rlist) == len(set(rlist)) diff --git a/troi/plist.py b/troi/plist.py index 1942d228..2cd6018a 100755 --- a/troi/plist.py +++ b/troi/plist.py @@ -73,8 +73,15 @@ def random_item(self, start_percent=0, stop_percent=99, count=1): stop_index = self._get_index(stop_percent) data = super().__getitem__(slice(start_index, stop_index)) - items = [data[randint(0, len(data) - 1)] for i in range(min(count, len(data)))] + items = [] + for i in range(len(data)): + index = randint(0, len(data) - 1) + items.append(data[index]) + del data[index] + if len(items) == count or not data: + break + if count > 1: return items - - return items[0] if len(items) > 0 else [] + else: + return items[0] if len(items) > 0 else [] From 5f9e8cf95edd829b4025ba5a39c9389566618d2f Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 16 Apr 2024 16:23:04 +0200 Subject: [PATCH 17/33] Fix the artist limiter and turn it on! 
--- troi/patches/lb_radio.py | 2 +- troi/patches/lb_radio_classes/artist.py | 2 +- troi/patches/lb_radio_classes/blend.py | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/troi/patches/lb_radio.py b/troi/patches/lb_radio.py index f21bd95d..94710d8b 100755 --- a/troi/patches/lb_radio.py +++ b/troi/patches/lb_radio.py @@ -249,7 +249,7 @@ def create(self, inputs): elements.append(hate_filter) # Finish the pipeline with the element that blends and weighs the streams - blend = WeighAndBlendRecordingsElement(weights, max_num_recordings=100) + blend = WeighAndBlendRecordingsElement(weights, max_num_recordings=100, max_artist_occurrence=3) blend.set_sources(elements) pl_maker = PlaylistMakerElement( diff --git a/troi/patches/lb_radio_classes/artist.py b/troi/patches/lb_radio_classes/artist.py index 12447df4..e283beb6 100755 --- a/troi/patches/lb_radio_classes/artist.py +++ b/troi/patches/lb_radio_classes/artist.py @@ -55,7 +55,7 @@ def read(self, entities): except IndexError: pass - # start crafting user feedback messages + # craft user feedback messages if not artist_recordings: msgs.append(f"The seed artist %s has no similar artists, nor top recordings. Too niche?" 
% self.artist_name) else: diff --git a/troi/patches/lb_radio_classes/blend.py b/troi/patches/lb_radio_classes/blend.py index 09f9ccb8..102b11ac 100755 --- a/troi/patches/lb_radio_classes/blend.py +++ b/troi/patches/lb_radio_classes/blend.py @@ -91,15 +91,14 @@ def read(self, entities): if rec.mbid in dedup_set: total_available -= 1 continue - - if self.max_artist_occurrence is not None and artist_counts[",".join( - rec.artist.mbids)] == self.max_artist_occurrence: + if self.max_artist_occurrence is not None and \ + artist_counts[rec.artist_credit.artist_credit_id] == self.max_artist_occurrence: total_available -= 1 continue recordings.append(rec) dedup_set.add(rec.mbid) - artist_counts[",".join([ a.mbid for a in rec.artist_credit.artists ])] += 1 + artist_counts[rec.artist_credit.artist_credit_id] += 1 break if len(recordings) >= self.max_num_recordings or len(recordings) == total_available: From 00908d25698f3141562070d268cf477d881dfbb4 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 16 Apr 2024 16:49:23 +0200 Subject: [PATCH 18/33] Start on tag element cleanup --- troi/patches/lb_radio_classes/tag.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/troi/patches/lb_radio_classes/tag.py b/troi/patches/lb_radio_classes/tag.py index 87a41278..9a9b9dc8 100755 --- a/troi/patches/lb_radio_classes/tag.py +++ b/troi/patches/lb_radio_classes/tag.py @@ -11,18 +11,10 @@ # TODO improvements for post troi/liblistenbrainz/content-resolve packaging work, but before the next # release of LB Radio for lb-server: # - Review use of ranges. 
-# - Review having to invert the tag ranges class LBRadioTagRecordingElement(troi.Element): NUM_RECORDINGS_TO_COLLECT = TARGET_NUMBER_OF_RECORDINGS * 4 - MIN_RECORDINGS_EASY = NUM_RECORDINGS_TO_COLLECT - MIN_RECORDINGS_MEDIUM = 50 - MIN_RECORDINGS_HARD = 25 - EASY_MODE_RELEASE_GROUP_MIN_TAG_COUNT = 4 - MEDIUM_MODE_ARTIST_MIN_TAG_COUNT = 4 - - TAG_THRESHOLD_MAPPING = {"easy": 3, "medium": 2, "hard": 1} def __init__(self, tags, @@ -62,15 +54,10 @@ def fetch_similar_tags(self, tag): return plist(r.json()) - def invert_for_tag_search(self, startstop): - return tuple( - (1.0 - (startstop[1] / 100.), 1.0 - (startstop[0] / 100.0))) - def select_recordings_on_easy(self): msgs = [] - start, stop = self.invert_for_tag_search( - self.local_storage["modes"]["easy"]) + start, stop = self.local_storage["modes"]["easy"] tag_data = self.recording_search_by_tag.search( self.tags, self.operator, start, stop, self.NUM_RECORDINGS_TO_COLLECT) @@ -85,8 +72,7 @@ def select_recordings_on_easy(self): def select_recordings_on_medium(self): msgs = [] - start, stop = self.invert_for_tag_search( - self.local_storage["modes"]["medium"]) + start, stop = self.local_storage["modes"]["medium"] tag_data = self.recording_search_by_tag.search( self.tags, self.operator, start, stop, self.NUM_RECORDINGS_TO_COLLECT) @@ -120,8 +106,7 @@ def select_recordings_on_medium(self): def select_recordings_on_hard(self): msgs = [] - start, stop = self.invert_for_tag_search( - self.local_storage["modes"]["hard"]) + start, stop = self.local_storage["modes"]["hard"] tag_data = self.recording_search_by_tag.search( self.tags, self.operator, start, stop, @@ -180,7 +165,6 @@ def select_recordings_on_hard(self): def read(self, entities): - min_tag_count = self.TAG_THRESHOLD_MAPPING[self.mode] self.recording_search_by_tag = self.patch.get_service( "recording-search-by-tag") From 9341a176b8e4da503312b533ad6ad1acd60d4c16 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 17 Apr 2024 16:07:32 +0200 Subject: [PATCH 19/33] 
Start work on the improved tag element -- things are looking promising but the tag endpoint needs some more work. --- troi/patches/lb_radio_classes/tag.py | 107 +++++---------------------- troi/playlist.py | 6 +- troi/print_recording.py | 2 +- troi/recording_search_service.py | 22 +----- 4 files changed, 26 insertions(+), 111 deletions(-) diff --git a/troi/patches/lb_radio_classes/tag.py b/troi/patches/lb_radio_classes/tag.py index 9a9b9dc8..3733f89e 100755 --- a/troi/patches/lb_radio_classes/tag.py +++ b/troi/patches/lb_radio_classes/tag.py @@ -8,9 +8,6 @@ from troi import TARGET_NUMBER_OF_RECORDINGS from troi.utils import interleave -# TODO improvements for post troi/liblistenbrainz/content-resolve packaging work, but before the next -# release of LB Radio for lb-server: -# - Review use of ranges. class LBRadioTagRecordingElement(troi.Element): @@ -54,73 +51,24 @@ def fetch_similar_tags(self, tag): return plist(r.json()) - def select_recordings_on_easy(self): + def select_recordings(self): msgs = [] - start, stop = self.local_storage["modes"]["easy"] - tag_data = self.recording_search_by_tag.search( - self.tags, self.operator, start, stop, - self.NUM_RECORDINGS_TO_COLLECT) + start, stop = { "easy": (66, 100), "medium": (33, 66), "hard": (1, 33) }[self.mode] + sim_start, sim_stop = { "easy": (0, 0), "medium": (50, 100), "hard": (10, 50) }[self.mode] + num_similar_tags_to_include = { "easy": 0, "medium": 1, "hard": 2 }[self.mode] - if len(tag_data) > self.NUM_RECORDINGS_TO_COLLECT: - tag_data = tag_data.random_item(start, stop, - self.NUM_RECORDINGS_TO_COLLECT) - - msgs = [f"""tag: using seed tags: '{ "', '".join(self.tags)}' only"""] - return tag_data, msgs - - def select_recordings_on_medium(self): - - msgs = [] - start, stop = self.local_storage["modes"]["medium"] - tag_data = self.recording_search_by_tag.search( - self.tags, self.operator, start, stop, - self.NUM_RECORDINGS_TO_COLLECT) - - if len(tag_data) > self.NUM_RECORDINGS_TO_COLLECT: - tag_data = 
tag_data.random_item(start, stop, - self.NUM_RECORDINGS_TO_COLLECT) + tag_streams = [] + print("main search: %d - %d" % (start, stop)) + tag_streams.append(self.recording_search_by_tag.search(self.tags, self.operator, start, stop, + self.NUM_RECORDINGS_TO_COLLECT)) if len(self.tags) == 1 and self.include_similar_tags: similar_tags = self.fetch_similar_tags(self.tags[0]) - similar_tag = similar_tags.random_item(0, 50, 1) - if similar_tag is not None: - similar_tag = similar_tag["similar_tag"] - msgs = [ - f"tag: using seed tag '{self.tags[0]}' and similar tag '{similar_tag}'." - ] - - sim_tag_data = self.recording_search_by_tag.search( - [similar_tag], "OR", start, stop, - self.NUM_RECORDINGS_TO_COLLECT) - - if len(sim_tag_data) > self.NUM_RECORDINGS_TO_COLLECT: - sim_tag_data = sim_tag_data.random_item( - start, stop, self.NUM_RECORDINGS_TO_COLLECT) - - return interleave((tag_data, sim_tag_data)), msgs - - msgs = [f"""tag: using seed tags: '{ "', '".join(self.tags)}' only"""] - return tag_data, msgs - def select_recordings_on_hard(self): - - msgs = [] - start, stop = self.local_storage["modes"]["hard"] - - tag_data = self.recording_search_by_tag.search( - self.tags, self.operator, start, stop, - self.NUM_RECORDINGS_TO_COLLECT) - if len(tag_data) > self.NUM_RECORDINGS_TO_COLLECT: - tag_data = tag_data.random_item(start, stop, - self.NUM_RECORDINGS_TO_COLLECT) - - sim_start, sim_stop = 10, 50 - if len(self.tags) == 1 and self.include_similar_tags: - similar_tags = self.fetch_similar_tags(self.tags[0]) if len(similar_tags[sim_start:sim_stop]) > 2: while True: - selected_tags = similar_tags.random_item(10, 50, 2) + selected_tags = similar_tags.random_item(count=2) if selected_tags[0] == selected_tags[1]: continue @@ -130,24 +78,17 @@ def select_recordings_on_hard(self): similar_tags = similar_tags[sim_start:sim_stop] similar_tags = [tag["similar_tag"] for tag in similar_tags] - if len(similar_tags) > 0: - sim_tag_data = self.recording_search_by_tag.search( - 
(self.tags[0], similar_tags[0]), "AND", start, stop, - self.NUM_RECORDINGS_TO_COLLECT) - if len(sim_tag_data) > self.NUM_RECORDINGS_TO_COLLECT: - sim_tag_data = sim_tag_data.random_item( - start, stop, self.NUM_RECORDINGS_TO_COLLECT) - - if len(similar_tags) > 1: - sim_tag_data_2 = self.recording_search_by_tag.search( - (self.tags[0], similar_tags[1]), "AND", start, stop, + for i in range(num_similar_tags_to_include): + sim_tag_data = self.recording_search_by_tag.search( + (self.tags[0], similar_tags[i]), "AND", start, stop, self.NUM_RECORDINGS_TO_COLLECT) + if len(sim_tag_data) > self.NUM_RECORDINGS_TO_COLLECT: + sim_tag_data = sim_tag_data.random_item( start, stop, self.NUM_RECORDINGS_TO_COLLECT) - if len(sim_tag_data_2) > self.NUM_RECORDINGS_TO_COLLECT: - sim_tag_data_2 = sim_tag_data_2.random_item( - start, stop, self.NUM_RECORDINGS_TO_COLLECT) + tag_streams.append(sim_tag_data) + if num_similar_tags_to_include > 1: msgs = [ f"""tag: using seed tag '{self.tags[0]}' and similar tags '{"', '".join(similar_tags)}'.""" ] @@ -155,13 +96,11 @@ def select_recordings_on_hard(self): msgs = [ f"""tag: using seed tag '{self.tags[0]}' and similar tag '{similar_tags[0]}'.""" ] - sim_tag_data_2 = [] + else: + msgs = [f"""tag: using only seed tag '{self.tags[0]}'."""] - return interleave((tag_data, sim_tag_data, sim_tag_data_2)), msgs - else: - msgs = [f"""tag: using only seed tag '{self.tags[0]}'."""] + return interleave(tag_streams), msgs - return tag_data, msgs def read(self, entities): @@ -171,13 +110,7 @@ def read(self, entities): self.local_storage["data_cache"]["element-descriptions"].append( f'tag{"" if len(self.tags) == 1 else "s"} {", ".join(self.tags)}') - if self.mode == "easy": - recordings, feedback = self.select_recordings_on_easy() - elif self.mode == "medium": - recordings, feedback = self.select_recordings_on_medium() - else: - recordings, feedback = self.select_recordings_on_hard() - + recordings, feedback = self.select_recordings() for msg in feedback: 
self.local_storage["user_feedback"].append(msg) diff --git a/troi/playlist.py b/troi/playlist.py index e89b5857..ba2a9b47 100755 --- a/troi/playlist.py +++ b/troi/playlist.py @@ -64,9 +64,9 @@ def _serialize_to_jspf(playlist, created_for=None, track_count=None): for e in playlist.recordings[:track_count]: track = {} artist_mbids = [] - if e.artist is not None: - artist_mbids = [str(mbid) for mbid in e.artist.mbids or []] - track["creator"] = e.artist.name if e.artist else "" + if e.artist_credit is not None: + artist_mbids = [str(artist.mbid) for artist in e.artist_credit.artists or []] + track["creator"] = e.artist_credit.name if e.artist_credit else "" track["title"] = e.name track["identifier"] = "https://musicbrainz.org/recording/" + str(e.mbid) diff --git a/troi/print_recording.py b/troi/print_recording.py index 2b682597..b0f2a294 100755 --- a/troi/print_recording.py +++ b/troi/print_recording.py @@ -84,7 +84,7 @@ def _print_recording(self, recording, year=False, popularity=False, listen_count if self.print_bpm or bpm: text += " %3d" % recording.acousticbrainz['bpm'] if self.print_popularity or popularity: - text += " %.3f" % recording.musicbrainz['popularity'] + text += " %.1f" % recording.musicbrainz['popularity'] if self.print_latest_listened_at: if recording.listenbrainz["latest_listened_at"] is None: text += " never " diff --git a/troi/recording_search_service.py b/troi/recording_search_service.py index 7def8d73..45ac6cad 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -6,11 +6,6 @@ from troi.service import Service from troi.plist import plist -# NOTES FOR LB API improvements: -# Tags: -# - flatten tag data for simplicity -# - use series of tighter random spots to speed up or searches for popular tags - class RecordingSearchByTagService(Service): @@ -29,26 +24,13 @@ def search(self, tags, operator, pop_begin, pop_end, num_recordings): "count": num_recordings, "pop_begin": pop_begin, "pop_end": pop_end, - "tag": 
tags, - "min_tag_count": 1, + "tag": tags } r = requests.get("https://beta-api.listenbrainz.org/1/lb-radio/tags", params=data) if r.status_code != 200: raise RuntimeError(f"Cannot fetch recordings for tags. {r.text}") - recordings = [] - for rec in self.flatten_tag_data(dict(r.json())): - recordings.append(Recording(mbid=rec["recording_mbid"])) - - return plist(recordings) - - def flatten_tag_data(self, tag_data): - - flat_data = list(tag_data["recording"]) - flat_data.extend(list(tag_data["release-group"])) - flat_data.extend(list(tag_data["artist"])) - - return sorted(flat_data, key=lambda f: f["percent"], reverse=True) + return plist([ Recording(mbid=rec["recording_mbid"], musicbrainz={"popularity": rec["percent"]}) for rec in r.json() ]) class RecordingSearchByArtistService(Service): From fd428598869c078e2e0bb61b169bbbdb3237ed79 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 17 Apr 2024 16:22:39 +0200 Subject: [PATCH 20/33] Tweak --- troi/patches/lb_radio_classes/tag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/troi/patches/lb_radio_classes/tag.py b/troi/patches/lb_radio_classes/tag.py index 3733f89e..c7242297 100755 --- a/troi/patches/lb_radio_classes/tag.py +++ b/troi/patches/lb_radio_classes/tag.py @@ -54,7 +54,7 @@ def fetch_similar_tags(self, tag): def select_recordings(self): msgs = [] - start, stop = { "easy": (66, 100), "medium": (33, 66), "hard": (1, 33) }[self.mode] + start, stop = { "easy": (66, 95), "medium": (33, 66), "hard": (1, 33) }[self.mode] sim_start, sim_stop = { "easy": (0, 0), "medium": (50, 100), "hard": (10, 50) }[self.mode] num_similar_tags_to_include = { "easy": 0, "medium": 1, "hard": 2 }[self.mode] From b030d0a0cc486e4aaefab134334493a613b80ab2 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 17 Apr 2024 17:29:33 +0200 Subject: [PATCH 21/33] Use prod, since new tag search was merged --- troi/recording_search_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/troi/recording_search_service.py b/troi/recording_search_service.py index 45ac6cad..27322b0c 100644 --- a/troi/recording_search_service.py +++ b/troi/recording_search_service.py @@ -26,7 +26,7 @@ def search(self, tags, operator, pop_begin, pop_end, num_recordings): "pop_end": pop_end, "tag": tags } - r = requests.get("https://beta-api.listenbrainz.org/1/lb-radio/tags", params=data) + r = requests.get("https://api.listenbrainz.org/1/lb-radio/tags", params=data) if r.status_code != 200: raise RuntimeError(f"Cannot fetch recordings for tags. {r.text}") From 815e697a36665961ed40de8ca18f2f980988bb24 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 22 Apr 2024 17:04:06 +0200 Subject: [PATCH 22/33] Add country element --- tests/test_parser.py | 8 +++ troi/parse_prompt.py | 5 +- troi/patches/lb_radio.py | 6 ++ troi/patches/lb_radio_classes/country.py | 84 ++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 troi/patches/lb_radio_classes/country.py diff --git a/tests/test_parser.py b/tests/test_parser.py index 93de9db1..450c9a2a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -156,3 +156,11 @@ def test_recs(self): r = pp.parse("recs:(rob zombie)") assert r[0] == {"entity": "recs", "values": ["rob zombie"], "weight": 1, "opts": []} + + def test_country(self): + pp = PromptParser() + r = pp.parse("country:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") + assert r[0] == {"entity": "country", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": []} + + r = pp.parse("country:mali") + assert r[0] == {"entity": "country", "values": ["mali"], "weight": 1, "opts": []} diff --git a/troi/parse_prompt.py b/troi/parse_prompt.py index a7c54a70..ca4b6671 100755 --- a/troi/parse_prompt.py +++ b/troi/parse_prompt.py @@ -2,7 +2,7 @@ import re TIME_RANGES = ["week", "month", "quarter", "half_yearly", "year", "all_time", "this_week", "this_month", "this_year"] -ELEMENTS = ["artist", "tag", "collection", 
"playlist", "stats", "recs"] +ELEMENTS = ["artist", "tag", "collection", "playlist", "stats", "recs", "country"] ELEMENT_OPTIONS = { "artist": ["nosim", "easy", "medium", "hard"], @@ -10,7 +10,8 @@ "collection": ["easy", "medium", "hard"], "playlist": ["easy", "medium", "hard"], "stats": TIME_RANGES, - "recs": ["easy", "medium", "hard", "listened", "unlistened"] + "recs": ["easy", "medium", "hard", "listened", "unlistened"], + "country": ["easy", "medium", "hard"] } OPTIONS = set() diff --git a/troi/patches/lb_radio.py b/troi/patches/lb_radio.py index 94710d8b..72107875 100755 --- a/troi/patches/lb_radio.py +++ b/troi/patches/lb_radio.py @@ -20,6 +20,7 @@ from troi.patches.lb_radio_classes.tag import LBRadioTagRecordingElement from troi.patches.lb_radio_classes.stats import LBRadioStatsRecordingElement from troi.patches.lb_radio_classes.recs import LBRadioRecommendationRecordingElement +from troi.patches.lb_radio_classes.country import LBRadioCountryRecordingElement from troi import TARGET_NUMBER_OF_RECORDINGS, Playlist from troi.utils import interleave @@ -203,6 +204,11 @@ def create(self, inputs): operator=operator, include_similar_tags=include_sim) + if element["entity"] == "country": + source = LBRadioCountryRecordingElement( + area_name=element["values"][0], + mode=mode) + if element["entity"] == "collection": source = LBRadioCollectionRecordingElement( element["values"][0], mode=mode) diff --git a/troi/patches/lb_radio_classes/country.py b/troi/patches/lb_radio_classes/country.py new file mode 100644 index 00000000..d58238f4 --- /dev/null +++ b/troi/patches/lb_radio_classes/country.py @@ -0,0 +1,84 @@ +import logging +from time import sleep + +import requests + +import troi.patch +from troi import TARGET_NUMBER_OF_RECORDINGS +from troi.plist import plist +from troi import Element, ArtistCredit, Recording, PipelineError, DEVELOPMENT_SERVER_URL + +logger = logging.getLogger(__name__) + + +class LBRadioCountryRecordingElement(Element): + ''' + Given a country, 
return recordings for that country. + + Arguments: + area_name: the name of the area to make a playlist for + ''' + + def __init__(self, area_name, mode): + super().__init__() + self.area_name = area_name + self.mode = mode + + @staticmethod + def inputs(): + return [] + + @staticmethod + def outputs(): + return [Recording] + + def lookup_area(self, area_name): + + while True: + r = requests.get("http://musicbrainz.org/ws/2/area?query=%s&fmt=json" % area_name) + if r.status_code == 503: + sleep(1) + continue + + if r.status_code != 200: + raise PipelineError("Cannot fetch country code from MusicBrainz. HTTP code %s" % r.status_code) + + return r.json()['areas'][0]['id'] + + def recording_from_row(self, row): + if row['recording_mbid'] is None: + return None + + r = Recording(mbid=row['recording_mbid']) + if 'artist_credit_name' in row: + r.artist = ArtistCredit(name=row['artist_credit_name']) + + if 'recording_name' in row: + r.name = row['recording_name'] + + if 'year' in row: + r.year = row['year'] + + if 'listen_count' in row: + r.listenbrainz = {"listen_count": row["listen_count"]} + + return r + + def read(self, inputs): + + start, stop = {"easy": (66, 100), "medium": (33, 66), "hard": (0, 33)}[self.mode] + area_mbid = self.lookup_area(self.area_name) + args = [{"[area_mbid]": area_mbid}] + r = requests.post(DEVELOPMENT_SERVER_URL + "/popular-recordings-by-country/json", json=args) + if r.status_code != 200: + raise PipelineError("Cannot fetch first dataset recordings from ListenBrainz. 
HTTP code %s (%s)" % + (r.status_code, r.text)) + + self.data_cache = self.local_storage["data_cache"] + self.data_cache["element-descriptions"].append("country %s" % self.area_name) + + recordings = plist() + for row in r.json(): + recordings.append(self.recording_from_row(row)) + + return recordings.random_item(start, stop, TARGET_NUMBER_OF_RECORDINGS) From adf63fefff05b87b4db6fc6b6087e66bbdabbbf1 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 23 Apr 2024 13:09:28 +0200 Subject: [PATCH 23/33] Country and tag improvements. --- troi/patches/lb_radio.py | 2 +- troi/patches/lb_radio_classes/country.py | 11 +++++++++-- troi/patches/lb_radio_classes/tag.py | 3 ++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/troi/patches/lb_radio.py b/troi/patches/lb_radio.py index 72107875..41c557d0 100755 --- a/troi/patches/lb_radio.py +++ b/troi/patches/lb_radio.py @@ -199,7 +199,7 @@ def create(self, inputs): include_sim = False if "nosim" in element["opts"] else True operator = "or" if "or" in element["opts"] else "and" source = LBRadioTagRecordingElement( - element["values"], + [ t.lower() for t in element["values"]], mode=mode, operator=operator, include_similar_tags=include_sim) diff --git a/troi/patches/lb_radio_classes/country.py b/troi/patches/lb_radio_classes/country.py index d58238f4..f71449de 100644 --- a/troi/patches/lb_radio_classes/country.py +++ b/troi/patches/lb_radio_classes/country.py @@ -35,7 +35,7 @@ def outputs(): def lookup_area(self, area_name): while True: - r = requests.get("http://musicbrainz.org/ws/2/area?query=%s&fmt=json" % area_name) + r = requests.get("https://musicbrainz.org/ws/2/area?query=%s&fmt=json" % area_name) if r.status_code == 503: sleep(1) continue @@ -43,7 +43,11 @@ def lookup_area(self, area_name): if r.status_code != 200: raise PipelineError("Cannot fetch country code from MusicBrainz. 
HTTP code %s" % r.status_code) - return r.json()['areas'][0]['id'] + area = r.json()['areas'][0] + if area["type"] == "Country": + return area["id"] + else: + return None def recording_from_row(self, row): if row['recording_mbid'] is None: @@ -68,6 +72,9 @@ def read(self, inputs): start, stop = {"easy": (66, 100), "medium": (33, 66), "hard": (0, 33)}[self.mode] area_mbid = self.lookup_area(self.area_name) + if area_mbid is None: + raise PipelineError("Cannot find country '%s'" % self.area_name) + args = [{"[area_mbid]": area_mbid}] r = requests.post(DEVELOPMENT_SERVER_URL + "/popular-recordings-by-country/json", json=args) if r.status_code != 200: diff --git a/troi/patches/lb_radio_classes/tag.py b/troi/patches/lb_radio_classes/tag.py index c7242297..b44a8c1b 100755 --- a/troi/patches/lb_radio_classes/tag.py +++ b/troi/patches/lb_radio_classes/tag.py @@ -59,9 +59,10 @@ def select_recordings(self): num_similar_tags_to_include = { "easy": 0, "medium": 1, "hard": 2 }[self.mode] tag_streams = [] - print("main search: %d - %d" % (start, stop)) tag_streams.append(self.recording_search_by_tag.search(self.tags, self.operator, start, stop, self.NUM_RECORDINGS_TO_COLLECT)) + if not tag_streams[0]: + return [], ["Could not find any recordings for tag search '%s', ignoring." 
% (",".join(self.tags)) ] if len(self.tags) == 1 and self.include_similar_tags: similar_tags = self.fetch_similar_tags(self.tags[0]) From 06ad2919444c08d8b488893d4caa061b532db558 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 23 Apr 2024 15:56:11 +0200 Subject: [PATCH 24/33] Allow area_mbids --- tests/test_parser.py | 3 ++ troi/parse_prompt.py | 16 +++++----- troi/patches/lb_radio.py | 11 +++++-- troi/patches/lb_radio_classes/country.py | 37 ++++++++++++++++++++---- 4 files changed, 51 insertions(+), 16 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 450c9a2a..a5dac969 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -52,6 +52,9 @@ def test_tags(self): r = pp.parse("tag:(モーニング娘。)") assert r[0] == {"entity": "tag", "values": ["モーニング娘。"], "weight": 1, "opts": []} + r = pp.parse("tag:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4)") + assert r[0] == {"entity": "tag", "values": ["57baa3c6-ee43-4db3-9e6a-50bbc9792ee4"], "weight": 1, "opts": []} + def test_tag_errors(self): pp = PromptParser() self.assertRaises(ParseError, pp.parse, "t:(abstract rock blues):bork") diff --git a/troi/parse_prompt.py b/troi/parse_prompt.py index ca4b6671..438c1f2e 100755 --- a/troi/parse_prompt.py +++ b/troi/parse_prompt.py @@ -66,14 +66,16 @@ def set_block_values(self, name, values, weight, opts, text, block): """Parse, process and sanity check data for an element""" if values is None: - try: - values = [UUID(text)] - except ValueError: - if name == "tag": - values = text.split(",") - values = [v.strip() for v in values] - else: + if name in ("artist", "country", "collection", "playlist"): + try: + values = [UUID(text)] + except ValueError: values = [text] + elif name == "tag": + values = text.split(",") + values = [v.strip() for v in values] + else: + values = [text] elif weight is None: if not text: weight = 1 diff --git a/troi/patches/lb_radio.py b/troi/patches/lb_radio.py index 41c557d0..a42c7f74 100755 --- a/troi/patches/lb_radio.py +++ 
b/troi/patches/lb_radio.py @@ -205,9 +205,14 @@ def create(self, inputs): include_similar_tags=include_sim) if element["entity"] == "country": - source = LBRadioCountryRecordingElement( - area_name=element["values"][0], - mode=mode) + if isinstance(element["values"][0], UUID): + source = LBRadioCountryRecordingElement( + mode, + area_mbid=element["values"][0]) + else: + source = LBRadioCountryRecordingElement( + mode, + area_name=element["values"][0]) if element["entity"] == "collection": source = LBRadioCollectionRecordingElement( diff --git a/troi/patches/lb_radio_classes/country.py b/troi/patches/lb_radio_classes/country.py index f71449de..0ca8cb84 100644 --- a/troi/patches/lb_radio_classes/country.py +++ b/troi/patches/lb_radio_classes/country.py @@ -19,9 +19,10 @@ class LBRadioCountryRecordingElement(Element): area_name: the name of the area to make a playlist for ''' - def __init__(self, area_name, mode): + def __init__(self, mode, area_name=None, area_mbid=None): super().__init__() self.area_name = area_name + self.area_mbid = str(area_mbid) self.mode = mode @staticmethod @@ -32,7 +33,7 @@ def inputs(): def outputs(): return [Recording] - def lookup_area(self, area_name): + def lookup_area_by_name(self, area_name): while True: r = requests.get("https://musicbrainz.org/ws/2/area?query=%s&fmt=json" % area_name) @@ -49,6 +50,19 @@ def lookup_area(self, area_name): else: return None + def lookup_area_by_mbid(self, area_mbid): + + while True: + r = requests.get("https://musicbrainz.org/ws/2/area/%s?fmt=json" % area_mbid) + if r.status_code == 503: + sleep(1) + continue + + if r.status_code != 200: + raise PipelineError("Cannot fetch country code from MusicBrainz. 
Error: %s" % r.text) + + return r.json()["name"] + def recording_from_row(self, row): if row['recording_mbid'] is None: return None @@ -71,11 +85,22 @@ def recording_from_row(self, row): def read(self, inputs): start, stop = {"easy": (66, 100), "medium": (33, 66), "hard": (0, 33)}[self.mode] - area_mbid = self.lookup_area(self.area_name) - if area_mbid is None: - raise PipelineError("Cannot find country '%s'" % self.area_name) - args = [{"[area_mbid]": area_mbid}] + if self.area_name is None and self.area_mbid is None: + raise PipelineError("An area name or area mbid must be specified.") + + if self.area_name: + self.area_mbid = self.lookup_area_by_name(self.area_name) + if self.area_mbid is None: + raise PipelineError("Cannot find country '%s'" % self.area_name) + else: + self.area_name = self.lookup_area_by_mbid(self.area_mbid) + if self.area_name is None: + raise PipelineError("Cannot lookup country from mbid '%s'" % self.area_mbid) + + print(self.area_name, self.area_mbid) + + args = [{"[area_mbid]": self.area_mbid}] r = requests.post(DEVELOPMENT_SERVER_URL + "/popular-recordings-by-country/json", json=args) if r.status_code != 200: raise PipelineError("Cannot fetch first dataset recordings from ListenBrainz. HTTP code %s (%s)" % From 8b5f56a6e82443ac3f0cda37e0715d3e30dffbc8 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 23 Apr 2024 16:53:06 +0200 Subject: [PATCH 25/33] More minor fixes --- troi/patches/lb_radio_classes/country.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/troi/patches/lb_radio_classes/country.py b/troi/patches/lb_radio_classes/country.py index 0ca8cb84..04e8ff91 100644 --- a/troi/patches/lb_radio_classes/country.py +++ b/troi/patches/lb_radio_classes/country.py @@ -44,7 +44,11 @@ def lookup_area_by_name(self, area_name): if r.status_code != 200: raise PipelineError("Cannot fetch country code from MusicBrainz. 
HTTP code %s" % r.status_code) - area = r.json()['areas'][0] + try: + area = r.json()['areas'][0] + except IndexError: + return None + if area["type"] == "Country": return area["id"] else: @@ -61,7 +65,12 @@ def lookup_area_by_mbid(self, area_mbid): if r.status_code != 200: raise PipelineError("Cannot fetch country code from MusicBrainz. Error: %s" % r.text) - return r.json()["name"] + area = r.json() + if area["type"] != "Country": + raise PipelineError("The specified area_mbid (%s) refers to a %s, but only countries are supported." % + (area_mbid, area["type"])) + + return area["name"] def recording_from_row(self, row): if row['recording_mbid'] is None: From 23a128c9449f707fc7b4d5c1538a91efb50907a6 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 25 Apr 2024 11:57:17 +0200 Subject: [PATCH 26/33] Shuffle tag radio results --- troi/patches/lb_radio_classes/tag.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/troi/patches/lb_radio_classes/tag.py b/troi/patches/lb_radio_classes/tag.py index b44a8c1b..36e0c77b 100755 --- a/troi/patches/lb_radio_classes/tag.py +++ b/troi/patches/lb_radio_classes/tag.py @@ -1,12 +1,11 @@ import troi -from random import randint +from random import randint, shuffle import requests from troi import Recording from troi.plist import plist from troi import TARGET_NUMBER_OF_RECORDINGS -from troi.utils import interleave class LBRadioTagRecordingElement(troi.Element): @@ -58,10 +57,9 @@ def select_recordings(self): sim_start, sim_stop = { "easy": (0, 0), "medium": (50, 100), "hard": (10, 50) }[self.mode] num_similar_tags_to_include = { "easy": 0, "medium": 1, "hard": 2 }[self.mode] - tag_streams = [] - tag_streams.append(self.recording_search_by_tag.search(self.tags, self.operator, start, stop, - self.NUM_RECORDINGS_TO_COLLECT)) - if not tag_streams[0]: + recordings = self.recording_search_by_tag.search(self.tags, self.operator, start, stop, + self.NUM_RECORDINGS_TO_COLLECT) + if not recordings: return 
[], ["Could not find any recordings for tag search '%s', ignoring." % (",".join(self.tags)) ] if len(self.tags) == 1 and self.include_similar_tags: @@ -87,7 +85,7 @@ def select_recordings(self): if len(sim_tag_data) > self.NUM_RECORDINGS_TO_COLLECT: sim_tag_data = sim_tag_data.random_item( start, stop, self.NUM_RECORDINGS_TO_COLLECT) - tag_streams.append(sim_tag_data) + recordings.extend(sim_tag_data) if num_similar_tags_to_include > 1: msgs = [ @@ -100,7 +98,10 @@ def select_recordings(self): else: msgs = [f"""tag: using only seed tag '{self.tags[0]}'."""] - return interleave(tag_streams), msgs + + recordings = list(recordings) + shuffle(recordings) + return recordings, msgs def read(self, entities): From d5682b5e5fd72388482b298c4534ba6adce9b4d1 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 25 Apr 2024 12:47:03 +0200 Subject: [PATCH 27/33] Make syntax more consistent --- tests/test_parser.py | 24 +++++++++++++----------- troi/parse_prompt.py | 4 ++++ troi/patches/lb_radio_classes/country.py | 2 -- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index a5dac969..9d3b29af 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -8,19 +8,21 @@ class TestParser(unittest.TestCase): def test_basic_entities(self): pp = PromptParser() - r = pp.parse("artist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") + r = pp.parse("artist:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4)") assert r[0] == {"entity": "artist", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": []} - r = pp.parse("artist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") + r = pp.parse("artist:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4)") assert r[0] == {"entity": "artist", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": []} self.assertRaises(ParseError, pp.parse, "wrong:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") + self.assertRaises(ParseError, pp.parse, "artist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") r = 
pp.parse("artist:(the knife)") assert r[0] == {"entity": "artist", "values": ["the knife"], "weight": 1, "opts": []} self.assertRaises(ParseError, pp.parse, "artist:u2:nosim") self.assertRaises(ParseError, pp.parse, "artists:u2:nosim") + self.assertRaises(ParseError, pp.parse, "country:andorra") def test_tags(self): pp = PromptParser() @@ -76,14 +78,14 @@ def test_shortcuts(self): def test_compound(self): pp = PromptParser() - r = pp.parse('artist:05319f96-e409-4199-b94f-3cabe7cc188a:2 tag:(downtempo):1 tag:(trip hop, abstract):2') + r = pp.parse('artist:(05319f96-e409-4199-b94f-3cabe7cc188a):2 tag:(downtempo):1 tag:(trip hop, abstract):2') assert r[0] == {"entity": "artist", "values": [UUID("05319f96-e409-4199-b94f-3cabe7cc188a")], "weight": 2, "opts": []} assert r[1] == {"entity": "tag", "values": ["downtempo"], "weight": 1, "opts": []} assert r[2] == {"entity": "tag", "values": ["trip hop", "abstract"], "weight": 2, "opts": []} def test_weights(self): pp = PromptParser() - r = pp.parse("artist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4:1 artist:f54ba4c6-12dd-4358-9136-c64ad89420c5:2") + r = pp.parse("artist:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4):1 artist:(f54ba4c6-12dd-4358-9136-c64ad89420c5):2") assert r[0] == {"entity": "artist", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": []} assert r[1] == {"entity": "artist", "values": [UUID("f54ba4c6-12dd-4358-9136-c64ad89420c5")], "weight": 2, "opts": []} @@ -91,10 +93,10 @@ def test_weights(self): "artist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4:1 artist:f54ba4c6-12dd-4358-9136-c64ad89420c5:fussy") self.assertRaises(ParseError, pp.parse, "artist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4:1 artist:f54ba4c6-12dd-4358-9136-c64ad89420c5:.5") - r = pp.parse("artist:portishead::easy") + r = pp.parse("artist:(portishead)::easy") assert r[0] == {"entity": "artist", "values": ["portishead"], "weight": 1, "opts": ["easy"]} - r = pp.parse("artist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4::easy") + r = 
pp.parse("artist:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4)::easy") assert r[0] == {"entity": "artist", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": ["easy"]} def test_opts(self): @@ -115,13 +117,13 @@ def test_parens(self): def test_collection_playlist(self): pp = PromptParser() - r = pp.parse("collection:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") + r = pp.parse("collection:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4)") assert r[0] == {"entity": "collection", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": []} - r = pp.parse("playlist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") + r = pp.parse("playlist:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4)") assert r[0] == {"entity": "playlist", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": []} - r = pp.parse("playlist:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") + r = pp.parse("playlist:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4)") assert r[0] == {"entity": "playlist", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": []} def test_stats(self): @@ -162,8 +164,8 @@ def test_recs(self): def test_country(self): pp = PromptParser() - r = pp.parse("country:57baa3c6-ee43-4db3-9e6a-50bbc9792ee4") + r = pp.parse("country:(57baa3c6-ee43-4db3-9e6a-50bbc9792ee4)") assert r[0] == {"entity": "country", "values": [UUID("57baa3c6-ee43-4db3-9e6a-50bbc9792ee4")], "weight": 1, "opts": []} - r = pp.parse("country:mali") + r = pp.parse("country:(mali)") assert r[0] == {"entity": "country", "values": ["mali"], "weight": 1, "opts": []} diff --git a/troi/parse_prompt.py b/troi/parse_prompt.py index 438c1f2e..ba8709ec 100755 --- a/troi/parse_prompt.py +++ b/troi/parse_prompt.py @@ -145,6 +145,10 @@ def parse(self, prompt): text = "" continue + # Check to make sure that some values are in () + if name in ("artist", "country", "collection", "playlist") and i == 0 and not block[i] == "(": + raise ParseError("Element value must be enclosed in (). 
Try: %s:(name)" % (name)) + if block[i] == ' ' and parens == 0: break diff --git a/troi/patches/lb_radio_classes/country.py b/troi/patches/lb_radio_classes/country.py index 04e8ff91..4c3395f1 100644 --- a/troi/patches/lb_radio_classes/country.py +++ b/troi/patches/lb_radio_classes/country.py @@ -107,8 +107,6 @@ def read(self, inputs): if self.area_name is None: raise PipelineError("Cannot lookup country from mbid '%s'" % self.area_mbid) - print(self.area_name, self.area_mbid) - args = [{"[area_mbid]": self.area_mbid}] r = requests.post(DEVELOPMENT_SERVER_URL + "/popular-recordings-by-country/json", json=args) if r.status_code != 200: From 6da8e98a7c51632483c99c70f110bc1736a46400 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 25 Apr 2024 12:57:34 +0200 Subject: [PATCH 28/33] More debugging, updating docs and testing all the examples --- docs/lb_radio.rst | 6 +++--- troi/print_recording.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/lb_radio.rst b/docs/lb_radio.rst index 0bdee7fa..2d2b3575 100644 --- a/docs/lb_radio.rst +++ b/docs/lb_radio.rst @@ -165,13 +165,13 @@ If LB-radio does not find your artist, you can specify an artist using an Artist :: - artist:8f6bd1e4-fbe1-4f50-aa9b-94c450ec0f11 + artist:(8f6bd1e4-fbe1-4f50-aa9b-94c450ec0f11) LB-radio also supports MusicBrainz collections as sources: :: - collection:8be1a919-a386-45f3-8cc2-0d9249b02aa4 + collection:(8be1a919-a386-45f3-8cc2-0d9249b02aa4) Will select random recordings from a MusicBrainz recording collection -- the modes wont have any affect on collections, since collections have no inherent ranking that could be used to select recordings according to mode. 
:( @@ -179,7 +179,7 @@ collections have no inherent ranking that could be used to select recordings acc :: - playlist:8be1a919-a386-45f3-8cc2-0d9249b02aa4 + playlist:(8be1a919-a386-45f3-8cc2-0d9249b02aa4) Will select random recordings from a ListenBrainz playlist -- the modes wont have any affect on collections, since plylists have no inherent ranking that could be used to select recordings according to mode. :( diff --git a/troi/print_recording.py b/troi/print_recording.py index b0f2a294..b7da1234 100755 --- a/troi/print_recording.py +++ b/troi/print_recording.py @@ -84,7 +84,7 @@ def _print_recording(self, recording, year=False, popularity=False, listen_count if self.print_bpm or bpm: text += " %3d" % recording.acousticbrainz['bpm'] if self.print_popularity or popularity: - text += " %.1f" % recording.musicbrainz['popularity'] + text += " %.1f" % recording.musicbrainz.get('popularity', 0.0) if self.print_latest_listened_at: if recording.listenbrainz["latest_listened_at"] is None: text += " never " From 2477f5d51820a32a3c854e589688a3aaac37e735 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 25 Apr 2024 14:51:21 +0200 Subject: [PATCH 29/33] Improve error handling --- troi/cli.py | 6 +++++- troi/patch.py | 9 +++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/troi/cli.py b/troi/cli.py index cbe99d7f..46bc63a8 100755 --- a/troi/cli.py +++ b/troi/cli.py @@ -108,7 +108,11 @@ def playlist(patch, quiet, save, token, upload, args, created_for, name, desc, m "This is a local patch and should be invoked via the specific troi function, rather than the playlist function.") return None - ret = patch.generate_playlist() + try: + ret = patch.generate_playlist() + except RuntimeError as err: + logger.error(err) + ret = 0 user_feedback = patch.user_feedback() if len(user_feedback) > 0: diff --git a/troi/patch.py b/troi/patch.py index 117c59bf..e5824c72 100755 --- a/troi/patch.py +++ b/troi/patch.py @@ -166,21 +166,18 @@ def generate_playlist(self): 
logger.info("done.") except troi.PipelineError as err: - logging.error("Failed to generate playlist: %s" % err) - return None + raise RuntimeError("Playlist generation failed: %s" % err) upload = self.patch_args["upload"] token = self.patch_args["token"] spotify = self.patch_args["spotify"] if upload and not token and not spotify: - logger.info("In order to upload a playlist, you must provide an auth token. Use option --token.") - return None + raise RuntimeError("In order to upload a playlist, you must provide an auth token. Use option --token.") min_recordings = self.patch_args["min_recordings"] if min_recordings is not None and \ (len(playlist.playlists) == 0 or len(playlist.playlists[0].recordings) < min_recordings): - logger.info("Playlist does not have at least %d recordings, stopping." % min_recordings) - return None + raise RuntimeError("Playlist does not have at least %d recordings" % min_recordings) save = self.patch_args["save"] if result is not None and spotify and upload: From 3b22cc7e42084f093e715de826d22847b75f0cf0 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 25 Apr 2024 14:59:09 +0200 Subject: [PATCH 30/33] Minor error message improvement --- troi/parse_prompt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/troi/parse_prompt.py b/troi/parse_prompt.py index ba8709ec..97d843eb 100755 --- a/troi/parse_prompt.py +++ b/troi/parse_prompt.py @@ -147,7 +147,7 @@ def parse(self, prompt): # Check to make sure that some values are in () if name in ("artist", "country", "collection", "playlist") and i == 0 and not block[i] == "(": - raise ParseError("Element value must be enclosed in (). Try: %s:(name)" % (name)) + raise ParseError("Element value must be enclosed in ( ). 
Try: %s:(name)" % (name)) if block[i] == ' ' and parens == 0: break From 99d5096dc695ae8251fb206f513ef76ddc8deafd Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 25 Apr 2024 15:15:12 +0200 Subject: [PATCH 31/33] Add missing requirements file --- docs/requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/requirements.txt diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..ab50cba5 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,5 @@ +Sphinx==7.2.6 +sphinxcontrib-httpdomain==1.8.1 +sphinx_rtd_theme==2.0.0 +docutils==0.20.1 +sphinx-click==5.1.0 From 921cc71394cb3417853c21cd48b895afb66e1f48 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 25 Apr 2024 15:36:54 +0200 Subject: [PATCH 32/33] Add docs for country element --- docs/lb_radio.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/lb_radio.rst b/docs/lb_radio.rst index 2d2b3575..55b080d5 100644 --- a/docs/lb_radio.rst +++ b/docs/lb_radio.rst @@ -35,6 +35,7 @@ The LB Radio supports the following entities: #. **playlist**: Use a ListenBrainz playlist as a source of recordings. (mode also does not apply to playlists) #. **stats**: Use a ListenBrainz user's statistics as a source of recordings. #. **recs**: Use a ListenBrainz user's recommended recordings as a source of recordings. +#. **country**: Select recordings from artists who are from the given country. Options ------- @@ -198,6 +199,14 @@ Will select random recordings from the ListenBrainz user lucifer recordings stat Will select random recordings from the ListenBrainz user mr_monkey's recommended recordings that mr_monkey hasn't listened to. +:: + + country:(Mali) + +Will select random recordings from artists who are from the given country. While this feature generally represents music from +that selected country, some artists leave their home country and don't perform music representative of their country, so +this element may not always be 100% on point.
But it can still create some very interesting playlists! + More complex examples --------------------- From 5b2737710e95758a4ddffefa09f4142c4de39b6a Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 25 Apr 2024 17:16:57 +0200 Subject: [PATCH 33/33] PR feedback fixes --- troi/patches/periodic_jams.py | 3 ++- troi/utils.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/troi/patches/periodic_jams.py b/troi/patches/periodic_jams.py index eefb962c..0ccce48b 100755 --- a/troi/patches/periodic_jams.py +++ b/troi/patches/periodic_jams.py @@ -1,5 +1,6 @@ from datetime import datetime, timedelta +from troi.patch import Patch import troi.filters import troi.listenbrainz.feedback import troi.listenbrainz.listens @@ -34,7 +35,7 @@ """ -class PeriodicJamsPatch(troi.patch.Patch): +class PeriodicJamsPatch(Patch): """ Create either daily-jams, weekly-jams or weekly-exploration with this patch. diff --git a/troi/utils.py b/troi/utils.py index 9e82baba..861d2720 100755 --- a/troi/utils.py +++ b/troi/utils.py @@ -52,7 +52,8 @@ def discover_patches_from_dir(module_path, patch_dir, add_dot=False): for member in inspect.getmembers(patch): if inspect.isclass(member[1]): if issubclass(member[1], troi.patch.Patch): - patch_dict[member[1].slug()] = member[1] + if member[1].slug() is not None: + patch_dict[member[1].slug()] = member[1] if add_dot: sys.path.pop(-1)