Also use native titles for matching (#204)

RickDB · Jun 18, 2023 · f195231
1 parent b7dc801
commit f195231
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 123 deletions.
diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml
@@ -24,7 +24,8 @@ jobs:
       packages: write
 
     steps:
-        # Set IMAGE_NAMES to ghcr.io, also set to docker.io if building a tag or master
+        # Set IMAGE_NAMES to ghcr.io, additionally add docker.io if building a tag or master
+        # Limit platforms to amd64 if building for pull requests
       - name: Set variables
         run: |
           EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
@@ -38,6 +39,12 @@ jobs:
           if [[ -n "$DOCKER_IO" ]]; then
             echo "DOCKER_IO=true" >> $GITHUB_ENV
           fi
+
+          if [[ ${{ inputs.eventname }} == 'pull_request' ]]; then
+            echo "PLATFORMS=linux/amd64" >> $GITHUB_ENV
+          else
+            echo "PLATFORMS=${{ inputs.platforms }}" >> $GITHUB_ENV
+          fi
       - name: Checkout repository
         uses: actions/checkout@v3
       - name: Set up QEMU
@@ -72,7 +79,7 @@ jobs:
         with:
           context: .
           file: ${{ inputs.dockerfile }}
-          platforms: ${{ inputs.platforms }}
+          platforms: ${{ env.PLATFORMS }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           push: ${{ inputs.eventname != 'pull_request' }}

diff --git a/plexanisync/anilist.py b/plexanisync/anilist.py
@@ -3,7 +3,7 @@
 from dataclasses import dataclass
 from typing import Dict, List, Optional
 import logging
-import re
+import regex as re
 from statistics import mean
 import inflect
 
@@ -328,31 +328,11 @@ def __find_mapped_series(self, anilist_series: List[AnilistSeries], anime_id: in
     def __match_series_against_potential_titles(
         self, series: AnilistSeries, potential_titles: List[str], matched_anilist_series: List[AnilistSeries]
     ):
-        if series.title_english:
-            if series.title_english.lower() in potential_titles:
-                matched_anilist_series.append(series)
-            else:
-                series_title_english_clean = self.__clean_title(series.title_english)
-                if series_title_english_clean in potential_titles:
-                    matched_anilist_series.append(series)
-        if series.title_romaji:
-            if series.title_romaji.lower() in potential_titles:
+        for title in series.titles():
+            if (title.lower() in potential_titles
+                    or self.__clean_title(title) in potential_titles):
                 if series not in matched_anilist_series:
                     matched_anilist_series.append(series)
-            else:
-                series_title_romaji_clean = self.__clean_title(series.title_romaji)
-                if series_title_romaji_clean in potential_titles:
-                    if series not in matched_anilist_series:
-                        matched_anilist_series.append(series)
-        if series.synonyms:
-            for synonym in series.synonyms:
-                if synonym.lower() in potential_titles:
-                    if series not in matched_anilist_series:
-                        matched_anilist_series.append(series)
-                else:
-                    synonym_clean = self.__clean_title(synonym)
-                    if synonym_clean in potential_titles:
-                        matched_anilist_series.append(series)
 
     def __find_id_season_best_match(self, title: str, season: int, year: int) -> Optional[int]:
         media_id = None
@@ -397,53 +377,32 @@ def __find_id_season_best_match(self, title: str, season: int, year: int) -> Opt
         matches = self.graphql.search_by_name(title)
         if matches:
             for match in matches:
-                title_english = ""
-                title_english_for_matching = ""
-                title_romaji = ""
-                title_romaji_for_matching = ""
-                started_year = ""
-
-                if match.title_english:
-                    title_english = match.title_english
-                    title_english_for_matching = self.__clean_title(title_english)
-                if match.title_romaji:
-                    title_romaji = match.title_romaji
-                    title_romaji_for_matching = self.__clean_title(title_romaji)
-                if match.started_year:
-                    started_year = match.started_year
-                else:
+                started_year = match.started_year
+                if not started_year:
                     logger.warning(
                         "Anilist series did not have year attribute so skipping this result and moving to next: "
-                        f"{title_english} | {title_romaji}"
+                        f"{match.title_english} | {match.title_romaji}"
                     )
                     continue
 
+                # key = cleaned title, value = original title
+                titles_for_matching = {self.__clean_title(t): t for t in match.titles()}
                 for potential_title in potential_titles:
                     potential_title = self.__clean_title(potential_title)
                     # logger.info('Comparing AniList: %s | %s[%s] <===> %s' %
-                    #  (title_english_for_matching, title_romaji_for_matching, started_year, potential_title))
-                    if title_english_for_matching == potential_title:
+                    #  (titles_for_matching, started_year, potential_title))
+                    if potential_title in titles_for_matching:
+                        # Use original title for logging
+                        original_title = titles_for_matching[potential_title]
                         if started_year < match_year:
                             logger.warning(
-                                f"Found match: {title_english} [{media_id}] | "
+                                f"Found match: {original_title} [{media_id}] | "
                                 f"skipping as it was released before first season ({started_year} <==> {match_year})"
                             )
                         else:
                             media_id = match.anilist_id
                             logger.info(
-                                f"Found match: {title_english} [{media_id}]"
-                            )
-                            break
-                    if title_romaji_for_matching == potential_title:
-                        if started_year < match_year:
-                            logger.warning(
-                                f"Found match: {title_romaji} [{media_id}] | "
-                                f"skipping as it was released before first season ({started_year} <==> {match_year})"
-                            )
-                        else:
-                            media_id = match.anilist_id
-                            logger.info(
-                                f"Found match: {title_romaji} [{media_id}]"
+                                f"Found match: {original_title} [{media_id}]"
                             )
                             break
         if media_id == 0:
@@ -458,69 +417,25 @@ def __find_id_best_match(self, title: str, year: int) -> Optional[int]:
         matches = self.graphql.search_by_name(title)
         if matches:
             for match in matches:
-                title_english = ""
-                title_english_for_matching = ""
-                title_romaji = ""
-                title_romaji_for_matching = ""
-                synonyms = ""
-                synonyms_for_matching = ""
-                started_year = None
-
-                if match.title_english:
-                    title_english = match.title_english
-                    title_english_for_matching = self.__clean_title(title_english)
-                if match.title_romaji:
-                    title_romaji = match.title_romaji
-                    title_romaji_for_matching = self.__clean_title(title_romaji)
-                if match.started_year:
-                    started_year = match.started_year
+                started_year = match.started_year
+
+                # key = cleaned title, value = original title
+                titles_for_matching = {self.__clean_title(t): t for t in match.titles()}
 
                 # logger.info('Comparing AniList: %s | %s[%s] <===> %s[%s]' % (title_english, title_romaji, started_year, match_title, match_year))
-                if (
-                    match_title == title_english_for_matching
-                    and year == started_year
-                ):
-                    media_id = match.anilist_id
-                    logger.warning(
-                        f"Found match: {title_english} [{media_id}]"
-                    )
-                    break
-                if (
-                    match_title == title_romaji_for_matching
-                    and year == started_year
-                ):
-                    media_id = match.anilist_id
-                    logger.warning(
-                        f"Found match: {title_romaji} [{media_id}]"
-                    )
-                    break
-                if match.synonyms:
-                    for synonym in match.synonyms:
-                        synonyms = synonym
-                        synonyms_for_matching = self.__clean_title(synonyms)
-                        if (
-                            match_title == synonyms_for_matching
-                            and year == started_year
-                        ):
-                            media_id = match.anilist_id
-                            logger.warning(
-                                f"Found match in synonyms: {synonyms} [{media_id}]"
-                            )
-                            break
-                if (
-                    match_title == title_romaji_for_matching
-                    and year != started_year
-                ):
-                    logger.info(
-                        f"Found match however started year is a mismatch: {title_romaji} [AL: {started_year} <==> Plex: {year}] "
-                    )
-                elif (
-                    match_title == title_english_for_matching
-                    and year != started_year
-                ):
-                    logger.info(
-                        f"Found match however started year is a mismatch: {title_english} [AL: {started_year} <==> Plex: {year}] "
-                    )
+                if match_title in titles_for_matching:
+                    # Use original title for logging
+                    original_title = titles_for_matching[match_title]
+                    if year == started_year:
+                        media_id = match.anilist_id
+                        logger.warning(
+                            f"Found match: {original_title} [{media_id}]"
+                        )
+                        break
+                    else:
+                        logger.info(
+                            f"Found match however started year is a mismatch: {original_title} [AL: {started_year} <==> Plex: {year}] "
+                        )
         if media_id is None:
             logger.error(f"No match found for title: {title}")
         return media_id
@@ -760,4 +675,4 @@ def __map_watchcount_to_seasons(
         return episodes_in_anilist_entry
 
     def __clean_title(self, title: str) -> str:
-        return re.sub("[^A-Za-z0-9]+", "", title.lower().strip())
+        return re.sub(r'[^A-Za-z0-9\p{IsHan}\p{IsBopo}\p{IsHira}\p{IsKatakana}]+', "", title.lower().strip())
diff --git a/plexanisync/graphql.py b/plexanisync/graphql.py
@@ -28,11 +28,19 @@ class AnilistSeries:
     episodes: int
     title_english: str
     title_romaji: str
+    title_native: str
     synonyms: List[str]
     started_year: int
     ended_year: int
     score: int
 
+    def titles(self) -> List[str]:
+        titles = [self.title_english, self.title_romaji, self.title_native]
+        if self.synonyms:
+            titles += self.synonyms
+        # filter out empty values
+        return [title for title in titles if title]
+
 
 class GraphQL:
     def __init__(self, anilist_settings: SectionProxy):
@@ -65,7 +73,7 @@ def search_by_id(self, anilist_id: int):
             'episodes',
             'synonyms'
         )
-        media.title.__fields__('romaji', 'english')
+        media.title.__fields__('romaji', 'english', 'native')
         media.start_date.year()
         media.end_date.year()
 
@@ -87,7 +95,7 @@ def search_by_name(self, anilist_show_name: str) -> List[AnilistSeries]:
             'episodes',
             'synonyms'
         )
-        media.title.__fields__('romaji', 'english')
+        media.title.__fields__('romaji', 'english', 'native')
         media.start_date.year()
         media.end_date.year()
 
@@ -115,7 +123,7 @@ def fetch_user_list(self) -> List[AnilistSeries]:
         )
         lists.entries.media.start_date.year()
         lists.entries.media.end_date.year()
-        lists.entries.media.title.__fields__('romaji', 'english')
+        lists.entries.media.title.__fields__('romaji', 'english', 'native')
 
         data = self.__send_graphql_request(operation)
         list_items = (operation + data).media_list_collection
@@ -193,6 +201,7 @@ def __mediaitem_to_object(self, media_item) -> AnilistSeries:
         episodes = 0
         title_english = ""
         title_romaji = ""
+        title_native = ""
         synonyms = []
         started_year = 0
         ended_year = 0
@@ -213,6 +222,8 @@ def __mediaitem_to_object(self, media_item) -> AnilistSeries:
             title_english = media_item.title.english
         if hasattr(media_item.title, "romaji"):
             title_romaji = media_item.title.romaji
+        if hasattr(media_item.title, "native"):
+            title_native = media_item.title.native
         if hasattr(media_item, "synonyms"):
             synonyms = media_item.synonyms
         if hasattr(media_item.start_date, "year"):
@@ -232,6 +243,7 @@ def __mediaitem_to_object(self, media_item) -> AnilistSeries:
             episodes,
             title_english,
             title_romaji,
+            title_native,
             synonyms,
             started_year,
             ended_year,

diff --git a/requirements.txt b/requirements.txt
@@ -10,6 +10,7 @@ jsonschema==4.17.3
 PlexAPI==4.13.4
 pyreadline3==3.4.1
 pyrsistent==0.19.3
+regex==2023.6.3
 requests==2.31.0
 ruyaml==0.91.0
 sgqlc==16.1