Skip to content

Commit

Permalink
Also use native titles for matching (#204)
Browse files: browse the repository at this point in the history
  • Loading branch information
reconman authored Jun 18, 2023
1 parent b7dc801 commit f195231
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 123 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/build-docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ jobs:
packages: write

steps:
# Set IMAGE_NAMES to ghcr.io, also set to docker.io if building a tag or master
# Set IMAGE_NAMES to ghcr.io, additionally add docker.io if building a tag or master
# Limit platforms to amd64 if building for pull requests
- name: Set variables
run: |
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
Expand All @@ -38,6 +39,12 @@ jobs:
if [[ -n "$DOCKER_IO" ]]; then
echo "DOCKER_IO=true" >> $GITHUB_ENV
fi
if [[ ${{ inputs.eventname }} == 'pull_request' ]]; then
echo "PLATFORMS=linux/amd64" >> $GITHUB_ENV
else
echo "PLATFORMS=${{ inputs.platforms }}" >> $GITHUB_ENV
fi
- name: Checkout repository
uses: actions/checkout@v3
- name: Set up QEMU
Expand Down Expand Up @@ -72,7 +79,7 @@ jobs:
with:
context: .
file: ${{ inputs.dockerfile }}
platforms: ${{ inputs.platforms }}
platforms: ${{ env.PLATFORMS }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
push: ${{ inputs.eventname != 'pull_request' }}
Expand Down
151 changes: 33 additions & 118 deletions plexanisync/anilist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dataclasses import dataclass
from typing import Dict, List, Optional
import logging
import re
import regex as re
from statistics import mean
import inflect

Expand Down Expand Up @@ -328,31 +328,11 @@ def __find_mapped_series(self, anilist_series: List[AnilistSeries], anime_id: in
def __match_series_against_potential_titles(
self, series: AnilistSeries, potential_titles: List[str], matched_anilist_series: List[AnilistSeries]
):
if series.title_english:
if series.title_english.lower() in potential_titles:
matched_anilist_series.append(series)
else:
series_title_english_clean = self.__clean_title(series.title_english)
if series_title_english_clean in potential_titles:
matched_anilist_series.append(series)
if series.title_romaji:
if series.title_romaji.lower() in potential_titles:
for title in series.titles():
if (title.lower() in potential_titles
or self.__clean_title(title) in potential_titles):
if series not in matched_anilist_series:
matched_anilist_series.append(series)
else:
series_title_romaji_clean = self.__clean_title(series.title_romaji)
if series_title_romaji_clean in potential_titles:
if series not in matched_anilist_series:
matched_anilist_series.append(series)
if series.synonyms:
for synonym in series.synonyms:
if synonym.lower() in potential_titles:
if series not in matched_anilist_series:
matched_anilist_series.append(series)
else:
synonym_clean = self.__clean_title(synonym)
if synonym_clean in potential_titles:
matched_anilist_series.append(series)

def __find_id_season_best_match(self, title: str, season: int, year: int) -> Optional[int]:
media_id = None
Expand Down Expand Up @@ -397,53 +377,32 @@ def __find_id_season_best_match(self, title: str, season: int, year: int) -> Opt
matches = self.graphql.search_by_name(title)
if matches:
for match in matches:
title_english = ""
title_english_for_matching = ""
title_romaji = ""
title_romaji_for_matching = ""
started_year = ""

if match.title_english:
title_english = match.title_english
title_english_for_matching = self.__clean_title(title_english)
if match.title_romaji:
title_romaji = match.title_romaji
title_romaji_for_matching = self.__clean_title(title_romaji)
if match.started_year:
started_year = match.started_year
else:
started_year = match.started_year
if not started_year:
logger.warning(
"Anilist series did not have year attribute so skipping this result and moving to next: "
f"{title_english} | {title_romaji}"
f"{match.title_english} | {match.title_romaji}"
)
continue

# key = cleaned title, value = original title
titles_for_matching = {self.__clean_title(t): t for t in match.titles()}
for potential_title in potential_titles:
potential_title = self.__clean_title(potential_title)
# logger.info('Comparing AniList: %s | %s[%s] <===> %s' %
# (title_english_for_matching, title_romaji_for_matching, started_year, potential_title))
if title_english_for_matching == potential_title:
# (titles_for_matching, started_year, potential_title))
if potential_title in titles_for_matching:
# Use original title for logging
original_title = titles_for_matching[potential_title]
if started_year < match_year:
logger.warning(
f"Found match: {title_english} [{media_id}] | "
f"Found match: {original_title} [{media_id}] | "
f"skipping as it was released before first season ({started_year} <==> {match_year})"
)
else:
media_id = match.anilist_id
logger.info(
f"Found match: {title_english} [{media_id}]"
)
break
if title_romaji_for_matching == potential_title:
if started_year < match_year:
logger.warning(
f"Found match: {title_romaji} [{media_id}] | "
f"skipping as it was released before first season ({started_year} <==> {match_year})"
)
else:
media_id = match.anilist_id
logger.info(
f"Found match: {title_romaji} [{media_id}]"
f"Found match: {original_title} [{media_id}]"
)
break
if media_id == 0:
Expand All @@ -458,69 +417,25 @@ def __find_id_best_match(self, title: str, year: int) -> Optional[int]:
matches = self.graphql.search_by_name(title)
if matches:
for match in matches:
title_english = ""
title_english_for_matching = ""
title_romaji = ""
title_romaji_for_matching = ""
synonyms = ""
synonyms_for_matching = ""
started_year = None

if match.title_english:
title_english = match.title_english
title_english_for_matching = self.__clean_title(title_english)
if match.title_romaji:
title_romaji = match.title_romaji
title_romaji_for_matching = self.__clean_title(title_romaji)
if match.started_year:
started_year = match.started_year
started_year = match.started_year

# key = cleaned title, value = original title
titles_for_matching = {self.__clean_title(t): t for t in match.titles()}

# logger.info('Comparing AniList: %s | %s[%s] <===> %s[%s]' % (title_english, title_romaji, started_year, match_title, match_year))
if (
match_title == title_english_for_matching
and year == started_year
):
media_id = match.anilist_id
logger.warning(
f"Found match: {title_english} [{media_id}]"
)
break
if (
match_title == title_romaji_for_matching
and year == started_year
):
media_id = match.anilist_id
logger.warning(
f"Found match: {title_romaji} [{media_id}]"
)
break
if match.synonyms:
for synonym in match.synonyms:
synonyms = synonym
synonyms_for_matching = self.__clean_title(synonyms)
if (
match_title == synonyms_for_matching
and year == started_year
):
media_id = match.anilist_id
logger.warning(
f"Found match in synonyms: {synonyms} [{media_id}]"
)
break
if (
match_title == title_romaji_for_matching
and year != started_year
):
logger.info(
f"Found match however started year is a mismatch: {title_romaji} [AL: {started_year} <==> Plex: {year}] "
)
elif (
match_title == title_english_for_matching
and year != started_year
):
logger.info(
f"Found match however started year is a mismatch: {title_english} [AL: {started_year} <==> Plex: {year}] "
)
if match_title in titles_for_matching:
# Use original title for logging
original_title = titles_for_matching[match_title]
if year == started_year:
media_id = match.anilist_id
logger.warning(
f"Found match: {original_title} [{media_id}]"
)
break
else:
logger.info(
f"Found match however started year is a mismatch: {original_title} [AL: {started_year} <==> Plex: {year}] "
)
if media_id is None:
logger.error(f"No match found for title: {title}")
return media_id
Expand Down Expand Up @@ -760,4 +675,4 @@ def __map_watchcount_to_seasons(
return episodes_in_anilist_entry

def __clean_title(self, title: str) -> str:
return re.sub("[^A-Za-z0-9]+", "", title.lower().strip())
return re.sub(r'[^A-Za-z0-9\p{IsHan}\p{IsBopo}\p{IsHira}\p{IsKatakana}]+', "", title.lower().strip())
18 changes: 15 additions & 3 deletions plexanisync/graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,19 @@ class AnilistSeries:
episodes: int
title_english: str
title_romaji: str
title_native: str
synonyms: List[str]
started_year: int
ended_year: int
score: int

def titles(self) -> List[str]:
    """Return every known, non-empty title of this series.

    The order is English, romaji, native, then any synonyms. Empty or
    missing values are dropped, and a title that occurs more than once
    (e.g. when the English and romaji titles are identical, or a synonym
    repeats one of them) is reported only once, at its first position.

    Returns:
        A new list of distinct title strings; ``self.synonyms`` itself is
        never modified.
    """
    candidates = [self.title_english, self.title_romaji, self.title_native]
    if self.synonyms:
        # += on the fresh local list extends it without touching
        # self.synonyms.
        candidates += self.synonyms
    # dict.fromkeys keeps first-seen order while discarding repeats; the
    # generator filters out empty strings and None beforehand.
    return list(dict.fromkeys(title for title in candidates if title))


class GraphQL:
def __init__(self, anilist_settings: SectionProxy):
Expand Down Expand Up @@ -65,7 +73,7 @@ def search_by_id(self, anilist_id: int):
'episodes',
'synonyms'
)
media.title.__fields__('romaji', 'english')
media.title.__fields__('romaji', 'english', 'native')
media.start_date.year()
media.end_date.year()

Expand All @@ -87,7 +95,7 @@ def search_by_name(self, anilist_show_name: str) -> List[AnilistSeries]:
'episodes',
'synonyms'
)
media.title.__fields__('romaji', 'english')
media.title.__fields__('romaji', 'english', 'native')
media.start_date.year()
media.end_date.year()

Expand Down Expand Up @@ -115,7 +123,7 @@ def fetch_user_list(self) -> List[AnilistSeries]:
)
lists.entries.media.start_date.year()
lists.entries.media.end_date.year()
lists.entries.media.title.__fields__('romaji', 'english')
lists.entries.media.title.__fields__('romaji', 'english', 'native')

data = self.__send_graphql_request(operation)
list_items = (operation + data).media_list_collection
Expand Down Expand Up @@ -193,6 +201,7 @@ def __mediaitem_to_object(self, media_item) -> AnilistSeries:
episodes = 0
title_english = ""
title_romaji = ""
title_native = ""
synonyms = []
started_year = 0
ended_year = 0
Expand All @@ -213,6 +222,8 @@ def __mediaitem_to_object(self, media_item) -> AnilistSeries:
title_english = media_item.title.english
if hasattr(media_item.title, "romaji"):
title_romaji = media_item.title.romaji
if hasattr(media_item.title, "native"):
title_native = media_item.title.native
if hasattr(media_item, "synonyms"):
synonyms = media_item.synonyms
if hasattr(media_item.start_date, "year"):
Expand All @@ -232,6 +243,7 @@ def __mediaitem_to_object(self, media_item) -> AnilistSeries:
episodes,
title_english,
title_romaji,
title_native,
synonyms,
started_year,
ended_year,
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jsonschema==4.17.3
PlexAPI==4.13.4
pyreadline3==3.4.1
pyrsistent==0.19.3
regex==2023.6.3
requests==2.31.0
ruyaml==0.91.0
sgqlc==16.1
Expand Down

0 comments on commit f195231

Please sign in to comment.