From 6e23958bea31fd5bc2b5a71ad9ac0d25fa4ea741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felipe=20Ba=C3=B1ados=20Schwerter?= Date: Mon, 25 Nov 2024 22:04:12 +0000 Subject: [PATCH] Separating query and results in what was searchrun --- src/morphodict/frontend/views.py | 2 +- src/morphodict/search/affix.py | 10 +++--- src/morphodict/search/core.py | 12 +++---- src/morphodict/search/cvd_search.py | 6 ++-- src/morphodict/search/espt.py | 7 ++-- src/morphodict/search/lookup.py | 26 +++++++------- src/morphodict/search/pos_matches.py | 3 -- src/morphodict/search/presentation.py | 2 +- src/morphodict/search/runner.py | 41 +++++++++++----------- src/morphodict/tests/espt/test_espt_crk.py | 16 +++++---- 10 files changed, 62 insertions(+), 63 deletions(-) diff --git a/src/morphodict/frontend/views.py b/src/morphodict/frontend/views.py index 4bb233214..d283df35d 100644 --- a/src/morphodict/frontend/views.py +++ b/src/morphodict/frontend/views.py @@ -129,7 +129,7 @@ def index(request): # pragma: no cover context["show_dict_source_setting"] = settings.SHOW_DICT_SOURCE_SETTING context["show_morphemes"] = request.COOKIES.get("show_morphemes") context["show_ic"] = request.COOKIES.get("show_inflectional_category") - if search_results and search_results.verbose_messages and search_results.query.verbose: + if search_results and search_results.verbose_messages and search_results.verbose: context["verbose_messages"] = json.dumps( search_results.verbose_messages, indent=2, ensure_ascii=False ) diff --git a/src/morphodict/search/affix.py b/src/morphodict/search/affix.py index c64004498..20c51f15c 100644 --- a/src/morphodict/search/affix.py +++ b/src/morphodict/search/affix.py @@ -97,18 +97,18 @@ def do_affix_search(query: InternalForm, affixes: AffixSearcher) -> Iterable[Wor return Wordform.objects.filter(id__in=matched_ids) -def do_target_language_affix_search(search_results: core.SearchResults): +def do_target_language_affix_search(query: core.Query, search_results: core.SearchResults): matching_words = do_affix_search( - search_results.internal_query, + query.query_string, cache.target_language_affix_searcher, ) for word in matching_words: search_results.add_result(Result(word, target_language_affix_match=True)) -def do_source_language_affix_search(search_results: core.SearchResults): +def do_source_language_affix_search(query: core.Query, search_results: core.SearchResults): matching_words = do_affix_search( - search_results.internal_query, + query.query_string, cache.source_language_affix_searcher, ) for word in matching_words: @@ -117,7 +117,7 @@ def do_source_language_affix_search(search_results: core.SearchResults): word, source_language_affix_match=True, query_wordform_edit_distance=get_modified_distance( - word.text, search_results.internal_query + word.text, query.query_string ), ) ) diff --git a/src/morphodict/search/core.py b/src/morphodict/search/core.py index 805facad9..21e30d5b9 100644 --- a/src/morphodict/search/core.py +++ b/src/morphodict/search/core.py @@ -24,18 +24,19 @@ class SearchResults: and to add results to the result collection for future ranking. """ - def __init__(self, query: str, include_auto_definitions=None): - self.query = Query(query) + def __init__(self, query: Query, include_auto_definitions=None): self.include_auto_definitions = first_non_none_value( - self.query.auto, include_auto_definitions, default=False + query.auto, include_auto_definitions, default=False ) + self.verbose = query.verbose self._results = {} self._verbose_messages = [] - include_auto_definition: bool + include_auto_definitions: bool _results: dict[WordformKey, types.Result] VerboseMessage = dict[str, str] _verbose_messages: list[VerboseMessage] + verbose: bool # Set this to use a custom sort function sort_function: Optional[Callable[[Result], Any]] = None @@ -149,9 +150,6 @@ def add_verbose_message(self, message=None, **messages): def verbose_messages(self): return self._verbose_messages - @property - def internal_query(self): - return self.query.query_string def __repr__(self): return f"SearchResults" diff --git a/src/morphodict/search/cvd_search.py b/src/morphodict/search/cvd_search.py index dd5e214cd..13c116925 100644 --- a/src/morphodict/search/cvd_search.py +++ b/src/morphodict/search/cvd_search.py @@ -1,7 +1,7 @@ import itertools import logging -from morphodict.search.core import SearchResults +from morphodict.search.core import SearchResults, Query from morphodict.search.types import Result from morphodict.cvd import ( definition_vectors, @@ -19,13 +19,13 @@ logger = logging.getLogger(__name__) -def do_cvd_search(search_results: SearchResults): +def do_cvd_search(query: Query, search_results: SearchResults): """Use cosine vector distance to add results to the search run. Keywords from the query string are turned into vectors from Google News, added together, and then compared against pre-computed definition vectors. """ - keys = extract_keyed_words(search_results.query.query_string, google_news_vectors()) + keys = extract_keyed_words(query.query_string, google_news_vectors()) if not keys: return diff --git a/src/morphodict/search/espt.py b/src/morphodict/search/espt.py index e66574625..33e334a9a 100644 --- a/src/morphodict/search/espt.py +++ b/src/morphodict/search/espt.py @@ -39,8 +39,9 @@ class EsptSearch: other methods. """ - def __init__(self, search_results): + def __init__(self, query, search_results): self.search_results = search_results + self.query = query self.query_analyzed_ok = False def convert_search_query_to_espt(self): @@ -53,7 +54,7 @@ def convert_search_query_to_espt(self): """ self.new_tags = [] analyzed_query = PhraseAnalyzedQuery( - self.search_results.internal_query, + self.query.query_string, add_verbose_message=self.search_results.add_verbose_message, ) if analyzed_query.has_tags: @@ -71,7 +72,7 @@ def convert_search_query_to_espt(self): self.search_results.add_verbose_message(espt_analysis_error=repr(e)) return - self.search_results.query.replace_query(analyzed_query.filtered_query) + self.query.replace_query(analyzed_query.filtered_query) self.query_analyzed_ok = True self.search_results.add_verbose_message( diff --git a/src/morphodict/search/lookup.py b/src/morphodict/search/lookup.py index af552f822..258ffd1b0 100644 --- a/src/morphodict/search/lookup.py +++ b/src/morphodict/search/lookup.py @@ -20,14 +20,14 @@ logger = logging.getLogger(__name__) -def fetch_results(search_results: core.SearchResults): - fetch_results_from_target_language_keywords(search_results) - fetch_results_from_source_language_keywords(search_results) +def fetch_results(query: core.Query, search_results: core.SearchResults): + fetch_results_from_target_language_keywords(query, search_results) + fetch_results_from_source_language_keywords(query, search_results) # Use the spelling relaxation to try to decipher the query # e.g., "atchakosuk" becomes "acâhkos+N+A+Pl" -- # thus, we can match "acâhkos" in the dictionary! - fst_analyses = set(rich_analyze_relaxed(search_results.internal_query)) + fst_analyses = set(rich_analyze_relaxed(query.query_string)) # print([a.tuple for a in fst_analyses]) db_matches = list( @@ -40,7 +40,7 @@ def fetch_results(search_results: core.SearchResults): wf, source_language_match=wf.text, query_wordform_edit_distance=get_modified_distance( - wf.text, search_results.internal_query + wf.text, query.query_string ), ) ) @@ -61,7 +61,7 @@ def fetch_results(search_results: core.SearchResults): logger.error( "Cannot generate normative form for analysis: %s (query: %s)", analysis, - search_results.internal_query, + query.query_string, ) continue @@ -69,7 +69,7 @@ def fetch_results(search_results: core.SearchResults): # closest to what the user typed. normatized_user_query = min( normatized_form_for_analysis, - key=lambda f: get_modified_distance(f, search_results.internal_query), + key=lambda f: get_modified_distance(f, query.query_string), ) possible_lemma_wordforms = best_lemma_matches( @@ -87,7 +87,7 @@ def fetch_results(search_results: core.SearchResults): synthetic_wordform, analyzable_inflection_match=True, query_wordform_edit_distance=get_modified_distance( - search_results.internal_query, + query.query_string, normatized_user_query, ), ) @@ -136,8 +136,8 @@ def best_lemma_matches(analysis, possible_lemmas) -> list[Wordform]: ] -def fetch_results_from_target_language_keywords(search_results): - for stemmed_keyword in stem_keywords(search_results.internal_query): +def fetch_results_from_target_language_keywords(query: core.Query,search_results: core.SearchResults): + for stemmed_keyword in stem_keywords(query.query_string): for wordform in Wordform.objects.filter( target_language_keyword__text__iexact=stemmed_keyword ): @@ -146,9 +146,9 @@ def fetch_results_from_target_language_keywords(search_results): ) -def fetch_results_from_source_language_keywords(search_results): +def fetch_results_from_source_language_keywords(query: core.Query, search_results: core.SearchResults): res = SourceLanguageKeyword.objects.filter( - Q(text=to_source_language_keyword(search_results.internal_query)) + Q(text=to_source_language_keyword(query.query_string)) ) for kw in res: search_results.add_result( @@ -156,7 +156,7 @@ def fetch_results_from_source_language_keywords(search_results): kw.wordform, source_language_keyword_match=[kw.text], query_wordform_edit_distance=get_modified_distance( - search_results.internal_query, kw.wordform.text + query.query_string, kw.wordform.text ), ) ) diff --git a/src/morphodict/search/pos_matches.py b/src/morphodict/search/pos_matches.py index 5f5ab1ead..9a80fe7b3 100644 --- a/src/morphodict/search/pos_matches.py +++ b/src/morphodict/search/pos_matches.py @@ -4,9 +4,6 @@ def find_pos_matches(search_results: SearchResults) -> None: - analyzed_query = AnalyzedQuery(search_results.internal_query) - # print(search_results.verbose_messages["new_tags"]) - if len(search_results.verbose_messages) <= 1: return tags = search_results.verbose_messages[1].get("tags") diff --git a/src/morphodict/search/presentation.py b/src/morphodict/search/presentation.py index 7f1363278..a749bc7b5 100644 --- a/src/morphodict/search/presentation.py +++ b/src/morphodict/search/presentation.py @@ -235,7 +235,7 @@ def serialize(self) -> SerializedPresentationResult: "morphemes": self.morphemes, "lemma_morphemes": self.lemma_morphemes, } - if self._search_results.query.verbose: + if self._search_results.verbose: cast(Any, ret)["verbose_info"] = self._result return ret diff --git a/src/morphodict/search/runner.py b/src/morphodict/search/runner.py index 1efdf0efd..5a015ede7 100644 --- a/src/morphodict/search/runner.py +++ b/src/morphodict/search/runner.py @@ -14,11 +14,9 @@ from morphodict.search.espt import EsptSearch from morphodict.search.lookup import fetch_results from morphodict.search.pos_matches import find_pos_matches -from morphodict.search.query import CvdSearchType +from morphodict.search.query import CvdSearchType, Query from morphodict.search.types import Result from morphodict.search.util import first_non_none_value -from morphodict.utils.types import cast_away_optional - def search( @@ -33,24 +31,27 @@ def search( This class encapsulates the logic of which search methods to try, and in which order, to build up results in a SearchResults object. """ + + search_query = Query(query) search_results = SearchResults( - query=query, include_auto_definitions=include_auto_definitions + search_query, + include_auto_definitions=include_auto_definitions ) - initial_query_terms = search_results.query.query_terms[:] + initial_query_terms = search_query.query_terms[:] # If we need to do english simple phrase search - if (search_results.query.espt or inflect_english_phrases) and ( + if (search_query.espt or inflect_english_phrases) and ( len(initial_query_terms) > 1 ): - espt_search = EsptSearch(search_results) + espt_search = EsptSearch(search_query, search_results) espt_search.convert_search_query_to_espt() if settings.MORPHODICT_ENABLE_CVD: - cvd_search_type = cast_away_optional( - first_non_none_value(search_results.query.cvd, default=CvdSearchType.DEFAULT) - ) - + cvd_search_type = first_non_none_value( + search_query.cvd, + default=CvdSearchType.DEFAULT) + # For when you type 'cvd:exclusive' in a query to debug ONLY CVD results! if cvd_search_type == CvdSearchType.EXCLUSIVE: @@ -58,26 +59,27 @@ def sort_by_cvd(r: Result): return r.cosine_vector_distance search_results.sort_function = sort_by_cvd - do_cvd_search(search_results) + do_cvd_search(search_query, search_results) return search_results - fetch_results(search_results) + fetch_results(search_query, search_results) if ( settings.MORPHODICT_ENABLE_AFFIX_SEARCH and include_affixes - and not query_would_return_too_many_results(search_results.internal_query) + and not query_would_return_too_many_results(search_query.query_string) ): - do_source_language_affix_search(search_results) - do_target_language_affix_search(search_results) + do_source_language_affix_search(search_query, search_results) + do_target_language_affix_search(search_query, search_results) if settings.MORPHODICT_ENABLE_CVD: if cvd_search_type.should_do_search() and not is_almost_certainly_cree( + search_query, search_results ): - do_cvd_search(search_results) + do_cvd_search(search_query, search_results) - if (search_results.query.espt or inflect_english_phrases) and ( + if (search_query.espt or inflect_english_phrases) and ( len(initial_query_terms) > 1 ): espt_search.inflect_search_results() @@ -90,11 +92,10 @@ def sort_by_cvd(r: Result): CREE_LONG_VOWEL = re.compile("[êîôâēīōā]") -def is_almost_certainly_cree(search_results: SearchResults) -> bool: +def is_almost_certainly_cree(query: Query, search_results: SearchResults) -> bool: """ Heuristics intended to AVOID doing an English search. """ - query = search_results.query # If there is a word with two or more dashes in it, it's probably Cree: if any(term.count("-") >= 2 for term in query.query_terms): diff --git a/src/morphodict/tests/espt/test_espt_crk.py b/src/morphodict/tests/espt/test_espt_crk.py index c26873112..2f98313bf 100644 --- a/src/morphodict/tests/espt/test_espt_crk.py +++ b/src/morphodict/tests/espt/test_espt_crk.py @@ -1,6 +1,6 @@ import pytest -from morphodict.search.core import SearchResults +from morphodict.search.core import SearchResults, Query from morphodict.search.espt import EsptSearch, PhraseAnalyzedQuery from morphodict.search.types import Result from morphodict.lexicon.models import Wordform @@ -92,11 +92,12 @@ def test_search_with_tags(query, has_tags, tags, filtered_query): ], ) def test_espt_search(db, search, params): - search_results = SearchResults(search) - espt_search = EsptSearch(search_results) + search_query = Query(search) + search_results = SearchResults(search_query) + espt_search = EsptSearch(search_query,search_results) espt_search.convert_search_query_to_espt() - assert search_results.query.query_terms == params["expected_query_terms"] - assert search_results.query.query_string == " ".join(params["expected_query_terms"]) + assert search_query.query_terms == params["expected_query_terms"] + assert search_query.query_string == " ".join(params["expected_query_terms"]) assert espt_search.new_tags == params["expected_new_tags"] lemma1 = Wordform.objects.get(slug=params["slug"], is_lemma=True) @@ -116,8 +117,9 @@ def test_espt_search(db, search, params): def test_espt_search_doesnt_crash_when_no_analysis(db): - search_results = SearchResults("my little bears") - espt_search = EsptSearch(search_results) + search_query = Query("my little bears") + search_results = SearchResults(search_query) + espt_search = EsptSearch(search_query,search_results) espt_search.convert_search_query_to_espt() wordform = Wordform(text="pê-")