Skip to content

Commit

Permalink
Separating query and results in what was searchrun
Browse files Browse the repository at this point in the history
  • Loading branch information
fbanados committed Nov 25, 2024
1 parent 491f681 commit 6e23958
Show file tree
Hide file tree
Showing 10 changed files with 62 additions and 63 deletions.
2 changes: 1 addition & 1 deletion src/morphodict/frontend/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def index(request): # pragma: no cover
context["show_dict_source_setting"] = settings.SHOW_DICT_SOURCE_SETTING
context["show_morphemes"] = request.COOKIES.get("show_morphemes")
context["show_ic"] = request.COOKIES.get("show_inflectional_category")
if search_results and search_results.verbose_messages and search_results.query.verbose:
if search_results and search_results.verbose_messages and search_results.verbose:
context["verbose_messages"] = json.dumps(
search_results.verbose_messages, indent=2, ensure_ascii=False
)
Expand Down
10 changes: 5 additions & 5 deletions src/morphodict/search/affix.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,18 @@ def do_affix_search(query: InternalForm, affixes: AffixSearcher) -> Iterable[Wor
return Wordform.objects.filter(id__in=matched_ids)


def do_target_language_affix_search(search_results: core.SearchResults):
def do_target_language_affix_search(query: core.Query, search_results: core.SearchResults):
matching_words = do_affix_search(
search_results.internal_query,
query.query_string,
cache.target_language_affix_searcher,
)
for word in matching_words:
search_results.add_result(Result(word, target_language_affix_match=True))


def do_source_language_affix_search(search_results: core.SearchResults):
def do_source_language_affix_search(query: core.Query, search_results: core.SearchResults):
matching_words = do_affix_search(
search_results.internal_query,
query.query_string,
cache.source_language_affix_searcher,
)
for word in matching_words:
Expand All @@ -117,7 +117,7 @@ def do_source_language_affix_search(search_results: core.SearchResults):
word,
source_language_affix_match=True,
query_wordform_edit_distance=get_modified_distance(
word.text, search_results.internal_query
word.text, query.query_string
),
)
)
Expand Down
12 changes: 5 additions & 7 deletions src/morphodict/search/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,19 @@ class SearchResults:
and to add results to the result collection for future ranking.
"""

def __init__(self, query: str, include_auto_definitions=None):
self.query = Query(query)
def __init__(self, query: Query, include_auto_definitions=None):
self.include_auto_definitions = first_non_none_value(
self.query.auto, include_auto_definitions, default=False
query.auto, include_auto_definitions, default=False
)
self.verbose = query.verbose
self._results = {}
self._verbose_messages = []

include_auto_definition: bool
include_auto_definitions: bool
_results: dict[WordformKey, types.Result]
VerboseMessage = dict[str, str]
_verbose_messages: list[VerboseMessage]
verbose: bool
# Set this to use a custom sort function
sort_function: Optional[Callable[[Result], Any]] = None

Expand Down Expand Up @@ -149,9 +150,6 @@ def add_verbose_message(self, message=None, **messages):
def verbose_messages(self):
return self._verbose_messages

@property
def internal_query(self):
return self.query.query_string

def __repr__(self):
return f"SearchResults<query={self.query!r}>"
Expand Down
6 changes: 3 additions & 3 deletions src/morphodict/search/cvd_search.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import itertools
import logging

from morphodict.search.core import SearchResults
from morphodict.search.core import SearchResults, Query
from morphodict.search.types import Result
from morphodict.cvd import (
definition_vectors,
Expand All @@ -19,13 +19,13 @@
logger = logging.getLogger(__name__)


def do_cvd_search(search_results: SearchResults):
def do_cvd_search(query: Query, search_results: SearchResults):
"""Use cosine vector distance to add results to the search run.
Keywords from the query string are turned into vectors from Google News,
added together, and then compared against pre-computed definition vectors.
"""
keys = extract_keyed_words(search_results.query.query_string, google_news_vectors())
keys = extract_keyed_words(query.query_string, google_news_vectors())
if not keys:
return

Expand Down
7 changes: 4 additions & 3 deletions src/morphodict/search/espt.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ class EsptSearch:
other methods.
"""

def __init__(self, search_results):
def __init__(self, query, search_results):
self.search_results = search_results
self.query = query
self.query_analyzed_ok = False

def convert_search_query_to_espt(self):
Expand All @@ -53,7 +54,7 @@ def convert_search_query_to_espt(self):
"""
self.new_tags = []
analyzed_query = PhraseAnalyzedQuery(
self.search_results.internal_query,
self.query.query_string,
add_verbose_message=self.search_results.add_verbose_message,
)
if analyzed_query.has_tags:
Expand All @@ -71,7 +72,7 @@ def convert_search_query_to_espt(self):
self.search_results.add_verbose_message(espt_analysis_error=repr(e))
return

self.search_results.query.replace_query(analyzed_query.filtered_query)
self.query.replace_query(analyzed_query.filtered_query)
self.query_analyzed_ok = True

self.search_results.add_verbose_message(
Expand Down
26 changes: 13 additions & 13 deletions src/morphodict/search/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
logger = logging.getLogger(__name__)


def fetch_results(search_results: core.SearchResults):
fetch_results_from_target_language_keywords(search_results)
fetch_results_from_source_language_keywords(search_results)
def fetch_results(query: core.Query, search_results: core.SearchResults):
fetch_results_from_target_language_keywords(query, search_results)
fetch_results_from_source_language_keywords(query, search_results)

# Use the spelling relaxation to try to decipher the query
# e.g., "atchakosuk" becomes "acâhkos+N+A+Pl" --
# thus, we can match "acâhkos" in the dictionary!
fst_analyses = set(rich_analyze_relaxed(search_results.internal_query))
fst_analyses = set(rich_analyze_relaxed(query.query_string))
# print([a.tuple for a in fst_analyses])

db_matches = list(
Expand All @@ -40,7 +40,7 @@ def fetch_results(search_results: core.SearchResults):
wf,
source_language_match=wf.text,
query_wordform_edit_distance=get_modified_distance(
wf.text, search_results.internal_query
wf.text, query.query_string
),
)
)
Expand All @@ -61,15 +61,15 @@ def fetch_results(search_results: core.SearchResults):
logger.error(
"Cannot generate normative form for analysis: %s (query: %s)",
analysis,
search_results.internal_query,
query.query_string,
)
continue

# If there are multiple forms for this analysis, use the one that is
# closest to what the user typed.
normatized_user_query = min(
normatized_form_for_analysis,
key=lambda f: get_modified_distance(f, search_results.internal_query),
key=lambda f: get_modified_distance(f, query.query_string),
)

possible_lemma_wordforms = best_lemma_matches(
Expand All @@ -87,7 +87,7 @@ def fetch_results(search_results: core.SearchResults):
synthetic_wordform,
analyzable_inflection_match=True,
query_wordform_edit_distance=get_modified_distance(
search_results.internal_query,
query.query_string,
normatized_user_query,
),
)
Expand Down Expand Up @@ -136,8 +136,8 @@ def best_lemma_matches(analysis, possible_lemmas) -> list[Wordform]:
]


def fetch_results_from_target_language_keywords(search_results):
for stemmed_keyword in stem_keywords(search_results.internal_query):
def fetch_results_from_target_language_keywords(query: core.Query,search_results: core.SearchResults):
for stemmed_keyword in stem_keywords(query.query_string):
for wordform in Wordform.objects.filter(
target_language_keyword__text__iexact=stemmed_keyword
):
Expand All @@ -146,17 +146,17 @@ def fetch_results_from_target_language_keywords(search_results):
)


def fetch_results_from_source_language_keywords(search_results):
def fetch_results_from_source_language_keywords(query: core.Query, search_results: core.SearchResults):
res = SourceLanguageKeyword.objects.filter(
Q(text=to_source_language_keyword(search_results.internal_query))
Q(text=to_source_language_keyword(query.query_string))
)
for kw in res:
search_results.add_result(
Result(
kw.wordform,
source_language_keyword_match=[kw.text],
query_wordform_edit_distance=get_modified_distance(
search_results.internal_query, kw.wordform.text
query.query_string, kw.wordform.text
),
)
)
3 changes: 0 additions & 3 deletions src/morphodict/search/pos_matches.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@


def find_pos_matches(search_results: SearchResults) -> None:
analyzed_query = AnalyzedQuery(search_results.internal_query)
# print(search_results.verbose_messages["new_tags"])

if len(search_results.verbose_messages) <= 1:
return
tags = search_results.verbose_messages[1].get("tags")
Expand Down
2 changes: 1 addition & 1 deletion src/morphodict/search/presentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def serialize(self) -> SerializedPresentationResult:
"morphemes": self.morphemes,
"lemma_morphemes": self.lemma_morphemes,
}
if self._search_results.query.verbose:
if self._search_results.verbose:
cast(Any, ret)["verbose_info"] = self._result

return ret
Expand Down
41 changes: 21 additions & 20 deletions src/morphodict/search/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@
from morphodict.search.espt import EsptSearch
from morphodict.search.lookup import fetch_results
from morphodict.search.pos_matches import find_pos_matches
from morphodict.search.query import CvdSearchType
from morphodict.search.query import CvdSearchType, Query
from morphodict.search.types import Result
from morphodict.search.util import first_non_none_value
from morphodict.utils.types import cast_away_optional



def search(
Expand All @@ -33,51 +31,55 @@ def search(
This function encapsulates the logic of which search methods to try, and in
which order, to build up results in a SearchResults object.
"""

search_query = Query(query)
search_results = SearchResults(
query=query, include_auto_definitions=include_auto_definitions
search_query,
include_auto_definitions=include_auto_definitions
)

initial_query_terms = search_results.query.query_terms[:]
initial_query_terms = search_query.query_terms[:]

# If we need to do english simple phrase search
if (search_results.query.espt or inflect_english_phrases) and (
if (search_query.espt or inflect_english_phrases) and (
len(initial_query_terms) > 1
):
espt_search = EsptSearch(search_results)
espt_search = EsptSearch(search_query, search_results)
espt_search.convert_search_query_to_espt()

if settings.MORPHODICT_ENABLE_CVD:
cvd_search_type = cast_away_optional(
first_non_none_value(search_results.query.cvd, default=CvdSearchType.DEFAULT)
)

cvd_search_type = first_non_none_value(
search_query.cvd,
default=CvdSearchType.DEFAULT)
# For when you type 'cvd:exclusive' in a query to debug ONLY CVD results!
if cvd_search_type == CvdSearchType.EXCLUSIVE:

def sort_by_cvd(r: Result):
return r.cosine_vector_distance

search_results.sort_function = sort_by_cvd
do_cvd_search(search_results)
do_cvd_search(search_query, search_results)
return search_results

fetch_results(search_results)
fetch_results(search_query, search_results)

if (
settings.MORPHODICT_ENABLE_AFFIX_SEARCH
and include_affixes
and not query_would_return_too_many_results(search_results.internal_query)
and not query_would_return_too_many_results(search_query.query_string)
):
do_source_language_affix_search(search_results)
do_target_language_affix_search(search_results)
do_source_language_affix_search(search_query, search_results)
do_target_language_affix_search(search_query, search_results)

if settings.MORPHODICT_ENABLE_CVD:
if cvd_search_type.should_do_search() and not is_almost_certainly_cree(
search_query,
search_results
):
do_cvd_search(search_results)
do_cvd_search(search_query, search_results)

if (search_results.query.espt or inflect_english_phrases) and (
if (search_query.espt or inflect_english_phrases) and (
len(initial_query_terms) > 1
):
espt_search.inflect_search_results()
Expand All @@ -90,11 +92,10 @@ def sort_by_cvd(r: Result):

CREE_LONG_VOWEL = re.compile("[êîôâēīōā]")

def is_almost_certainly_cree(search_results: SearchResults) -> bool:
def is_almost_certainly_cree(query: Query, search_results: SearchResults) -> bool:
"""
Heuristics intended to AVOID doing an English search.
"""
query = search_results.query

# If there is a word with two or more dashes in it, it's probably Cree:
if any(term.count("-") >= 2 for term in query.query_terms):
Expand Down
16 changes: 9 additions & 7 deletions src/morphodict/tests/espt/test_espt_crk.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from morphodict.search.core import SearchResults
from morphodict.search.core import SearchResults, Query
from morphodict.search.espt import EsptSearch, PhraseAnalyzedQuery
from morphodict.search.types import Result
from morphodict.lexicon.models import Wordform
Expand Down Expand Up @@ -92,11 +92,12 @@ def test_search_with_tags(query, has_tags, tags, filtered_query):
],
)
def test_espt_search(db, search, params):
search_results = SearchResults(search)
espt_search = EsptSearch(search_results)
search_query = Query(search)
search_results = SearchResults(search_query)
espt_search = EsptSearch(search_query,search_results)
espt_search.convert_search_query_to_espt()
assert search_results.query.query_terms == params["expected_query_terms"]
assert search_results.query.query_string == " ".join(params["expected_query_terms"])
assert search_query.query_terms == params["expected_query_terms"]
assert search_query.query_string == " ".join(params["expected_query_terms"])
assert espt_search.new_tags == params["expected_new_tags"]

lemma1 = Wordform.objects.get(slug=params["slug"], is_lemma=True)
Expand All @@ -116,8 +117,9 @@ def test_espt_search(db, search, params):


def test_espt_search_doesnt_crash_when_no_analysis(db):
search_results = SearchResults("my little bears")
espt_search = EsptSearch(search_results)
search_query = Query("my little bears")
search_results = SearchResults(search_query)
espt_search = EsptSearch(search_query,search_results)
espt_search.convert_search_query_to_espt()

wordform = Wordform(text="pê-")
Expand Down

0 comments on commit 6e23958

Please sign in to comment.