Merge pull request #1513 from benoit-pierre/pr/translation_avoid_unnecessary_lookups

translation: avoid unnecessary lookups
benoit-pierre authored May 14, 2022
2 parents 38cd7b0 + 51bf53b commit 56f1770
Showing 3 changed files with 233 additions and 34 deletions.
1 change: 1 addition & 0 deletions news.d/feature/1513.core.md
@@ -0,0 +1 @@
Improve translation stage: cut down on unnecessary / duplicate dictionary lookups.
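
For context, here is a minimal standalone sketch of the idea behind the change, using a plain dict and a made-up helper name rather than Plover's actual classes or call signatures: the single-stroke lookup is performed once and its result kept as the fallback, and candidate lookups are skipped entirely once the stroke count exceeds the dictionary's longest key.

# Illustrative sketch only -- not Plover's real API.
def translate_once(definitions, previous_strokes, stroke):
    """Return the longest match ending in `stroke`, reusing the single-stroke lookup."""
    longest_key = max((len(k) for k in definitions), default=0)
    if longest_key == 0:
        return None  # empty dictionary: no lookups at all
    # Single-stroke lookup, done once and kept as the fallback.
    single = definitions.get((stroke,))
    # Multi-stroke candidates, longest first, bounded by longest_key and
    # starting at two strokes so the single stroke is never looked up twice.
    for n in range(min(longest_key, len(previous_strokes) + 1), 1, -1):
        candidate = tuple(previous_strokes[len(previous_strokes) - n + 1:]) + (stroke,)
        match = definitions.get(candidate)
        if match is not None:
            return match
    return single

With `definitions = {('TEFT',): 'test', ('TEFT', '-G'): 'testing'}`, `translate_once(definitions, ['TEFT'], '-G')` costs two lookups: `('-G',)` once, then the two-stroke candidate `('TEFT', '-G')`, which matches.
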
126 changes: 95 additions & 31 deletions plover/translation.py
@@ -292,15 +292,21 @@ def translate_stroke(self, stroke):
stroke -- The Stroke object to process.
"""
mapping = self._lookup_with_prefix(self._state.translations, [stroke])
max_len = self._dictionary.longest_key
mapping = self._lookup_with_prefix(max_len, self._state.translations, [stroke])
macro = _mapping_to_macro(mapping, stroke)
if macro is not None:
self.translate_macro(macro)
return
t = (
self._find_translation_helper(stroke) or
self._find_translation_helper(stroke, system.SUFFIX_KEYS) or
Translation([stroke], mapping)
# No prefix lookups (note we avoid looking up [stroke] again).
self._find_longest_match(2, max_len, stroke) or
# Return [stroke] result if mapped.
(mapping is not None and Translation([stroke], mapping)) or
# No direct match, try with suffixes.
self._find_longest_match(1, max_len, stroke, system.SUFFIX_KEYS) or
# Fallback to untranslate.
Translation([stroke], None)
)
self.translate_translation(t)

@@ -328,15 +334,33 @@ def _do(self, *translations):
self._state.translations.extend(translations)
self._to_do += len(translations)

def _find_translation_helper(self, stroke, suffixes=()):
def _find_longest_match(self, min_len, max_len, stroke, suffixes=()):
'''Find mapping with the longest series of strokes.
min_len -- Minimum number of strokes involved.
max_len -- Maximum number of strokes involved.
stroke -- The latest stroke.
suffixes -- List of suffix keys to try.
Return the corresponding translation, or None if no match is found.
Note: the code either looks for a direct match (empty suffix
list), or assumes the last stroke contains an implicit suffix
and looks for a corresponding match, but not both.
'''
if suffixes:
# Implicit suffix lookup, determine possible suffixes.
suffixes = self._lookup_involved_suffixes(stroke, suffixes)
if not suffixes:
# No suffix involved, abort.
return None
# Figure out how much of the translation buffer can be involved in this
# stroke and build the stroke list for translation.
num_strokes = 1
translation_count = 0
longest_key = self._dictionary.longest_key
for t in reversed(self._state.translations):
num_strokes += len(t)
if num_strokes > longest_key:
if num_strokes > max_len:
break
translation_count += 1
translation_index = len(self._state.translations) - translation_count
@@ -348,48 +372,88 @@ def _find_translation_helper(self, stroke, suffixes=()):
replaced = translations[i:]
strokes = [s for t in replaced for s in t.strokes]
strokes.append(stroke)
mapping = self._lookup_with_prefix(translations[:i], strokes, suffixes)
if len(strokes) < min_len:
continue
mapping = self._lookup_with_prefix(max_len, translations[:i], strokes, suffixes)
if mapping is not None:
t = Translation(strokes, mapping)
t.replaced = replaced
return t
return None

def _lookup_strokes(self, strokes):
'''Look for a matching translation.
strokes -- a list of Stroke instances.
Return the resulting mapping.
'''
return self._dictionary.lookup(tuple(s.rtfcre for s in strokes))

def _lookup_with_suffix(self, strokes, suffixes=()):
'''Look for a matching translation.
suffixes -- A list of (suffix stroke, suffix mapping) pairs to try.
If the suffix list is empty, look for a direct match.
Otherwise, assume the last stroke contains an implicit suffix,
and look for a corresponding match.
'''
if not suffixes:
# No suffix, do a regular lookup.
return self._lookup_strokes(strokes)
for suffix_stroke, suffix_mapping in suffixes:
assert suffix_stroke in strokes[-1]
main_mapping = self._lookup_strokes(strokes[:-1] + [strokes[-1] - suffix_stroke])
if main_mapping is not None:
return main_mapping + ' ' + suffix_mapping
return None

def _lookup_involved_suffixes(self, stroke, suffixes):
'''Find possible implicit suffixes for a stroke.
stroke -- The stroke to check for implicit suffixes.
suffixes -- List of supported suffix keys.
Return a list of (suffix_stroke, suffix_mapping) pairs.
'''
possible_suffixes = []
for suffix_stroke in map(Stroke, suffixes):
if suffix_stroke not in stroke:
continue
suffix_mapping = self._lookup_strokes((suffix_stroke,))
if suffix_mapping is None:
continue
possible_suffixes.append((suffix_stroke, suffix_mapping))
return possible_suffixes

def lookup(self, strokes, suffixes=()):
dict_key = tuple(s.rtfcre for s in strokes)
result = self._dictionary.lookup(dict_key)
result = self._lookup_strokes(strokes)
if result is not None:
return result
for key in suffixes:
if key in strokes[-1].steno_keys:
dict_key = (Stroke([key]).rtfcre,)
suffix_mapping = self._dictionary.lookup(dict_key)
if suffix_mapping is None:
continue
keys = strokes[-1].steno_keys[:]
keys.remove(key)
copy = strokes[:]
copy[-1] = Stroke(keys)
dict_key = tuple(s.rtfcre for s in copy)
main_mapping = self._dictionary.lookup(dict_key)
if main_mapping is None:
continue
return main_mapping + ' ' + suffix_mapping
return None
suffixes = self._lookup_involved_suffixes(strokes[-1], suffixes)
if not suffixes:
return None
return self._lookup_with_suffix(strokes, suffixes)

def _previous_word_is_finished(self, last_translations):
@staticmethod
def _previous_word_is_finished(last_translations):
if not last_translations:
return True
formatting = last_translations[-1].formatting
if not formatting:
return True
return formatting[-1].word_is_finished

def _lookup_with_prefix(self, last_translations, strokes, suffixes=()):
if self._previous_word_is_finished(last_translations):
mapping = self.lookup([Stroke.PREFIX_STROKE] + strokes, suffixes)
def _lookup_with_prefix(self, max_len, last_translations, strokes, suffixes=()):
if len(strokes) < max_len and self._previous_word_is_finished(last_translations):
mapping = self._lookup_with_suffix([Stroke.PREFIX_STROKE] + strokes, suffixes)
if mapping is not None:
return mapping
return self.lookup(strokes, suffixes)
if len(strokes) <= max_len:
return self._lookup_with_suffix(strokes, suffixes)
return None


class _State:
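
To make the suffix handling above concrete, here is a rough sketch of the same idea with plain-dict stand-ins (strokes modeled as frozensets of key names; these are not Plover's `StenoDictionaryCollection` or `Stroke` types): the possible suffixes of the latest stroke are determined once, and each outline tried afterwards only pays for the stripped-outline lookups.

# Illustrative sketch only: strokes are frozensets of key names and
# `definitions` is a plain dict keyed by tuples of such strokes.
SUFFIX_KEYS = ('-Z', '-D', '-S', '-G')

def involved_suffixes(definitions, stroke):
    """Suffix keys that are both present in `stroke` and defined on their own."""
    candidates = []
    for key in SUFFIX_KEYS:
        if key not in stroke:
            continue
        mapping = definitions.get((frozenset({key}),))
        if mapping is not None:
            candidates.append((key, mapping))
    return candidates

def lookup_with_suffix(definitions, outline, suffixes):
    """Try `outline` with each candidate suffix stripped from its last stroke."""
    for key, suffix_mapping in suffixes:
        stripped = outline[:-1] + (outline[-1] - {key},)
        main_mapping = definitions.get(stripped)
        if main_mapping is not None:
            return main_mapping + ' ' + suffix_mapping
    return None

Because the candidate list is computed once per new stroke, trying shorter and shorter outlines afterwards costs at most one lookup per surviving candidate, which is what the new test_lookup_suffixes_once below checks: -Z, -S and -G are each looked up a single time.
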
140 changes: 137 additions & 3 deletions test/test_translation.py
@@ -4,6 +4,7 @@
"""Unit tests for translation.py."""

from collections import namedtuple
import ast
import copy
import operator

@@ -416,15 +417,18 @@ def test_restrict_size_multiple_translations(self):

class TestTranslateStroke:

DICT_COLLECTION_CLASS = StenoDictionaryCollection

class CaptureOutput:
output = namedtuple('output', 'undo do prev')

Output = namedtuple('Output', 'undo do prev')

def __init__(self):
self.output = []

def __call__(self, undo, new, prev):
prev = list(prev) if prev else None
self.output = type(self).output(undo, new, prev)
self.output = self.Output(undo, new, prev)

def t(self, strokes):
"""A quick way to make a translation."""
@@ -470,7 +474,7 @@ def _check_output(self, undo, do, prev):

def setup_method(self):
self.d = StenoDictionary()
self.dc = StenoDictionaryCollection([self.d])
self.dc = self.DICT_COLLECTION_CLASS([self.d])
self.s = _State()
self.o = self.CaptureOutput()
self.tlor = Translator()
@@ -808,3 +812,133 @@ def test_untranslate_translation(self):
def test_escape_unescape_translation(raw, escaped):
assert unescape_translation(escaped) == raw
assert escape_translation(raw) == escaped


class TestNoUnnecessaryLookups(TestTranslateStroke):

# Custom dictionary collection class for tracking lookups.
class DictTracy(StenoDictionaryCollection):

def __init__(self, dicts):
super().__init__(dicts)
self.lookup_history = []

def lookup(self, key):
self.lookup_history.append(key)
return super().lookup(key)

DICT_COLLECTION_CLASS = DictTracy

def _prepare_state(self, definitions, translations):
if definitions:
for steno, english in ast.literal_eval('{' + definitions + '}').items():
self.define(steno, english)
translations = self.lt(translations)
for t in translations:
for s in t.strokes:
self.translate(s.rtfcre)
state = translations[len(translations)-self.dc.longest_key:]
self._check_translations(state)
self.dc.lookup_history.clear()

def _check_lookup_history(self, expected):
# Hide from traceback on assertions (reduce output size for failed tests).
__tracebackhide__ = operator.methodcaller('errisinstance', AssertionError)
result = ['/'.join(key) for key in self.dc.lookup_history]
expected = expected.split()
msg = '''
lookup history:
results: %s
expected: %s
''' % (result, expected)
assert result == expected, msg

def test_zero_lookups(self):
# No lookups at all if longest key is zero.
self.translate('TEFT')
self._check_lookup_history('')
self._check_translations(self.lt('TEFT'))

def test_no_prefix_lookup_over_the_longest_key_limit(self):
self._prepare_state(
'''
"HROPBG/EFT/KAOE": "longest key",
"HRETS": "let's",
"TKO": "do",
"SPH": "some",
"TEFT": "test",
"-G": "{^ing}",
''',
'HRETS TKO SPH TEFT')
self.translate('-G')
self._check_lookup_history(
# Macros.
'''
/-G
-G
'''
# Others.
'''
SPH/TEFT/-G
/TEFT/-G TEFT/-G
'''
)

def test_no_duplicate_lookups_for_longest_no_suffix_match(self):
self._prepare_state(
'''
"TEFT": "test",
"-G": "{^ing}",
''',
'TEFT')
self.translate('TEFGT')
self._check_lookup_history(
# Macros.
'''
TEFGT
'''
# No suffix.
'''
'''
# With suffix.
'''
-G TEFT
'''
)

def test_lookup_suffixes_once(self):
self._prepare_state(
'''
"HROPBG/EFT/KAOE": "longest key",
"HRETS": "let's",
"TEFT": "test",
"SPH": "some",
"SUFBGS": "suffix",
"-G": "{^ing}",
"-S": "{^s}",
"-D": "{^ed}",
"-Z": "{^s}",
''',
'HRETS TEFT SPH')
self.translate('SUFBGSZ')
self._check_lookup_history(
# Macros.
'''
/SUFBGSZ
SUFBGSZ
'''
# Without suffix.
'''
TEFT/SPH/SUFBGSZ
/SPH/SUFBGSZ
SPH/SUFBGSZ
'''
# Suffix lookups.
'''
-Z -S -G
TEFT/SPH/SUFBGS TEFT/SPH/SUFBGZ TEFT/SPH/SUFBSZ
/SPH/SUFBGS /SPH/SUFBGZ /SPH/SUFBSZ
SPH/SUFBGS SPH/SUFBGZ SPH/SUFBSZ
/SUFBGS /SUFBGZ /SUFBSZ
SUFBGS
''')
