From b11509efb4550c6c6945c12124ac9bb6cbfd6933 Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Sat, 26 Mar 2022 15:38:56 +0100
Subject: [PATCH 1/9] translation: small simplification

---
 plover/translation.py | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/plover/translation.py b/plover/translation.py
index 7432195cb..a6594efff 100644
--- a/plover/translation.py
+++ b/plover/translation.py
@@ -354,26 +354,30 @@ def _find_translation_helper(self, stroke, suffixes=()):
                 t.replaced = replaced
                 return t
 
+    def _lookup_strokes(self, strokes):
+        '''Look for a matching translation.
+
+        strokes -- a list of Stroke instances.
+
+        Return the resulting mapping.
+        '''
+        return self._dictionary.lookup(tuple(s.rtfcre for s in strokes))
+
     def lookup(self, strokes, suffixes=()):
-        dict_key = tuple(s.rtfcre for s in strokes)
-        result = self._dictionary.lookup(dict_key)
+        result = self._lookup_strokes(strokes)
         if result is not None:
             return result
-        for key in suffixes:
-            if key in strokes[-1].steno_keys:
-                dict_key = (Stroke([key]).rtfcre,)
-                suffix_mapping = self._dictionary.lookup(dict_key)
-                if suffix_mapping is None:
-                    continue
-                keys = strokes[-1].steno_keys[:]
-                keys.remove(key)
-                copy = strokes[:]
-                copy[-1] = Stroke(keys)
-                dict_key = tuple(s.rtfcre for s in copy)
-                main_mapping = self._dictionary.lookup(dict_key)
-                if main_mapping is None:
-                    continue
-                return main_mapping + ' ' + suffix_mapping
+        for suffix_key in suffixes:
+            suffix_stroke = strokes[-1] & suffix_key
+            if not suffix_stroke:
+                continue
+            suffix_mapping = self._lookup_strokes((suffix_stroke,))
+            if suffix_mapping is None:
+                continue
+            main_mapping = self._lookup_strokes(strokes[:-1] + [strokes[-1] - suffix_stroke])
+            if main_mapping is None:
+                continue
+            return main_mapping + ' ' + suffix_mapping
         return None
 
     def _previous_word_is_finished(self, last_translations):

From a2dda931de3ebc0448b449134bd0dec92fb4db88 Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Sat, 26 Mar 2022 15:40:50 +0100
Subject: [PATCH 2/9] translation: minor cleanup

---
 plover/translation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/plover/translation.py b/plover/translation.py
index a6594efff..7d522fd37 100644
--- a/plover/translation.py
+++ b/plover/translation.py
@@ -380,7 +380,8 @@ def lookup(self, strokes, suffixes=()):
             return main_mapping + ' ' + suffix_mapping
         return None
 
-    def _previous_word_is_finished(self, last_translations):
+    @staticmethod
+    def _previous_word_is_finished(last_translations):
         if not last_translations:
             return True
         formatting = last_translations[-1].formatting
From b110fbe8a6d5f42f4d177d02fe08985663e18cec Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Fri, 25 Mar 2022 18:36:41 +0100
Subject: [PATCH 3/9] test: add framework for checking for unnecessary translation lookups

---
 test/test_translation.py | 50 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/test/test_translation.py b/test/test_translation.py
index 886ea3db1..372e48704 100644
--- a/test/test_translation.py
+++ b/test/test_translation.py
@@ -4,6 +4,7 @@
 """Unit tests for translation.py."""
 
 from collections import namedtuple
+import ast
 import copy
 import operator
 
@@ -416,15 +417,18 @@ def test_restrict_size_multiple_translations(self):
 
 class TestTranslateStroke:
 
+    DICT_COLLECTION_CLASS = StenoDictionaryCollection
+
     class CaptureOutput:
-        output = namedtuple('output', 'undo do prev')
+
+        Output = namedtuple('Output', 'undo do prev')
 
         def __init__(self):
             self.output = []
 
         def __call__(self, undo, new, prev):
             prev = list(prev) if prev else None
-            self.output = type(self).output(undo, new, prev)
+            self.output = self.Output(undo, new, prev)
 
     def t(self, strokes):
         """A quick way to make a translation."""
@@ -470,7 +474,7 @@ def _check_output(self, undo, do, prev):
 
     def setup_method(self):
         self.d = StenoDictionary()
-        self.dc = StenoDictionaryCollection([self.d])
+        self.dc = self.DICT_COLLECTION_CLASS([self.d])
         self.s = _State()
         self.o = self.CaptureOutput()
         self.tlor = Translator()
@@ -808,3 +812,43 @@ def test_untranslate_translation(self):
 def test_escape_unescape_translation(raw, escaped):
     assert unescape_translation(escaped) == raw
     assert escape_translation(raw) == escaped
+
+
+class TestNoUnnecessaryLookups(TestTranslateStroke):
+
+    # Custom dictionary collection class for tracking lookups.
+    class DictTracy(StenoDictionaryCollection):
+
+        def __init__(self, dicts):
+            super().__init__(dicts)
+            self.lookup_history = []
+
+        def lookup(self, key):
+            self.lookup_history.append(key)
+            return super().lookup(key)
+
+    DICT_COLLECTION_CLASS = DictTracy
+
+    def _prepare_state(self, definitions, translations):
+        if definitions:
+            for steno, english in ast.literal_eval('{' + definitions + '}').items():
+                self.define(steno, english)
+        translations = self.lt(translations)
+        for t in translations:
+            for s in t.strokes:
+                self.translate(s.rtfcre)
+        state = translations[len(translations)-self.dc.longest_key:]
+        self._check_translations(state)
+        self.dc.lookup_history.clear()
+
+    def _check_lookup_history(self, expected):
+        # Hide from traceback on assertions (reduce output size for failed tests).
+        __tracebackhide__ = operator.methodcaller('errisinstance', AssertionError)
+        result = ['/'.join(key) for key in self.dc.lookup_history]
+        expected = expected.split()
+        msg = '''
+        lookup history:
+        results: %s
+        expected: %s
+        ''' % (result, expected)
+        assert result == expected, msg

From d60dd150188d0f32d7e6a5ec4e76131ccc5ae00d Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Fri, 25 Mar 2022 22:39:47 +0100
Subject: [PATCH 4/9] translation: avoid lookups over the longest key limit
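
No entry in the dictionary collection can span more strokes than its
longest key, so candidate outlines over that limit can be skipped
without any lookup. As a rough standalone illustration (a plain dict
keyed by tuples of steno strings, not the real StenoDictionaryCollection
API):

    d = {('HROPBG', 'EFT', 'KAOE'): 'longest key', ('TEFT',): 'test'}
    longest_key = max(len(k) for k in d)        # 3 strokes
    outline = ('HRETS', 'TKO', 'SPH', 'TEFT')   # 4 strokes
    # An outline longer than longest_key can never match,
    # so looking it up would be wasted work.
    assert len(outline) > longest_key
    assert d.get(outline) is None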
---
 plover/translation.py    | 30 ++++++++++++++++++++----------
 test/test_translation.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/plover/translation.py b/plover/translation.py
index 7d522fd37..a34a918c8 100644
--- a/plover/translation.py
+++ b/plover/translation.py
@@ -292,14 +292,15 @@ def translate_stroke(self, stroke):
         stroke -- The Stroke object to process.
 
         """
-        mapping = self._lookup_with_prefix(self._state.translations, [stroke])
+        max_len = self._dictionary.longest_key
+        mapping = self._lookup_with_prefix(max_len, self._state.translations, [stroke])
         macro = _mapping_to_macro(mapping, stroke)
         if macro is not None:
             self.translate_macro(macro)
             return
         t = (
-            self._find_translation_helper(stroke) or
-            self._find_translation_helper(stroke, system.SUFFIX_KEYS) or
+            self._find_longest_match(max_len, stroke) or
+            self._find_longest_match(max_len, stroke, system.SUFFIX_KEYS) or
             Translation([stroke], mapping)
         )
         self.translate_translation(t)
@@ -328,15 +329,22 @@ def _do(self, *translations):
         self._state.translations.extend(translations)
         self._to_do += len(translations)
 
-    def _find_translation_helper(self, stroke, suffixes=()):
+    def _find_longest_match(self, max_len, stroke, suffixes=()):
+        '''Find mapping with the longest series of strokes.
+
+        max_len -- Maximum number of strokes involved.
+        stroke -- The latest stroke.
+        suffixes -- List of suffix keys to try.
+
+        Return the corresponding translation, or None if no match is found.
+        '''
         # Figure out how much of the translation buffer can be involved in this
         # stroke and build the stroke list for translation.
         num_strokes = 1
         translation_count = 0
-        longest_key = self._dictionary.longest_key
         for t in reversed(self._state.translations):
             num_strokes += len(t)
-            if num_strokes > longest_key:
+            if num_strokes > max_len:
                 break
             translation_count += 1
         translation_index = len(self._state.translations) - translation_count
@@ -348,7 +356,7 @@ def _find_translation_helper(self, stroke, suffixes=()):
             replaced = translations[i:]
             strokes = [s for t in replaced for s in t.strokes]
             strokes.append(stroke)
-            mapping = self._lookup_with_prefix(translations[:i], strokes, suffixes)
+            mapping = self._lookup_with_prefix(max_len, translations[:i], strokes, suffixes)
             if mapping is not None:
                 t = Translation(strokes, mapping)
                 t.replaced = replaced
@@ -389,12 +397,14 @@ def _previous_word_is_finished(last_translations):
             return True
         return formatting[-1].word_is_finished
 
-    def _lookup_with_prefix(self, last_translations, strokes, suffixes=()):
-        if self._previous_word_is_finished(last_translations):
+    def _lookup_with_prefix(self, max_len, last_translations, strokes, suffixes=()):
+        if len(strokes) < max_len and self._previous_word_is_finished(last_translations):
             mapping = self.lookup([Stroke.PREFIX_STROKE] + strokes, suffixes)
             if mapping is not None:
                 return mapping
-        return self.lookup(strokes, suffixes)
+        if len(strokes) <= max_len:
+            return self.lookup(strokes, suffixes)
+        return None
 
 
 class _State:
diff --git a/test/test_translation.py b/test/test_translation.py
index 372e48704..3ddb97516 100644
--- a/test/test_translation.py
+++ b/test/test_translation.py
@@ -852,3 +852,35 @@ def _check_lookup_history(self, expected):
         expected: %s
         ''' % (result, expected)
         assert result == expected, msg
+
+    def test_zero_lookups(self):
+        # No lookups at all if longest key is zero.
+        self.translate('TEFT')
+        self._check_lookup_history('')
+        self._check_translations(self.lt('TEFT'))
+
+    def test_no_prefix_lookup_over_the_longest_key_limit(self):
+        self._prepare_state(
+            '''
+            "HROPBG/EFT/KAOE": "longest key",
+            "HRETS": "let's",
+            "TKO": "do",
+            "SPH": "some",
+            "TEFT": "test",
+            "-G": "{^ing}",
+            ''',
+            'HRETS TKO SPH TEFT')
+        self.translate('-G')
+        self._check_lookup_history(
+            # Macros.
+            '''
+            /-G
+            -G
+            '''
+            # Others.
+            '''
+            SPH/TEFT/-G
+            /TEFT/-G TEFT/-G
+            /-G -G
+            '''
+        )

From 6a7f689edf23ca805b6c8e53e2259039be8b66a4 Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Sat, 26 Mar 2022 01:56:35 +0100
Subject: [PATCH 5/9] translation: avoid some duplicate lookups

When calling `_find_longest_match` a second time to account for a
possible implicit suffix in the last stroke, don't duplicate lookups
for a non-suffix match.
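
As a rough, self-contained sketch of the resulting split (strokes
modeled as frozensets of keys and a plain dict, not the real Plover
classes), the direct pass and the implicit-suffix pass are now separate
steps, so the suffix pass never repeats a direct lookup:

    def lookup_direct(dictionary, strokes):
        # Direct match on the full outline.
        return dictionary.get(tuple(strokes))

    def lookup_with_suffix(dictionary, strokes, suffixes):
        # suffixes: (suffix_stroke, suffix_text) pairs contained in the
        # last stroke and defined in the dictionary.
        last = strokes[-1]
        for suffix_stroke, suffix_text in suffixes:
            main = dictionary.get(tuple(strokes[:-1]) + (last - suffix_stroke,))
            if main is not None:
                return main + ' ' + suffix_text
        return None

    d = {
        (frozenset({'T-', 'E', 'F', '-T'}),): 'test',
        (frozenset({'-G'}),): '{^ing}',
    }
    outline = [frozenset({'T-', 'E', 'F', '-G', '-T'})]    # TEFGT
    assert lookup_direct(d, outline) is None
    assert lookup_with_suffix(d, outline,
                              [(frozenset({'-G'}), '{^ing}')]) == 'test {^ing}'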
---
 plover/translation.py    | 31 +++++++++++++++++++++++++------
 test/test_translation.py | 23 +++++++++++++++++++++++
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/plover/translation.py b/plover/translation.py
index a34a918c8..8b9b73f0d 100644
--- a/plover/translation.py
+++ b/plover/translation.py
@@ -337,6 +337,10 @@ def _find_longest_match(self, max_len, stroke, suffixes=()):
         suffixes -- List of suffix keys to try.
 
         Return the corresponding translation, or None if no match is found.
+
+        Note: the code either looks for a direct match (empty suffix
+        list), or assumes the last stroke contains an implicit suffix
+        and looks for a corresponding match, but not both.
         '''
         # Figure out how much of the translation buffer can be involved in this
         # stroke and build the stroke list for translation.
@@ -371,10 +375,19 @@ def _lookup_strokes(self, strokes):
         '''
         return self._dictionary.lookup(tuple(s.rtfcre for s in strokes))
 
-    def lookup(self, strokes, suffixes=()):
-        result = self._lookup_strokes(strokes)
-        if result is not None:
-            return result
+    def _lookup_with_suffix(self, strokes, suffixes=()):
+        '''Look for a matching translation.
+
+        suffixes -- A list of suffix keys to try.
+
+        If the suffix list is empty, look for a direct match.
+
+        Otherwise, assume the last stroke contains an implicit suffix,
+        and look for a corresponding match.
+        '''
+        if not suffixes:
+            # No suffix, do a regular lookup.
+            return self._lookup_strokes(strokes)
         for suffix_key in suffixes:
             suffix_stroke = strokes[-1] & suffix_key
             if not suffix_stroke:
@@ -388,6 +401,12 @@ def lookup(self, strokes, suffixes=()):
             return main_mapping + ' ' + suffix_mapping
         return None
 
+    def lookup(self, strokes, suffixes=()):
+        result = self._lookup_strokes(strokes)
+        if result is not None:
+            return result
+        return self._lookup_with_suffix(strokes, suffixes)
+
     @staticmethod
     def _previous_word_is_finished(last_translations):
         if not last_translations:
@@ -399,11 +418,11 @@ def _previous_word_is_finished(last_translations):
 
     def _lookup_with_prefix(self, max_len, last_translations, strokes, suffixes=()):
         if len(strokes) < max_len and self._previous_word_is_finished(last_translations):
-            mapping = self.lookup([Stroke.PREFIX_STROKE] + strokes, suffixes)
+            mapping = self._lookup_with_suffix([Stroke.PREFIX_STROKE] + strokes, suffixes)
             if mapping is not None:
                 return mapping
         if len(strokes) <= max_len:
-            return self.lookup(strokes, suffixes)
+            return self._lookup_with_suffix(strokes, suffixes)
         return None
 
 
diff --git a/test/test_translation.py b/test/test_translation.py
index 3ddb97516..9df25960f 100644
--- a/test/test_translation.py
+++ b/test/test_translation.py
@@ -884,3 +884,26 @@ def test_no_prefix_lookup_over_the_longest_key_limit(self):
             /-G -G
             '''
         )
+
+    def test_no_duplicate_lookups_for_longest_no_suffix_match(self):
+        self._prepare_state(
+            '''
+            "TEFT": "test",
+            "-G": "{^ing}",
+            ''',
+            'TEFT')
+        self.translate('TEFGT')
+        self._check_lookup_history(
+            # Macros.
+            '''
+            TEFGT
+            '''
+            # No suffix.
+            '''
+            TEFGT
+            '''
+            # With suffix.
+            '''
+            -G TEFT
+            '''
+        )

From 02efbf4ac5722eb7f9ab4ac6ae8d05286a551713 Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Sat, 26 Mar 2022 02:03:53 +0100
Subject: [PATCH 6/9] translation: avoid some duplicate lookups

Only look up each possible implicit suffix once.
---
 plover/translation.py    | 39 ++++++++++++++++++++++++++++++---------
 test/test_translation.py | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/plover/translation.py b/plover/translation.py
index 8b9b73f0d..2eaae1a2b 100644
--- a/plover/translation.py
+++ b/plover/translation.py
@@ -342,6 +342,12 @@ def _find_longest_match(self, max_len, stroke, suffixes=()):
         list), or assumes the last stroke contains an implicit suffix
         and looks for a corresponding match, but not both.
         '''
+        if suffixes:
+            # Implicit suffix lookup, determine possible suffixes.
+            suffixes = self._lookup_involved_suffixes(stroke, suffixes)
+            if not suffixes:
+                # No suffix involved, abort.
+                return None
         # Figure out how much of the translation buffer can be involved in this
         # stroke and build the stroke list for translation.
         num_strokes = 1
@@ -378,7 +384,7 @@ def _lookup_strokes(self, strokes):
     def _lookup_with_suffix(self, strokes, suffixes=()):
         '''Look for a matching translation.
 
-        suffixes -- A list of suffix keys to try.
+        suffixes -- A list of (suffix stroke, suffix mapping) pairs to try.
 
         If the suffix list is empty, look for a direct match.
 
@@ -388,23 +394,38 @@ def _lookup_with_suffix(self, strokes, suffixes=()):
         if not suffixes:
             # No suffix, do a regular lookup.
             return self._lookup_strokes(strokes)
-        for suffix_key in suffixes:
-            suffix_stroke = strokes[-1] & suffix_key
-            if not suffix_stroke:
+        for suffix_stroke, suffix_mapping in suffixes:
+            assert suffix_stroke in strokes[-1]
+            main_mapping = self._lookup_strokes(strokes[:-1] + [strokes[-1] - suffix_stroke])
+            if main_mapping is not None:
+                return main_mapping + ' ' + suffix_mapping
+        return None
+
+    def _lookup_involved_suffixes(self, stroke, suffixes):
+        '''Find possible implicit suffixes for a stroke.
+
+        stroke -- The stroke to check for implicit suffixes.
+        suffixes -- List of supported suffix keys.
+
+        Return a list of (suffix_stroke, suffix_mapping) pairs.
+        '''
+        possible_suffixes = []
+        for suffix_stroke in map(Stroke, suffixes):
+            if suffix_stroke not in stroke:
                 continue
             suffix_mapping = self._lookup_strokes((suffix_stroke,))
             if suffix_mapping is None:
                 continue
-            main_mapping = self._lookup_strokes(strokes[:-1] + [strokes[-1] - suffix_stroke])
-            if main_mapping is None:
-                continue
-            return main_mapping + ' ' + suffix_mapping
-        return None
+            possible_suffixes.append((suffix_stroke, suffix_mapping))
+        return possible_suffixes
 
     def lookup(self, strokes, suffixes=()):
         result = self._lookup_strokes(strokes)
         if result is not None:
             return result
+        suffixes = self._lookup_involved_suffixes(strokes[-1], suffixes)
+        if not suffixes:
+            return None
         return self._lookup_with_suffix(strokes, suffixes)
 
     @staticmethod
diff --git a/test/test_translation.py b/test/test_translation.py
index 9df25960f..7012d614c 100644
--- a/test/test_translation.py
+++ b/test/test_translation.py
@@ -907,3 +907,42 @@ def test_no_duplicate_lookups_for_longest_no_suffix_match(self):
             -G TEFT
             '''
         )
+
+    def test_lookup_suffixes_once(self):
+        self._prepare_state(
+            '''
+            "HROPBG/EFT/KAOE": "longest key",
+            "HRETS": "let's",
+            "TEFT": "test",
+            "SPH": "some",
+            "SUFBGS": "suffix",
+            "-G": "{^ing}",
+            "-S": "{^s}",
+            "-D": "{^ed}",
+            "-Z": "{^s}",
+            ''',
+            'HRETS TEFT SPH')
+        self.translate('SUFBGSZ')
+        self._check_lookup_history(
+            # Macros.
+            '''
+            /SUFBGSZ
+            SUFBGSZ
+            '''
+            # Without suffix.
+            '''
+            TEFT/SPH/SUFBGSZ
+            /SPH/SUFBGSZ
+            SPH/SUFBGSZ
+            /SUFBGSZ
+            SUFBGSZ
+            '''
+            # Suffix lookups.
+            '''
+            -Z -S -G
+            TEFT/SPH/SUFBGS TEFT/SPH/SUFBGZ TEFT/SPH/SUFBSZ
+            /SPH/SUFBGS /SPH/SUFBGZ /SPH/SUFBSZ
+            SPH/SUFBGS SPH/SUFBGZ SPH/SUFBSZ
+            /SUFBGS /SUFBGZ /SUFBSZ
+            SUFBGS
+            ''')

From 2d40df370ebc53085bb05f608e7e338cf081450d Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Sat, 26 Mar 2022 03:13:49 +0100
Subject: [PATCH 7/9] translation: avoid some duplicate lookups

Reuse the result of the initial (macro check) lookups of the last
stroke if needed, instead of re-doing them.
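
The gist, as a rough standalone illustration (a toy dict wrapper that
counts lookups, not the real StenoDictionaryCollection):

    class CountingDict(dict):
        # Toy dictionary recording how often each key is looked up.
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.counts = {}

        def get(self, key, default=None):
            self.counts[key] = self.counts.get(key, 0) + 1
            return super().get(key, default)

    d = CountingDict({('TEFT',): 'test'})
    mapping = d.get(('TEFT',))             # initial lookup (macro check)
    assert d.get(('SPH', 'TEFT')) is None  # longer candidates first...
    translation = mapping                  # ...then reuse the saved result
    assert translation == 'test'
    assert d.counts[('TEFT',)] == 1        # the single stroke was looked up once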
---
 plover/translation.py    | 16 ++++++++++++----
 test/test_translation.py |  4 ----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/plover/translation.py b/plover/translation.py
index 2eaae1a2b..6fb04b619 100644
--- a/plover/translation.py
+++ b/plover/translation.py
@@ -299,9 +299,14 @@ def translate_stroke(self, stroke):
             self.translate_macro(macro)
             return
         t = (
-            self._find_longest_match(max_len, stroke) or
-            self._find_longest_match(max_len, stroke, system.SUFFIX_KEYS) or
-            Translation([stroke], mapping)
+            # No prefix lookups (note we avoid looking up [stroke] again).
+            self._find_longest_match(2, max_len, stroke) or
+            # Return [stroke] result if mapped.
+            (mapping is not None and Translation([stroke], mapping)) or
+            # No direct match, try with suffixes.
+            self._find_longest_match(1, max_len, stroke, system.SUFFIX_KEYS) or
+            # Fallback to untranslate.
+            Translation([stroke], None)
         )
         self.translate_translation(t)
 
@@ -329,9 +334,10 @@ def _do(self, *translations):
         self._state.translations.extend(translations)
         self._to_do += len(translations)
 
-    def _find_longest_match(self, max_len, stroke, suffixes=()):
+    def _find_longest_match(self, min_len, max_len, stroke, suffixes=()):
         '''Find mapping with the longest series of strokes.
 
+        min_len -- Minimum number of strokes involved.
         max_len -- Maximum number of strokes involved.
         stroke -- The latest stroke.
         suffixes -- List of suffix keys to try.
@@ -366,6 +372,8 @@ def _find_longest_match(self, max_len, stroke, suffixes=()):
             replaced = translations[i:]
             strokes = [s for t in replaced for s in t.strokes]
             strokes.append(stroke)
+            if len(strokes) < min_len:
+                continue
             mapping = self._lookup_with_prefix(max_len, translations[:i], strokes, suffixes)
             if mapping is not None:
                 t = Translation(strokes, mapping)
diff --git a/test/test_translation.py b/test/test_translation.py
index 7012d614c..747829c53 100644
--- a/test/test_translation.py
+++ b/test/test_translation.py
@@ -881,7 +881,6 @@ def test_no_prefix_lookup_over_the_longest_key_limit(self):
             '''
             SPH/TEFT/-G
             /TEFT/-G TEFT/-G
-            /-G -G
             '''
         )
@@ -899,7 +899,6 @@ def test_no_duplicate_lookups_for_longest_no_suffix_match(self):
             '''
             # No suffix.
             '''
-            TEFGT
             '''
             # With suffix.
             '''
@@ -932,8 +930,6 @@ def test_lookup_suffixes_once(self):
             TEFT/SPH/SUFBGSZ
             /SPH/SUFBGSZ
             SPH/SUFBGSZ
-            /SUFBGSZ
-            SUFBGSZ
             '''
             # Suffix lookups.
             '''

From 6695805fe958d7d4d947348537f84e1870096229 Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Sat, 26 Mar 2022 16:12:01 +0100
Subject: [PATCH 8/9] translation: minor cleanup

---
 plover/translation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/plover/translation.py b/plover/translation.py
index 6fb04b619..68abcfbcf 100644
--- a/plover/translation.py
+++ b/plover/translation.py
@@ -379,6 +379,7 @@ def _find_longest_match(self, min_len, max_len, stroke, suffixes=()):
                 t = Translation(strokes, mapping)
                 t.replaced = replaced
                 return t
+        return None
 
     def _lookup_strokes(self, strokes):
         '''Look for a matching translation.

From 51bf53b0d021fa98b84be6446ddcbd75d5ad9a3a Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Sat, 26 Mar 2022 16:30:29 +0100
Subject: [PATCH 9/9] add news entry

---
 news.d/feature/1513.core.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 news.d/feature/1513.core.md

diff --git a/news.d/feature/1513.core.md b/news.d/feature/1513.core.md
new file mode 100644
index 000000000..7efebf707
--- /dev/null
+++ b/news.d/feature/1513.core.md
@@ -0,0 +1 @@
+Improve translation stage: cut down on unnecessary / duplicate dictionary lookups.