-
Notifications
You must be signed in to change notification settings - Fork 90
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gsub_diff: Compare LookupTypes 2, 3, 4, 7 #455
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,41 +44,144 @@ def __init__(self, file_a, file_b, output_lines=20): | |
|
||
def find_gsub_diffs(self): | ||
"""Report differences in substitution rules.""" | ||
|
||
rules_a = self._get_gsub_rules(self.text_a, self.file_a) | ||
rules_b = self._get_gsub_rules(self.text_b, self.file_b) | ||
|
||
diffs = [] | ||
report = [''] # first line replaced by difference count | ||
for rule in rules_a: | ||
if rule not in rules_b: | ||
diffs.append(('-',) + rule) | ||
for rule in rules_b: | ||
if rule not in rules_a: | ||
diffs.append(('+',) + rule) | ||
# ('+', 'smcp', 'Q', 'Q.sc') | ||
new = [self._format_rule(r, '+') for r in self.find_new_rules()] | ||
missing = [self._format_rule(r, '-') for r in self.find_missing_rules()] | ||
diffs = missing + new | ||
# ('+', 'smcp', 'Q', 'by', Q.sc') | ||
# Sort order: | ||
# 1. Feature tag | ||
# 2. Glyph name before substitution | ||
# 3. Glyph name after substitution | ||
diffs.sort(key=lambda t:(t[1], t[2], t[3])) | ||
diffs.sort(key=lambda t:(t[1], t[2], t[4])) | ||
report = ['%d differences in GSUB rules' % len(diffs)] | ||
report.extend(' '.join(diff) for diff in diffs) | ||
return '\n'.join(report[:self.output_lines + 1]) | ||
|
||
def _get_gsub_rules(self, text, filename): | ||
"""Get substitution rules in this ttxn output.""" | ||
def find_new_rules(self): | ||
rules_a = self._get_gsub_rules(self.text_a, self.file_a) | ||
rules_b = self._get_gsub_rules(self.text_b, self.file_b) | ||
return [r for r in rules_b if r not in rules_a] | ||
|
||
def find_missing_rules(self): | ||
rules_a = self._get_gsub_rules(self.text_a, self.file_a) | ||
rules_b = self._get_gsub_rules(self.text_b, self.file_b) | ||
return [r for r in rules_a if r not in rules_b] | ||
|
||
def _get_gsub_rules(self, text, file): | ||
""" | ||
Parse the ttxn GSUB table in the following manner: | ||
|
||
1. Get features | ||
2. Get feature content | ||
3. Extract lookup rules from feature content | ||
|
||
Following substitutions are currently implemented: | ||
- Type 1: Single substitutions | ||
- Type 2: Multiple substitutions | ||
- Type 3: Alternate substitutions | ||
- Type 4: Ligature substitutionss | ||
|
||
TODO: LookupTypes 5, 6, 8 still need implementing | ||
""" | ||
rules = [] | ||
features = self._get_gsub_features(text) | ||
for feature in features: | ||
content = self._get_feature_content(text, feature) | ||
lookups_rules = self._get_lookups_rules(text, content[0], feature) | ||
rules += lookups_rules | ||
return rules | ||
|
||
def _get_gsub_features(self, text): | ||
features = set() | ||
feature_name_rx = r'feature (\w+) {' | ||
contents_rx = r'feature %s {(.*?)} %s;' | ||
rule_rx = r'sub ([\w.]+) by ([\w.]+);' | ||
|
||
rules = set() | ||
for name in re.findall(feature_name_rx, text): | ||
contents = re.findall(contents_rx % (name, name), text, re.S) | ||
assert len(contents) == 1, 'Multiple %s features in %s' % ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the previous code would assert that only one feature block with a given name exists (which might not be true, at least for handwritten, non-ttxn-generated, feature files, since the spec allows multiple feature blocks with same name). |
||
name, filename) | ||
contents = contents[0] | ||
for lhs, rhs in re.findall(rule_rx, contents): | ||
rules.add((name, lhs, rhs)) | ||
return rules | ||
features.add(name) | ||
return list(features) | ||
|
||
def _get_feature_content(self, text, feature): | ||
contents_rx = r'feature %s {(.*?)} %s;' | ||
contents = re.findall(contents_rx % (feature, feature), text, re.S) | ||
return contents | ||
|
||
def _get_lookups_rules(self, text, content, feature): | ||
"""Ignore rules which use "'". These are contextual and not in | ||
lookups 1-4""" | ||
rule_rx = r"[^C] sub (.*[^\']) (by|from) (.*);" | ||
rules = re.findall(rule_rx, content) | ||
parsed_rules = self._parse_gsub_rules(rules, feature) | ||
return parsed_rules | ||
|
||
def _parse_gsub_rules(self, rules, feature): | ||
""" | ||
Parse GSUB sub LookupTypes 1, 2, 3, 4, 7. Return list of tuples with | ||
the following tuple sequence. | ||
|
||
(feature, [input glyphs], operator, [output glyphs]) | ||
|
||
Type 1 Single Sub: | ||
sub a by a.sc; | ||
sub b by b.sc; | ||
[ | ||
(feat, ['a'], 'by' ['a.sc']), | ||
(feat, ['b'], 'by' ['b.cs']) | ||
] | ||
|
||
|
||
Type 2 Multiple Sub: | ||
sub f_f by f f; | ||
sub f_f_i by f f i; | ||
[ | ||
(feat, ['f_f'], 'by', ['f', 'f']), | ||
(feat, ['f_f_i'], 'by', ['f', 'f', 'i']) | ||
] | ||
|
||
Type 3 Alternative Sub: | ||
sub ampersand from [ampersand.1 ampersand.2 ampersand.3]; | ||
[ | ||
(feat, ['ampersand'], 'from', ['ampersand.1']), | ||
(feat, ['ampersand'], 'from', ['ampersand.2']), | ||
(feat, ['ampersand'], 'from', ['ampersand.3']) | ||
] | ||
|
||
Type 4 Ligature Sub: | ||
sub f f by f_f; | ||
sub f f i by f_f_i; | ||
[ | ||
(feat, ['f', 'f'] 'by' ['f_f]), | ||
(feat, ['f', 'f', 'i'] 'by' ['f_f_i']) | ||
] | ||
|
||
http://www.adobe.com/devnet/opentype/afdko/topic_feature_file_syntax.html#4.e | ||
""" | ||
parsed = [] | ||
for idx, (left, op, right) in enumerate(rules): | ||
|
||
left_group, right_group = [], [] | ||
if left.startswith('[') and left.endswith(']'): | ||
left = self._gsub_rule_group_to_string(left) | ||
|
||
if right.startswith('[') and right.endswith(']'): | ||
right = self._gsub_rule_group_to_string(right) | ||
|
||
if op == 'by': # parse LookupType 1, 2, 4 | ||
parsed.append((feature, left.split(), op, right.split())) | ||
elif op == 'from': # parse LookupType 3 | ||
for glyph in right.split(): # 'a.alt a.sc' -> ['a.alt', 'a.sc'] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment is redundant |
||
parsed.append((feature, left.split(), op, [glyph])) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
return parsed | ||
|
||
def _format_rule(self, rule, sign): | ||
"""Unnest the tuple rule sequence to more report friendly format""" | ||
s = [sign] | ||
for item in rule: | ||
if not isinstance(item, str): | ||
for sub_item in item: | ||
s.append(sub_item) | ||
else: | ||
s.append(item) | ||
return s | ||
|
||
def _gsub_rule_group_to_string(self, seq): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find defining a method a bit overkill for such a trivial operation as |
||
"""[a a.sc a.sups] --> 'a a.sc a.sups'""" | ||
return seq[1:-1] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
# Copyright 2016 Google Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
"""Tests for gsub_diff module. Test examples for each LookupType taken from | ||
the Adobe fea spec: | ||
http://www.adobe.com/devnet/opentype/afdko/topic_feature_file_syntax.html#5.b | ||
""" | ||
|
||
import tempfile | ||
import unittest | ||
from nototools.gsub_diff import GsubDiffFinder | ||
from hb_input_test import make_font | ||
|
||
|
||
class GposDiffFinderText(unittest.TestCase): | ||
def _expect_gsub_diffs(self, source_a, source_b, pairs): | ||
font_a = make_font('feature ccmp {\n%s\n} ccmp;' % source_a) | ||
font_b = make_font('feature ccmp {\n%s\n} ccmp;' % source_b) | ||
file_a = tempfile.NamedTemporaryFile() | ||
file_b = tempfile.NamedTemporaryFile() | ||
font_a.save(file_a.name) | ||
font_b.save(file_b.name) | ||
finder = GsubDiffFinder(file_a.name, file_b.name) | ||
|
||
diffs = finder.find_gsub_diffs() | ||
self.assertIn('%d differences in GSUB rules' % len(pairs), diffs) | ||
for pair_diff in pairs: | ||
self.assertIn(pair_diff, diffs) | ||
|
||
def test_type1_gsub_1(self): | ||
"""Test LookupType 1 Single substitutions""" | ||
self._expect_gsub_diffs(''' | ||
sub A by A.sc; | ||
sub B by B.sc; | ||
''', ''' | ||
sub A by A.sc; | ||
''', | ||
[('- ccmp B by B.sc')]) | ||
|
||
def test_type1_gsub_2(self): | ||
"""Test LookupType 1 Single substitutions on groups""" | ||
self._expect_gsub_diffs(''' | ||
sub [A B] by [A.sc B.sc]; | ||
''', ''' | ||
sub [A] by [A.sc]; | ||
''', | ||
[('- ccmp B by B.sc')]) | ||
|
||
def test_type2_gsub(self): | ||
"""Test LookupType 2 Multiple substitutions""" | ||
self._expect_gsub_diffs(''' | ||
sub f_l by f l; | ||
''', ''' | ||
sub f_l by f l; | ||
sub c_h by c h; | ||
''', | ||
[('+ ccmp c_h by c h')]) | ||
|
||
def test_type3_gsub(self): | ||
"""Test LookupType 3 Alternate substitutions""" | ||
self._expect_gsub_diffs(''' | ||
sub A from [A.swash A.sc]; | ||
''', ''' | ||
sub A from [A.swash A.sc]; | ||
sub B from [B.swash B.sc]; | ||
''', | ||
[('+ ccmp B from B.swash'), | ||
('+ ccmp B from B.sc')]) | ||
|
||
def test_type4_gsub_1(self): | ||
"""Test LookupType 4 Ligature substitutions""" | ||
self._expect_gsub_diffs(''' | ||
sub f l by f_l; | ||
sub c h by c_h; | ||
''', ''' | ||
sub f l by f_l; | ||
''', | ||
[('- ccmp c h by c_h')]) | ||
|
||
def test_type4_gsub_2(self): | ||
"""Test LookupType 4 Ligature substitutions on groups""" | ||
self._expect_gsub_diffs(''' | ||
sub [f F.swash] [l L.swash] by f_l; | ||
''', ''' | ||
sub [f] [l] by f_l; | ||
''', | ||
[('- ccmp F.swash L.swash by f_l'), | ||
('- ccmp F.swash l by f_l'), | ||
('- ccmp f L.swash by f_l'), | ||
]) | ||
|
||
def test_type5_and_6_gsub_1(self): | ||
"""LookupType 5 and 6 not implemented, make sure it returns nothing. | ||
|
||
This lookupType can use other lookups so include them in the test""" | ||
self._expect_gsub_diffs(''' | ||
lookup CNTXT_LIGS { | ||
sub c t by c_t; | ||
} CNTXT_LIGS; | ||
|
||
lookup CNTXT_SUB { | ||
sub s by s.end; | ||
} CNTXT_SUB; | ||
|
||
# LookupType 6 implementation | ||
lookup test { | ||
sub [ a e i o u] c' lookup CNTXT_LIGS t' s' lookup CNTXT_SUB; | ||
} test; | ||
''',''' | ||
lookup CNTXT_LIGS { | ||
sub c t by c_t; | ||
} CNTXT_LIGS; | ||
|
||
lookup CNTXT_SUB { | ||
sub s by s.end; | ||
} CNTXT_SUB; | ||
''', | ||
[]) | ||
|
||
def test_type5_and_6_gsub_2(self): | ||
"""LookupType 5 and 6 not implemented, make sure it returns nothing. | ||
""" | ||
self._expect_gsub_diffs(''' | ||
substitute [a e n] d' by d.alt; | ||
''',''' | ||
''', | ||
[]) | ||
|
||
def test_type5_and_6_gsub_3(self): | ||
"""LookupType 5 and 6 not implemented, make sure it returns nothing. | ||
""" | ||
self._expect_gsub_diffs(''' | ||
substitute [e e.begin]' t' c by ampersand; | ||
''',''' | ||
''', | ||
[]) | ||
|
||
def test_type7_gsub(self): | ||
"""Test LookupType 7 Extension substitution""" | ||
self._expect_gsub_diffs(''' | ||
lookup fracbar useExtension { | ||
sub slash by fraction; | ||
} fracbar; | ||
''',''' | ||
lookup fracbar useExtension { | ||
# missing rules | ||
} fracbar; | ||
''', | ||
[('- ccmp slash by fraction')]) | ||
|
||
def test_type8_gsub(self): | ||
"""LookupType 8 not implemented, make sure it returns nothing""" | ||
self._expect_gsub_diffs(''' | ||
reversesub [a e n] d' by d.alt; | ||
''',''' | ||
|
||
''', | ||
[]) | ||
|
||
|
||
if __name__ == '__main__': | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see that refactoring those two for loops into new
find_new_rules
andfind_missing_rules
methods make the code more readable, but you are calling the same_get_gsub_rules
method twice now, which may be an expensive operation. Perhaps you could computerules_a
andrules_b
once and pass them as arguments to the previous methods.. but then again the methods would just become a label for a list comprehension (readable enough from POV). Well, up to you.