notofonts · m4rc1e · Oct 3, 2017 · Oct 3, 2017 · anthrotype · Oct 12, 2017
diff --git a/nototools/gsub_diff.py b/nototools/gsub_diff.py
@@ -44,41 +44,144 @@ def __init__(self, file_a, file_b, output_lines=20):
 
     def find_gsub_diffs(self):
         """Report differences in substitution rules."""
-
-        rules_a = self._get_gsub_rules(self.text_a, self.file_a)
-        rules_b = self._get_gsub_rules(self.text_b, self.file_b)
-
-        diffs = []
-        report = ['']  # first line replaced by difference count
-        for rule in rules_a:
-            if rule not in rules_b:
-                diffs.append(('-',) + rule)
-        for rule in rules_b:
-            if rule not in rules_a:
-                diffs.append(('+',) + rule)
-        # ('+', 'smcp', 'Q', 'Q.sc')
+        new = [self._format_rule(r, '+') for r in self.find_new_rules()]
+        missing = [self._format_rule(r, '-') for r in self.find_missing_rules()]
+        diffs = missing + new
+        # ('+', 'smcp', 'Q', 'by', 'Q.sc')
         # Sort order:
         # 1. Feature tag
         # 2. Glyph name before substitution
         # 3. Glyph name after substitution
-        diffs.sort(key=lambda t:(t[1], t[2], t[3]))
+        diffs.sort(key=lambda t:(t[1], t[2], t[4]))
         report = ['%d differences in GSUB rules' % len(diffs)]
         report.extend(' '.join(diff) for diff in diffs)
         return '\n'.join(report[:self.output_lines + 1])
 
-    def _get_gsub_rules(self, text, filename):
-        """Get substitution rules in this ttxn output."""
+    def find_new_rules(self):
+        rules_a = self._get_gsub_rules(self.text_a, self.file_a)
+        rules_b = self._get_gsub_rules(self.text_b, self.file_b)
+        return [r for r in rules_b if r not in rules_a]
+
+    def find_missing_rules(self):
+        rules_a = self._get_gsub_rules(self.text_a, self.file_a)
+        rules_b = self._get_gsub_rules(self.text_b, self.file_b)
+        return [r for r in rules_a if r not in rules_b]
+
+    def _get_gsub_rules(self, text, file):
+        """
+        Parse the ttxn GSUB table in the following manner:
+
+        1. Get features
+        2. Get feature content
+        3. Extract lookup rules from feature content
+
+        Following substitutions are currently implemented:
+        - Type 1: Single substitutions
+        - Type 2: Multiple substitutions
+        - Type 3: Alternate substitutions
+        - Type 4: Ligature substitutions
 
+        TODO: LookupTypes 5, 6, 8 still need implementing
+        """
+        rules = []
+        features = self._get_gsub_features(text)
+        for feature in features:
+            content = self._get_feature_content(text, feature)
+            lookups_rules = self._get_lookups_rules(text, content[0], feature)
+            rules += lookups_rules
+        return rules
+
+    def _get_gsub_features(self, text):
+        features = set()
         feature_name_rx = r'feature (\w+) {'
-        contents_rx = r'feature %s {(.*?)} %s;'
-        rule_rx = r'sub ([\w.]+) by ([\w.]+);'
 
-        rules = set()
         for name in re.findall(feature_name_rx, text):
-            contents = re.findall(contents_rx % (name, name), text, re.S)
-            assert len(contents) == 1, 'Multiple %s features in %s' % (
-                name, filename)
-            contents = contents[0]
-            for lhs, rhs in re.findall(rule_rx, contents):
-                rules.add((name, lhs, rhs))
-        return rules
+            features.add(name)
+        return list(features)
+
+    def _get_feature_content(self, text, feature):
+        contents_rx = r'feature %s {(.*?)} %s;'
+        contents = re.findall(contents_rx % (feature, feature), text, re.S)
+        return contents
+
+    def _get_lookups_rules(self, text, content, feature):
+        """Ignore rules which use "'". These are contextual and not in
+        lookups 1-4"""
+        rule_rx = r"[^C] sub (.*[^\']) (by|from) (.*);"
+        rules = re.findall(rule_rx, content)
+        parsed_rules = self._parse_gsub_rules(rules, feature)
+        return parsed_rules
+
+    def _parse_gsub_rules(self, rules, feature):
+        """
+        Parse GSUB sub LookupTypes 1, 2, 3, 4, 7. Return list of tuples with
+        the following tuple sequence.
+
+        (feature, [input glyphs], operator, [output glyphs])
+
+        Type 1 Single Sub:
+        sub a by a.sc;
+        sub b by b.sc;
+        [
+            (feat, ['a'], 'by', ['a.sc']),
+            (feat, ['b'], 'by', ['b.sc'])
+        ]
+
+
+        Type 2 Multiple Sub:
+        sub f_f by f f;
+        sub f_f_i by f f i;
+        [
+            (feat, ['f_f'], 'by', ['f', 'f']),
+            (feat, ['f_f_i'], 'by', ['f', 'f', 'i'])
+        ]
+
+        Type 3 Alternate Sub:
+        sub ampersand from [ampersand.1 ampersand.2 ampersand.3];
+            [
+                (feat, ['ampersand'], 'from', ['ampersand.1']),
+                (feat, ['ampersand'], 'from', ['ampersand.2']),
+                (feat, ['ampersand'], 'from', ['ampersand.3'])
+            ]
+
+        Type 4 Ligature Sub:
+        sub f f by f_f;
+        sub f f i by f_f_i;
+        [
+            (feat, ['f', 'f'], 'by', ['f_f']),
+            (feat, ['f', 'f', 'i'], 'by', ['f_f_i'])
+        ]
+
+        http://www.adobe.com/devnet/opentype/afdko/topic_feature_file_syntax.html#4.e
+        """
+        parsed = []
+        for idx, (left, op, right) in enumerate(rules):
+
+            left_group, right_group = [], []
+            if left.startswith('[') and left.endswith(']'):
+                left = self._gsub_rule_group_to_string(left)
+
+            if right.startswith('[') and right.endswith(']'):
+                right = self._gsub_rule_group_to_string(right)
+
+            if op == 'by': # parse LookupType 1, 2, 4
+                parsed.append((feature, left.split(), op, right.split()))
+            elif op == 'from': # parse LookupType 3
+                for glyph in right.split(): # 'a.alt a.sc' -> ['a.alt', 'a.sc']
+                    parsed.append((feature, left.split(), op, [glyph]))
+        return parsed
+
+    def _format_rule(self, rule, sign):
+        """Unnest the tuple rule sequence to more report friendly format"""
+        s = [sign]
+        for item in rule:
+            if not isinstance(item, str):
+                for sub_item in item:
+                    s.append(sub_item)
+            else:
+                s.append(item)
+        return s
+
+    def _gsub_rule_group_to_string(self, seq):
+        """[a a.sc a.sups] --> 'a a.sc a.sups'"""
+        return seq[1:-1]
diff --git a/tests/gsub_diff_test.py b/tests/gsub_diff_test.py
@@ -0,0 +1,174 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""Tests for gsub_diff module. Test examples for each LookupType taken from
+the Adobe fea spec:
+http://www.adobe.com/devnet/opentype/afdko/topic_feature_file_syntax.html#5.b
+"""
+
+import tempfile
+import unittest
+from nototools.gsub_diff import GsubDiffFinder
+from hb_input_test import make_font
+
+
+class GposDiffFinderText(unittest.TestCase):
+    def _expect_gsub_diffs(self, source_a, source_b, pairs):
+        font_a = make_font('feature ccmp {\n%s\n} ccmp;' % source_a)
+        font_b = make_font('feature ccmp {\n%s\n} ccmp;' % source_b)
+        file_a = tempfile.NamedTemporaryFile()
+        file_b = tempfile.NamedTemporaryFile()
+        font_a.save(file_a.name)
+        font_b.save(file_b.name)
+        finder = GsubDiffFinder(file_a.name, file_b.name)
+
+        diffs = finder.find_gsub_diffs()
+        self.assertIn('%d differences in GSUB rules' % len(pairs), diffs)
+        for pair_diff in pairs:
+            self.assertIn(pair_diff, diffs)
+
+    def test_type1_gsub_1(self):
+        """Test LookupType 1 Single substitutions"""
+        self._expect_gsub_diffs('''
+                sub A by A.sc;
+                sub B by B.sc;
+            ''', '''
+                sub A by A.sc;
+            ''',
+            [('- ccmp B by B.sc')])
+
+    def test_type1_gsub_2(self):
+        """Test LookupType 1 Single substitutions on groups"""
+        self._expect_gsub_diffs('''
+                sub [A B] by [A.sc B.sc];
+            ''', '''
+                sub [A] by [A.sc];
+            ''',
+            [('- ccmp B by B.sc')])
+
+    def test_type2_gsub(self):
+        """Test LookupType 2 Multiple substitutions"""
+        self._expect_gsub_diffs('''
+                sub f_l by f l;
+            ''', '''
+                sub f_l by f l;
+                sub c_h by c h;
+            ''',
+            [('+ ccmp c_h by c h')])
+
+    def test_type3_gsub(self):
+        """Test LookupType 3 Alternate substitutions"""
+        self._expect_gsub_diffs('''
+                sub A from [A.swash A.sc];
+            ''', '''
+                sub A from [A.swash A.sc];
+                sub B from [B.swash B.sc];
+            ''',
+            [('+ ccmp B from B.swash'),
+             ('+ ccmp B from B.sc')])
+
+    def test_type4_gsub_1(self):
+        """Test LookupType 4 Ligature substitutions"""
+        self._expect_gsub_diffs('''
+            sub f l by f_l;
+            sub c h by c_h;
+            ''', '''
+            sub f l by f_l;
+            ''',
+            [('- ccmp c h by c_h')])
+
+    def test_type4_gsub_2(self):
+        """Test LookupType 4 Ligature substitutions on groups"""
+        self._expect_gsub_diffs('''
+            sub [f F.swash] [l L.swash] by f_l;
+            ''', '''
+            sub [f] [l] by f_l;
+            ''',
+            [('- ccmp F.swash L.swash by f_l'),
+             ('- ccmp F.swash l by f_l'),
+             ('- ccmp f L.swash by f_l'),
+            ])
+
+    def test_type5_and_6_gsub_1(self):
+        """LookupType 5 and 6 not implemented, make sure it returns nothing.
+
+        This lookupType can use other lookups so include them in the test"""
+        self._expect_gsub_diffs('''
+            lookup CNTXT_LIGS {
+                 sub c t by c_t;
+             } CNTXT_LIGS;
+
+            lookup CNTXT_SUB {
+                 sub s by s.end;
+             } CNTXT_SUB;
+
+            # LookupType 6 implementation
+            lookup test {
+                 sub [ a e i o u] c' lookup CNTXT_LIGS t' s' lookup CNTXT_SUB;
+             } test;
+            ''','''
+            lookup CNTXT_LIGS {
+                 sub c t by c_t;
+             } CNTXT_LIGS;
+
+            lookup CNTXT_SUB {
+                 sub s by s.end;
+             } CNTXT_SUB;
+            ''',
+            [])
+
+    def test_type5_and_6_gsub_2(self):
+        """LookupType 5 and 6 not implemented, make sure it returns nothing.
+        """
+        self._expect_gsub_diffs('''
+            substitute [a e n] d' by d.alt;
+            ''','''
+            ''',
+            [])
+
+    def test_type5_and_6_gsub_3(self):
+        """LookupType 5 and 6 not implemented, make sure it returns nothing.
+        """
+        self._expect_gsub_diffs('''
+            substitute [e e.begin]' t' c by ampersand;
+            ''','''
+            ''',
+            [])
+
+    def test_type7_gsub(self):
+        """Test LookupType 7 Extension substitution"""
+        self._expect_gsub_diffs('''
+            lookup fracbar useExtension {
+                 sub slash by fraction;
+             } fracbar;
+            ''','''
+            lookup fracbar useExtension {
+                # missing rules
+             } fracbar;
+            ''',
+            [('- ccmp slash by fraction')])
+
+    def test_type8_gsub(self):
+        """LookupType 8 not implemented, make sure it returns nothing"""
+        self._expect_gsub_diffs('''
+            reversesub [a e n] d' by d.alt;
+            ''','''
+
+            ''',
+            [])
+
+
+if __name__ == '__main__':
+    unittest.main()