From 1ea42601227aa017d176bc8415c375c9d2dbe525 Mon Sep 17 00:00:00 2001
From: Colin Rofls <colin@cmyr.net>
Date: Wed, 13 Mar 2024 15:15:54 -0400
Subject: [PATCH] [kerning] Set lookupflags & mark filter set

This also adds tests that we are generating the expected lookups & flags
for mark & non-mark kerning lookups.
---
 fontbe/src/features/kern.rs | 197 ++++++++++++++++++++++++++++--------
 1 file changed, 155 insertions(+), 42 deletions(-)
diff --git a/fontbe/src/features/kern.rs b/fontbe/src/features/kern.rs
index 71e981568..b0ab992d3 100644
--- a/fontbe/src/features/kern.rs
+++ b/fontbe/src/features/kern.rs
@@ -3,6 +3,7 @@
 use std::{
     borrow::Cow,
     collections::{BTreeMap, BTreeSet, HashMap, HashSet},
+    sync::Arc,
 };
 
 use fea_rs::{
@@ -19,7 +20,7 @@ use fontdrasil::{
     types::GlyphName,
 };
 use fontir::{
-    ir::{KernGroup, KernPair, KernParticipant, KerningGroups, KerningInstance},
+    ir::{Glyph, KernGroup, KernPair, KernParticipant, KerningGroups, KerningInstance},
     orchestration::WorkId as FeWorkId,
 };
 use icu_properties::BidiClass;
@@ -69,6 +70,13 @@ struct KerningFragmentWork {
 #[derive(Debug)]
 struct KerningGatherWork;
 
+/// Whether or not a given mark glyph is a spacing mark, e.g. has width
+#[derive(Clone, Copy, Debug)]
+enum MarkSpacing {
+    Spacing,
+    NonSpacing,
+}
+
 pub fn create_gather_ir_kerning_work() -> Box<BeWork> {
     Box::new(GatherIrKerningWork {})
 }
@@ -401,7 +409,7 @@ impl Work<Context, AnyWorkId, Error> for KerningGatherWork {
                 )
             })
             .collect::<Vec<_>>();
-        let lookups = self.finalize_kerning(&fragments, &ast.ast, &glyph_map, &glyphs_and_gids)?;
+        let lookups = self.finalize_kerning(&fragments, &ast.ast, &glyph_map, glyphs_and_gids)?;
         context.fea_rs_kerns.set(lookups);
         Ok(())
     }
@@ -416,7 +424,7 @@ impl KerningGatherWork {
         fragments: &[&KernFragment],
         ast: &ParseTree,
         glyph_map: &GlyphMap,
-        glyphs: &impl CharMap,
+        glyphs: Vec<(Arc<Glyph>, GlyphId)>,
     ) -> Result<FeaRsKerns, Error> {
         // ignore diagnostics, they'll get logged during actual GSUB compilation
         let (compilation, _) = fea_rs::compile::compile::<NopVariationInfo, NopFeatureProvider>(
@@ -448,8 +456,29 @@ impl KerningGatherWork {
             .flat_map(|frag| frag.kerns.iter())
             .collect::<Vec<_>>();
 
-        let known_scripts = guess_font_scripts(ast, glyphs);
-        let split_ctx = KernSplitContext::new(glyphs, &known_scripts, gsub, gdef)?;
+        let known_scripts = guess_font_scripts(ast, &glyphs);
+        let mark_glyphs = glyphs
+            .iter()
+            .filter_map(|(glyph, gid)| {
+                let is_mark = gdef
+                    .as_ref()
+                    .map(|gdef| gdef.get(gid) == Some(&GlyphClassDef::Mark))
+                    .unwrap_or(false);
+                is_mark.then(|| {
+                    let spacing = if glyph
+                        .sources()
+                        .values()
+                        .all(|instance| instance.width != 0.0)
+                    {
+                        MarkSpacing::Spacing
+                    } else {
+                        MarkSpacing::NonSpacing
+                    };
+                    (*gid, spacing)
+                })
+            })
+            .collect();
+        let split_ctx = KernSplitContext::new(&glyphs, &known_scripts, gsub, mark_glyphs)?;
 
         let lookups = split_ctx.make_lookups(&pairs);
         let (lookups, features) = self.assign_lookups_to_scripts(lookups, ast, KERN);
@@ -590,7 +619,8 @@ fn debug_ordered_lookups(
 
 /// All the state needed for splitting kern pairs by script & writing direction
 struct KernSplitContext {
-    gdef: Option<HashMap<GlyphId, GlyphClassDef>>,
+    /// map of all mark glyphs; bool indicates if mark is spacing
+    mark_glyphs: HashMap<GlyphId, MarkSpacing>,
     glyph_scripts: HashMap<GlyphId, HashSet<UnicodeShortName>>,
     bidi_glyphs: HashMap<BidiClass, HashSet<GlyphId>>,
     opts: KernSplitOptions,
@@ -614,14 +644,14 @@ impl KernSplitContext {
         glyphs: &impl CharMap,
         known_scripts: &HashSet<UnicodeShortName>,
         gsub: Option<Gsub>,
-        gdef: Option<HashMap<GlyphId, GlyphClassDef>>,
+        mark_glyphs: HashMap<GlyphId, MarkSpacing>,
     ) -> Result<Self, ReadError> {
         let glyph_scripts =
             super::properties::scripts_by_glyph(glyphs, known_scripts, gsub.as_ref())?;
         let bidi_glyphs = super::properties::glyphs_by_bidi_class(glyphs, gsub.as_ref())?;
 
         Ok(Self {
-            gdef,
+            mark_glyphs,
             glyph_scripts,
             bidi_glyphs,
             opts: KernSplitOptions::default(),
@@ -630,18 +660,6 @@ impl KernSplitContext {
         })
     }
 
-    fn mark_glyph_ids(&self) -> HashSet<GlyphId> {
-        self.gdef
-            .as_ref()
-            .map(|classes| {
-                classes
-                    .iter()
-                    .filter_map(|(gid, cls)| (*cls == GlyphClassDef::Mark).then_some(*gid))
-                    .collect()
-            })
-            .unwrap_or_default()
-    }
-
     // <https://github.com/googlefonts/ufo2ft/blob/cea60d71dfc/Lib/ufo2ft/featureWriters/kernFeatureWriter.py#L242>
     fn make_lookups(
         &self,
@@ -674,8 +692,7 @@ impl KernSplitContext {
     fn make_split_script_kern_lookups(
         &self,
         pairs: &[Cow<PairPosEntry>],
-        //TODO: handle marks
-        _are_marks: bool,
+        are_marks: bool,
     ) -> BTreeMap<BTreeSet<UnicodeShortName>, Vec<PendingLookup<PairPosBuilder>>> {
         let mut lookups_by_script = BTreeMap::new();
         let kerning_per_script = self.split_kerns(pairs);
@@ -710,12 +727,39 @@ impl KernSplitContext {
                 }
                 pair.add_to(&mut builder);
             }
-            let lookup = PendingLookup::new(vec![builder], LookupFlag::empty(), None);
+            let lookup = self.make_lookup(builder, !are_marks);
             lookups_by_script.insert(scripts, vec![lookup]);
         }
         lookups_by_script
     }
 
+    // logic from
+    // <https://github.com/googlefonts/ufo2ft/blob/cea60d71dfc/Lib/ufo2ft/featureWriters/kernFeatureWriter.py#L385>
+    fn make_lookup(
+        &self,
+        builder: PairPosBuilder,
+        ignore_marks: bool,
+    ) -> PendingLookup<PairPosBuilder> {
+        let mut flags = LookupFlag::empty();
+        let mut filter_class = None;
+        if ignore_marks && self.opts.ignore_marks {
+            let spacing_marks: GlyphSet = self
+                .mark_glyphs
+                .iter()
+                .filter_map(|(gid, spacing)| {
+                    matches!(spacing, MarkSpacing::Spacing).then_some(*gid)
+                })
+                .collect();
+            if spacing_marks.is_empty() {
+                flags.set_ignore_marks(true);
+            } else {
+                filter_class = Some(spacing_marks);
+                flags.set_use_mark_filtering_set(true);
+            }
+        }
+        PendingLookup::new(vec![builder], flags, filter_class)
+    }
+
     // <https://github.com/googlefonts/ufo2ft/blob/f6b4f42460b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py#L842>
     fn split_kerns(
         &self,
@@ -842,10 +886,10 @@ impl KernSplitContext {
         // little helper to tell us what's in a glyphset
         fn classify_glyphset_contents(
             glyph_set: &GlyphSet,
-            marks: &HashSet<GlyphId>,
+            marks: &HashMap<GlyphId, MarkSpacing>,
         ) -> GlyphSetContent {
             glyph_set.iter().fold(GlyphSetContent::Empty, |val, gid| {
-                match (val, marks.contains(&gid)) {
+                match (val, marks.contains_key(&gid)) {
                     (GlyphSetContent::Empty, true) => GlyphSetContent::MarksOnly,
                     (GlyphSetContent::Empty, false) => GlyphSetContent::BasesOnly,
                     (GlyphSetContent::MarksOnly, true) => GlyphSetContent::MarksOnly,
@@ -856,13 +900,15 @@ impl KernSplitContext {
         }
 
         /// split a glyphset into (bases, marks)
-        fn split_glyphs(glyphs: &GlyphSet, marks: &HashSet<GlyphId>) -> (GlyphSet, GlyphSet) {
-            let (x, y): (Vec<_>, Vec<_>) = glyphs.iter().partition(|gid| !marks.contains(gid));
+        fn split_glyphs(
+            glyphs: &GlyphSet,
+            marks: &HashMap<GlyphId, MarkSpacing>,
+        ) -> (GlyphSet, GlyphSet) {
+            let (x, y): (Vec<_>, Vec<_>) = glyphs.iter().partition(|gid| !marks.contains_key(gid));
             (x.into(), y.into())
         }
 
-        let marks = self.mark_glyph_ids();
-        if marks.is_empty() {
+        if self.mark_glyphs.is_empty() {
             return (
                 pairs.iter().map(|x| Cow::Borrowed(*x)).collect(),
                 Vec::new(),
@@ -874,7 +920,8 @@ impl KernSplitContext {
         for pair in pairs {
             match pair {
                 PairPosEntry::Pair(side1, _, side2, _)
-                    if !marks.contains(side1) && !marks.contains(side2) =>
+                    if !self.mark_glyphs.contains_key(side1)
+                        && !self.mark_glyphs.contains_key(side2) =>
                 {
                     base_pairs.push(Cow::Borrowed(*pair))
                 }
@@ -882,8 +929,8 @@ impl KernSplitContext {
 
                 // handle the case where all are marks or bases, first:
                 PairPosEntry::Class(side1, val1, side2, val2) => {
-                    let side1_cls = classify_glyphset_contents(side1, &marks);
-                    let side2_cls = classify_glyphset_contents(side2, &marks);
+                    let side1_cls = classify_glyphset_contents(side1, &self.mark_glyphs);
+                    let side2_cls = classify_glyphset_contents(side2, &self.mark_glyphs);
 
                     match (side1_cls, side2_cls) {
                         (GlyphSetContent::Empty, _) | (_, GlyphSetContent::Empty) => continue,
@@ -898,8 +945,8 @@ impl KernSplitContext {
                             continue;
                         }
                         _ => {
-                            let (side1_bases, side1_marks) = split_glyphs(side1, &marks);
-                            let (side2_bases, side2_marks) = split_glyphs(side2, &marks);
+                            let (side1_bases, side1_marks) = split_glyphs(side1, &self.mark_glyphs);
+                            let (side2_bases, side2_marks) = split_glyphs(side2, &self.mark_glyphs);
 
                             if !side1_bases.is_empty() && !side2_bases.is_empty() {
                                 base_pairs.push(Cow::Owned(PairPosEntry::Class(
@@ -1037,6 +1084,7 @@ fn merge_scripts(
 mod tests {
     use super::*;
 
+    const LATN: UnicodeShortName = unsafe { UnicodeShortName::from_bytes_unchecked(*b"Latn") };
     struct MockCharMap(HashMap<char, GlyphId>);
 
     impl CharMap for MockCharMap {
@@ -1047,15 +1095,17 @@ mod tests {
 
     impl MockCharMap {
         fn make_rule(&self, left: char, right: char, val: i16) -> PairPosEntry {
-            let left = self.0.get(&left).unwrap();
-            let right = self.0.get(&right).unwrap();
             PairPosEntry::Pair(
-                *left,
+                self.get(left),
                 ValueRecordBuilder::new().with_x_advance(val),
-                *right,
+                self.get(right),
                 ValueRecordBuilder::new(),
             )
         }
+
+        fn get(&self, c: char) -> GlyphId {
+            self.0.get(&c).copied().unwrap()
+        }
     }
 
     impl FromIterator<char> for MockCharMap {
@@ -1079,7 +1129,8 @@ mod tests {
             .into_iter()
             .map(|(a, b, val)| charmap.make_rule(a, b, val))
             .collect::<Vec<_>>();
-        let ctx = KernSplitContext::new(&charmap, &known_scripts, None, None).unwrap();
+        let ctx =
+            KernSplitContext::new(&charmap, &known_scripts, None, Default::default()).unwrap();
 
         let pairs_ref = pairs.iter().collect::<Vec<_>>();
 
@@ -1090,9 +1141,7 @@ mod tests {
             .into_iter()
             .collect();
 
-        let latn: BTreeSet<_> = [UnicodeShortName::from_str("Latn").unwrap()]
-            .into_iter()
-            .collect();
+        let latn: BTreeSet<_> = [LATN].into_iter().collect();
 
         let cyr_rules = result.get(&cyrillic).unwrap();
         assert_eq!(
@@ -1112,4 +1161,68 @@ mod tests {
             2
         );
     }
+
+    fn flags_and_rule_count(lookup: &PendingLookup<PairPosBuilder>) -> (LookupFlag, usize) {
+        (
+            lookup.flags,
+            lookup.subtables.iter().map(|sub| sub.len()).sum::<usize>(),
+        )
+    }
+
+    #[test]
+    fn mark_to_base_kern() {
+        const ACUTE_COMB: char = '\u{0301}';
+        let charmap: MockCharMap = ['A', 'B', 'C', ACUTE_COMB].into_iter().collect();
+        let known_scripts = scripts_for_chars(&charmap);
+        let mark_glyphs = [(charmap.get(ACUTE_COMB), MarkSpacing::NonSpacing)]
+            .into_iter()
+            .collect();
+        let pairs = [('A', ACUTE_COMB, -55), ('B', 'C', -30), ('A', 'C', -22)]
+            .into_iter()
+            .map(|(a, b, val)| charmap.make_rule(a, b, val))
+            .collect::<Vec<_>>();
+
+        let ctx = KernSplitContext::new(&charmap, &known_scripts, None, mark_glyphs).unwrap();
+
+        let pairs_ref = pairs.iter().collect::<Vec<_>>();
+        let result = ctx.make_lookups(&pairs_ref);
+
+        let latn: BTreeSet<_> = [LATN].into_iter().collect();
+        assert_eq!(result.len(), 1);
+        let lookups = result.get(&latn).unwrap();
+        assert_eq!(lookups.len(), 2);
+
+        let mut ignore_marks = LookupFlag::empty();
+        ignore_marks.set_ignore_marks(true);
+
+        let bases = &lookups[0];
+        let marks = &lookups[1];
+        assert_eq!(
+            (flags_and_rule_count(bases), flags_and_rule_count(marks)),
+            ((ignore_marks, 2), (LookupFlag::empty(), 1)),
+        );
+    }
+
+    #[test]
+    fn mark_to_base_only() {
+        const ACUTE_COMB: char = '\u{0301}';
+        let charmap: MockCharMap = ['A', 'B', 'C', ACUTE_COMB].into_iter().collect();
+        let known_scripts = scripts_for_chars(&charmap);
+        let mark_glyphs = [(charmap.get(ACUTE_COMB), MarkSpacing::NonSpacing)]
+            .into_iter()
+            .collect();
+        let pairs = vec![charmap.make_rule('A', ACUTE_COMB, -55)];
+
+        let ctx = KernSplitContext::new(&charmap, &known_scripts, None, mark_glyphs).unwrap();
+
+        let pairs_ref = pairs.iter().collect::<Vec<_>>();
+        let result = ctx.make_lookups(&pairs_ref);
+
+        let latn: BTreeSet<_> = [LATN].into_iter().collect();
+        assert_eq!(result.len(), 1);
+        let lookups = result.get(&latn).unwrap();
+        assert_eq!(lookups.len(), 1);
+
+        assert_eq!(flags_and_rule_count(&lookups[0]), (LookupFlag::empty(), 1));
+    }
 }