From 3cd80287489988c7601687ec433a4ce530e0e25e Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Wed, 13 Mar 2024 15:15:54 -0400 Subject: [PATCH] [kerning] Set lookupflags & mark filter set This also adds tests that we are generating the expected lookups & flags for mark & non-mark kerning lookups. --- fontbe/src/features/kern.rs | 197 ++++++++++++++++++++++++++++-------- 1 file changed, 155 insertions(+), 42 deletions(-) diff --git a/fontbe/src/features/kern.rs b/fontbe/src/features/kern.rs index 71e981568..b0ab992d3 100644 --- a/fontbe/src/features/kern.rs +++ b/fontbe/src/features/kern.rs @@ -3,6 +3,7 @@ use std::{ borrow::Cow, collections::{BTreeMap, BTreeSet, HashMap, HashSet}, + sync::Arc, }; use fea_rs::{ @@ -19,7 +20,7 @@ use fontdrasil::{ types::GlyphName, }; use fontir::{ - ir::{KernGroup, KernPair, KernParticipant, KerningGroups, KerningInstance}, + ir::{Glyph, KernGroup, KernPair, KernParticipant, KerningGroups, KerningInstance}, orchestration::WorkId as FeWorkId, }; use icu_properties::BidiClass; @@ -69,6 +70,13 @@ struct KerningFragmentWork { #[derive(Debug)] struct KerningGatherWork; +/// Whether or not a given mark glyph is a spacing mark, e.g. has width +#[derive(Clone, Copy, Debug)] +enum MarkSpacing { + Spacing, + NonSpacing, +} + pub fn create_gather_ir_kerning_work() -> Box { Box::new(GatherIrKerningWork {}) } @@ -401,7 +409,7 @@ impl Work for KerningGatherWork { ) }) .collect::>(); - let lookups = self.finalize_kerning(&fragments, &ast.ast, &glyph_map, &glyphs_and_gids)?; + let lookups = self.finalize_kerning(&fragments, &ast.ast, &glyph_map, glyphs_and_gids)?; context.fea_rs_kerns.set(lookups); Ok(()) } @@ -416,7 +424,7 @@ impl KerningGatherWork { fragments: &[&KernFragment], ast: &ParseTree, glyph_map: &GlyphMap, - glyphs: &impl CharMap, + glyphs: Vec<(Arc, GlyphId)>, ) -> Result { // ignore diagnostics, they'll get logged during actual GSUB compilation let (compilation, _) = fea_rs::compile::compile::( @@ -448,8 +456,29 @@ impl KerningGatherWork { .flat_map(|frag| frag.kerns.iter()) .collect::>(); - let known_scripts = guess_font_scripts(ast, glyphs); - let split_ctx = KernSplitContext::new(glyphs, &known_scripts, gsub, gdef)?; + let known_scripts = guess_font_scripts(ast, &glyphs); + let mark_glyphs = glyphs + .iter() + .filter_map(|(glyph, gid)| { + let is_mark = gdef + .as_ref() + .map(|gdef| gdef.get(gid) == Some(&GlyphClassDef::Mark)) + .unwrap_or(false); + is_mark.then(|| { + let spacing = if glyph + .sources() + .values() + .all(|instance| instance.width != 0.0) + { + MarkSpacing::Spacing + } else { + MarkSpacing::NonSpacing + }; + (*gid, spacing) + }) + }) + .collect(); + let split_ctx = KernSplitContext::new(&glyphs, &known_scripts, gsub, mark_glyphs)?; let lookups = split_ctx.make_lookups(&pairs); let (lookups, features) = self.assign_lookups_to_scripts(lookups, ast, KERN); @@ -590,7 +619,8 @@ fn debug_ordered_lookups( /// All the state needed for splitting kern pairs by script & writing direction struct KernSplitContext { - gdef: Option>, + /// map of all mark glyphs; bool indicates if mark is spacing + mark_glyphs: HashMap, glyph_scripts: HashMap>, bidi_glyphs: HashMap>, opts: KernSplitOptions, @@ -614,14 +644,14 @@ impl KernSplitContext { glyphs: &impl CharMap, known_scripts: &HashSet, gsub: Option, - gdef: Option>, + mark_glyphs: HashMap, ) -> Result { let glyph_scripts = super::properties::scripts_by_glyph(glyphs, known_scripts, gsub.as_ref())?; let bidi_glyphs = super::properties::glyphs_by_bidi_class(glyphs, gsub.as_ref())?; Ok(Self { - gdef, + mark_glyphs, glyph_scripts, bidi_glyphs, opts: KernSplitOptions::default(), @@ -630,18 +660,6 @@ impl KernSplitContext { }) } - fn mark_glyph_ids(&self) -> HashSet { - self.gdef - .as_ref() - .map(|classes| { - classes - .iter() - .filter_map(|(gid, cls)| (*cls == GlyphClassDef::Mark).then_some(*gid)) - .collect() - }) - .unwrap_or_default() - } - // fn make_lookups( &self, @@ -674,8 +692,7 @@ impl KernSplitContext { fn make_split_script_kern_lookups( &self, pairs: &[Cow], - //TODO: handle marks - _are_marks: bool, + are_marks: bool, ) -> BTreeMap, Vec>> { let mut lookups_by_script = BTreeMap::new(); let kerning_per_script = self.split_kerns(pairs); @@ -710,12 +727,39 @@ impl KernSplitContext { } pair.add_to(&mut builder); } - let lookup = PendingLookup::new(vec![builder], LookupFlag::empty(), None); + let lookup = self.make_lookup(builder, !are_marks); lookups_by_script.insert(scripts, vec![lookup]); } lookups_by_script } + // logic from + // + fn make_lookup( + &self, + builder: PairPosBuilder, + ignore_marks: bool, + ) -> PendingLookup { + let mut flags = LookupFlag::empty(); + let mut filter_class = None; + if ignore_marks && self.opts.ignore_marks { + let spacing_marks: GlyphSet = self + .mark_glyphs + .iter() + .filter_map(|(gid, spacing)| { + matches!(spacing, MarkSpacing::Spacing).then_some(*gid) + }) + .collect(); + if spacing_marks.is_empty() { + flags.set_ignore_marks(true); + } else { + filter_class = Some(spacing_marks); + flags.set_use_mark_filtering_set(true); + } + } + PendingLookup::new(vec![builder], flags, filter_class) + } + // fn split_kerns( &self, @@ -842,10 +886,10 @@ impl KernSplitContext { // little helper to tell us what's in a glyphset fn classify_glyphset_contents( glyph_set: &GlyphSet, - marks: &HashSet, + marks: &HashMap, ) -> GlyphSetContent { glyph_set.iter().fold(GlyphSetContent::Empty, |val, gid| { - match (val, marks.contains(&gid)) { + match (val, marks.contains_key(&gid)) { (GlyphSetContent::Empty, true) => GlyphSetContent::MarksOnly, (GlyphSetContent::Empty, false) => GlyphSetContent::BasesOnly, (GlyphSetContent::MarksOnly, true) => GlyphSetContent::MarksOnly, @@ -856,13 +900,15 @@ impl KernSplitContext { } /// split a glyphset into (bases, marks) - fn split_glyphs(glyphs: &GlyphSet, marks: &HashSet) -> (GlyphSet, GlyphSet) { - let (x, y): (Vec<_>, Vec<_>) = glyphs.iter().partition(|gid| !marks.contains(gid)); + fn split_glyphs( + glyphs: &GlyphSet, + marks: &HashMap, + ) -> (GlyphSet, GlyphSet) { + let (x, y): (Vec<_>, Vec<_>) = glyphs.iter().partition(|gid| !marks.contains_key(gid)); (x.into(), y.into()) } - let marks = self.mark_glyph_ids(); - if marks.is_empty() { + if self.mark_glyphs.is_empty() { return ( pairs.iter().map(|x| Cow::Borrowed(*x)).collect(), Vec::new(), @@ -874,7 +920,8 @@ impl KernSplitContext { for pair in pairs { match pair { PairPosEntry::Pair(side1, _, side2, _) - if !marks.contains(side1) && !marks.contains(side2) => + if !self.mark_glyphs.contains_key(side1) + && !self.mark_glyphs.contains_key(side2) => { base_pairs.push(Cow::Borrowed(*pair)) } @@ -882,8 +929,8 @@ impl KernSplitContext { // handle the case where all are marks or bases, first: PairPosEntry::Class(side1, val1, side2, val2) => { - let side1_cls = classify_glyphset_contents(side1, &marks); - let side2_cls = classify_glyphset_contents(side2, &marks); + let side1_cls = classify_glyphset_contents(side1, &self.mark_glyphs); + let side2_cls = classify_glyphset_contents(side2, &self.mark_glyphs); match (side1_cls, side2_cls) { (GlyphSetContent::Empty, _) | (_, GlyphSetContent::Empty) => continue, @@ -898,8 +945,8 @@ impl KernSplitContext { continue; } _ => { - let (side1_bases, side1_marks) = split_glyphs(side1, &marks); - let (side2_bases, side2_marks) = split_glyphs(side2, &marks); + let (side1_bases, side1_marks) = split_glyphs(side1, &self.mark_glyphs); + let (side2_bases, side2_marks) = split_glyphs(side2, &self.mark_glyphs); if !side1_bases.is_empty() && !side2_bases.is_empty() { base_pairs.push(Cow::Owned(PairPosEntry::Class( @@ -1037,6 +1084,7 @@ fn merge_scripts( mod tests { use super::*; + const LATN: UnicodeShortName = unsafe { UnicodeShortName::from_bytes_unchecked(*b"Latn") }; struct MockCharMap(HashMap); impl CharMap for MockCharMap { @@ -1047,15 +1095,17 @@ mod tests { impl MockCharMap { fn make_rule(&self, left: char, right: char, val: i16) -> PairPosEntry { - let left = self.0.get(&left).unwrap(); - let right = self.0.get(&right).unwrap(); PairPosEntry::Pair( - *left, + self.get(left), ValueRecordBuilder::new().with_x_advance(val), - *right, + self.get(right), ValueRecordBuilder::new(), ) } + + fn get(&self, c: char) -> GlyphId { + self.0.get(&c).copied().unwrap() + } } impl FromIterator for MockCharMap { @@ -1079,7 +1129,8 @@ mod tests { .into_iter() .map(|(a, b, val)| charmap.make_rule(a, b, val)) .collect::>(); - let ctx = KernSplitContext::new(&charmap, &known_scripts, None, None).unwrap(); + let ctx = + KernSplitContext::new(&charmap, &known_scripts, None, Default::default()).unwrap(); let pairs_ref = pairs.iter().collect::>(); @@ -1090,9 +1141,7 @@ mod tests { .into_iter() .collect(); - let latn: BTreeSet<_> = [UnicodeShortName::from_str("Latn").unwrap()] - .into_iter() - .collect(); + let latn: BTreeSet<_> = [LATN].into_iter().collect(); let cyr_rules = result.get(&cyrillic).unwrap(); assert_eq!( @@ -1112,4 +1161,68 @@ mod tests { 2 ); } + + fn flags_and_rule_count(lookup: &PendingLookup) -> (LookupFlag, usize) { + ( + lookup.flags, + lookup.subtables.iter().map(|sub| sub.len()).sum::(), + ) + } + + #[test] + fn mark_to_base_kern() { + const ACUTE_COMB: char = '\u{0301}'; + let charmap: MockCharMap = ['A', 'B', 'C', ACUTE_COMB].into_iter().collect(); + let known_scripts = scripts_for_chars(&charmap); + let mark_glyphs = [(charmap.get(ACUTE_COMB), MarkSpacing::NonSpacing)] + .into_iter() + .collect(); + let pairs = [('A', ACUTE_COMB, -55), ('B', 'C', -30), ('A', 'C', -22)] + .into_iter() + .map(|(a, b, val)| charmap.make_rule(a, b, val)) + .collect::>(); + + let ctx = KernSplitContext::new(&charmap, &known_scripts, None, mark_glyphs).unwrap(); + + let pairs_ref = pairs.iter().collect::>(); + let result = ctx.make_lookups(&pairs_ref); + + let latn: BTreeSet<_> = [LATN].into_iter().collect(); + assert_eq!(result.len(), 1); + let lookups = result.get(&latn).unwrap(); + assert_eq!(lookups.len(), 2); + + let mut ignore_marks = LookupFlag::empty(); + ignore_marks.set_ignore_marks(true); + + let bases = &lookups[0]; + let marks = &lookups[1]; + assert_eq!( + (flags_and_rule_count(bases), flags_and_rule_count(marks)), + ((ignore_marks, 2), (LookupFlag::empty(), 1)), + ); + } + + #[test] + fn mark_to_base_only() { + const ACUTE_COMB: char = '\u{0301}'; + let charmap: MockCharMap = ['A', 'B', 'C', ACUTE_COMB].into_iter().collect(); + let known_scripts = scripts_for_chars(&charmap); + let mark_glyphs = [(charmap.get(ACUTE_COMB), MarkSpacing::NonSpacing)] + .into_iter() + .collect(); + let pairs = vec![charmap.make_rule('A', ACUTE_COMB, -55)]; + + let ctx = KernSplitContext::new(&charmap, &known_scripts, None, mark_glyphs).unwrap(); + + let pairs_ref = pairs.iter().collect::>(); + let result = ctx.make_lookups(&pairs_ref); + + let latn: BTreeSet<_> = [LATN].into_iter().collect(); + assert_eq!(result.len(), 1); + let lookups = result.get(&latn).unwrap(); + assert_eq!(lookups.len(), 1); + + assert_eq!(flags_and_rule_count(&lookups[0]), (LookupFlag::empty(), 1)); + } }