From b687af6ca00898bbeb8fd3e0893450b2d4c8797d Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Thu, 14 Mar 2024 12:26:52 -0400 Subject: [PATCH] [kerning] cleanup and code review --- fontbe/src/features.rs | 1 + fontbe/src/features/kern.rs | 25 ++--- fontbe/src/features/ot_tags.rs | 176 ++++++++++++++++++++++++++++++ fontbe/src/features/properties.rs | 149 ++----------------------- fontbe/src/orchestration.rs | 4 +- 5 files changed, 198 insertions(+), 157 deletions(-) create mode 100644 fontbe/src/features/ot_tags.rs diff --git a/fontbe/src/features.rs b/fontbe/src/features.rs index 8a674b512..7a41dda97 100644 --- a/fontbe/src/features.rs +++ b/fontbe/src/features.rs @@ -41,6 +41,7 @@ use crate::{ mod kern; mod marks; +mod ot_tags; mod properties; pub use kern::{create_gather_ir_kerning_work, create_kern_segment_work, create_kerns_work}; diff --git a/fontbe/src/features/kern.rs b/fontbe/src/features/kern.rs index 2eadf7d92..b58842760 100644 --- a/fontbe/src/features/kern.rs +++ b/fontbe/src/features/kern.rs @@ -443,8 +443,6 @@ impl KerningGatherWork { let gdef = compilation.gdef_classes; - // serves as a standin for cmap in the fonttools impl - let pairs = fragments .iter() .flat_map(|frag| frag.kerns.iter()) @@ -460,18 +458,20 @@ impl KerningGatherWork { /// returns a vec of lookups (as a vec of subtables), along with a map of features -> lookups /// (by order in the first vec) + /// + /// this based on + /// fn assign_lookups_to_scripts( &self, lookups: HashMap, Vec>, ast: &ParseTree, // one of 'kern' or 'dist' current_feature: Tag, - ) -> (Vec>, HashMap>) { + ) -> (Vec>, BTreeMap>) { let dflt_langs = vec![DFLT_LANG]; let is_kern_feature = current_feature == KERN; assert!(is_kern_feature || current_feature == DIST); - // this is based on the _registerLookups fn, and this is where we should start typing let mut lookups_by_script = HashMap::new(); let mut ordered_lookups = Vec::new(); @@ -492,8 +492,6 @@ impl KerningGatherWork { } } - // now implement the logic from _registerLookups in KernFeatureWriter: - // let mut default_lookups = Vec::new(); if let Some(common_lookups) = lookups_by_script .get(&COMMON_SCRIPT) @@ -504,6 +502,7 @@ impl KerningGatherWork { } //inDesign bugfix: + // let dist_enabled_scripts = super::properties::dist_feature_enabled_scripts(); let (mut ltr_lookups, mut rtl_lookups) = (Vec::new(), Vec::new()); for (script, lookups) in lookups_by_script @@ -525,10 +524,9 @@ impl KerningGatherWork { } default_lookups.sort_unstable(); - let mut features = HashMap::new(); + let mut features = BTreeMap::new(); if !default_lookups.is_empty() { let languages = fea_langs_by_script.get(&DFLT_SCRIPT).unwrap_or(&dflt_langs); - log::debug!("DFLT languages: {languages:?}"); for lang in languages { features.insert( FeatureKey::new(KERN, *lang, DFLT_SCRIPT), @@ -552,11 +550,12 @@ impl KerningGatherWork { for (script, mut lookups) in lookups_by_script { lookups.extend(dflt_lookups.iter().copied()); + lookups.sort_unstable(); + lookups.dedup(); for tag in super::properties::script_to_ot_tags(&script) { let languages = fea_langs_by_script.get(&tag).unwrap_or(&dflt_langs); for lang in languages { - log::info!("added {} lookups for {tag}/{lang}", lookups.len()); features.insert(FeatureKey::new(KERN, *lang, tag), lookups.clone()); } } @@ -568,19 +567,19 @@ impl KerningGatherWork { } fn debug_ordered_lookups( - features: &HashMap>, + features: &BTreeMap>, lookups: &[Vec], ) { for (i, subtables) in lookups.iter().enumerate() { let total_rules = subtables.iter().map(|x| x.len()).sum::(); - log::debug!("lookup {i}, {total_rules} rules"); + log::trace!("lookup {i}, {total_rules} rules"); } let mut feature_keys = features.keys().collect::>(); feature_keys.sort(); for feature in feature_keys { let indices = features.get(feature).unwrap(); - log::debug!("feature {feature:?}, lookups {indices:?}"); + log::trace!("feature {feature:?}, lookups {indices:?}"); } } @@ -731,7 +730,7 @@ impl KernSplitContext { kerning_per_script = merge_scripts(kerning_per_script); for scripts in kerning_per_script.keys().filter(|x| x.len() > 1) { - log::info!("merging kerning lookups for {scripts:?}"); + log::debug!("merged kerning lookups for {scripts:?}"); } kerning_per_script diff --git a/fontbe/src/features/ot_tags.rs b/fontbe/src/features/ot_tags.rs new file mode 100644 index 000000000..c8c768237 --- /dev/null +++ b/fontbe/src/features/ot_tags.rs @@ -0,0 +1,176 @@ +//! mapping opentype tags to unicode scripts +//! +//! based on +//! + +use write_fonts::types::Tag; + +pub(crate) const DFLT_SCRIPT: Tag = Tag::new(b"DFLT"); + +pub(crate) static SCRIPT_ALIASES: &[(Tag, Tag)] = &[(Tag::new(b"jamo"), Tag::new(b"hang"))]; + +pub(crate) static SCRIPT_EXCEPTIONS: &[(&str, Tag)] = &[ + ("Hira", Tag::new(b"kana")), + ("Hrkt", Tag::new(b"kana")), + ("Laoo", Tag::new(b"lao ")), + ("Nkoo", Tag::new(b"nko ")), + ("Vaii", Tag::new(b"vai ")), + ("Yiii", Tag::new(b"yi ")), + ("Zinh", DFLT_SCRIPT), + ("Zmth", Tag::new(b"math")), + ("Zyyy", DFLT_SCRIPT), + ("Zzzz", DFLT_SCRIPT), +]; + +// 'math' is used as a script in opentype features: +// +pub(crate) static SCRIPT_EXCEPTIONS_REVERSED: &[(Tag, &str)] = &[(Tag::new(b"math"), "Zmth")]; + +pub(crate) static NEW_SCRIPTS: &[(Tag, &str)] = &[ + (Tag::new(b"bng2"), "Beng"), + (Tag::new(b"dev2"), "Deva"), + (Tag::new(b"gjr2"), "Gujr"), + (Tag::new(b"gur2"), "Guru"), + (Tag::new(b"knd2"), "Knda"), + (Tag::new(b"mlm2"), "Mlym"), + (Tag::new(b"mym2"), "Mymr"), + (Tag::new(b"ory2"), "Orya"), + (Tag::new(b"tel2"), "Telu"), + (Tag::new(b"tml2"), "Taml"), +]; + +pub(crate) static NEW_SCRIPT_TAGS: &[(&str, Tag)] = &[ + ("Beng", Tag::new(b"bng2")), + ("Deva", Tag::new(b"dev2")), + ("Gujr", Tag::new(b"gjr2")), + ("Guru", Tag::new(b"gur2")), + ("Kana", Tag::new(b"knd2")), + ("Mlym", Tag::new(b"mlm2")), + ("Mymr", Tag::new(b"mym2")), + ("Orya", Tag::new(b"ory2")), + ("Taml", Tag::new(b"tml2")), + ("Telu", Tag::new(b"tel2")), +]; + +pub(crate) static INDIC_SCRIPTS: &[&str] = &[ + "Beng", // Bengali + "Deva", // Devanagari + "Gujr", // Gujarati + "Guru", // Gurmukhi + "Knda", // Kannada + "Mlym", // Malayalam + "Orya", // Oriya + "Sinh", // Sinhala + "Taml", // Tamil + "Telu", // Telugu +]; + +pub(crate) static USE_SCRIPTS: &[&str] = &[ + // Correct as at Unicode 15.0 + "Adlm", // Adlam + "Ahom", // Ahom + "Bali", // Balinese + "Batk", // Batak + "Brah", // Brahmi + "Bugi", // Buginese + "Buhd", // Buhid + "Cakm", // Chakma + "Cham", // Cham + "Chrs", // Chorasmian + "Cpmn", // Cypro Minoan + "Diak", // Dives Akuru + "Dogr", // Dogra + "Dupl", // Duployan + "Egyp", // Egyptian Hieroglyphs + "Elym", // Elymaic + "Gong", // Gunjala Gondi + "Gonm", // Masaram Gondi + "Gran", // Grantha + "Hano", // Hanunoo + "Hmng", // Pahawh Hmong + "Hmnp", // Nyiakeng Puachue Hmong + "Java", // Javanese + "Kali", // Kayah Li + "Kawi", // Kawi + "Khar", // Kharosthi + "Khoj", // Khojki + "Kits", // Khitan Small Script + "Kthi", // Kaithi + "Lana", // Tai Tham + "Lepc", // Lepcha + "Limb", // Limbu + "Mahj", // Mahajani + "Maka", // Makasar + "Mand", // Mandaic + "Mani", // Manichaean + "Marc", // Marchen + "Medf", // Medefaidrin + "Modi", // Modi + "Mong", // Mongolian + "Mtei", // Meetei Mayek + "Mult", // Multani + "Nagm", // Nag Mundari + "Nand", // Nandinagari + "Newa", // Newa + "Nhks", // Bhaiksuki + "Nko ", // Nko + "Ougr", // Old Uyghur + "Phag", // Phags Pa + "Phlp", // Psalter Pahlavi + "Plrd", // Miao + "Rjng", // Rejang + "Rohg", // Hanifi Rohingya + "Saur", // Saurashtra + "Shrd", // Sharada + "Sidd", // Siddham + "Sind", // Khudawadi + "Sogd", // Sogdian + "Sogo", // Old Sogdian + "Soyo", // Soyombo + "Sund", // Sundanese + "Sylo", // Syloti Nagri + "Tagb", // Tagbanwa + "Takr", // Takri + "Tale", // Tai Le + "Tavt", // Tai Viet + "Tfng", // Tifinagh + "Tglg", // Tagalog + "Tibt", // Tibetan + "Tirh", // Tirhuta + "Tnsa", // Tangsa + "Toto", // Toto + "Vith", // Vithkuqi + "Wcho", // Wancho + "Yezi", // Yezidi + "Zanb", // Zanabazar Square +]; + +#[cfg(test)] +mod tests { + use super::*; + + /// we want to binary search these, so let's enforce that they are sorted, + /// to avoid future headaches + #[test] + fn const_arrays_are_sorted() { + fn get_original_and_sorted_items( + items: &[(T, U)], + ) -> (Vec, Vec) { + let originals = items.iter().map(|(a, _)| a.clone()).collect::>(); + let mut sorted = originals.clone(); + sorted.sort(); + (originals, sorted) + } + + let (actual, expected) = get_original_and_sorted_items(SCRIPT_ALIASES); + assert_eq!(actual, expected); + let (actual, expected) = get_original_and_sorted_items(SCRIPT_EXCEPTIONS_REVERSED); + assert_eq!(actual, expected); + let (actual, expected) = get_original_and_sorted_items(NEW_SCRIPTS); + assert_eq!(actual, expected); + let (actual, expected) = get_original_and_sorted_items(NEW_SCRIPT_TAGS); + assert_eq!(actual, expected); + let (actual, expected) = get_original_and_sorted_items(SCRIPT_EXCEPTIONS); + assert_eq!(actual, expected); + } +} diff --git a/fontbe/src/features/properties.rs b/fontbe/src/features/properties.rs index db06611ec..bb4d21239 100644 --- a/fontbe/src/features/properties.rs +++ b/fontbe/src/features/properties.rs @@ -12,6 +12,10 @@ use write_fonts::{ types::{GlyphId, Tag}, }; +use crate::features::ot_tags::{NEW_SCRIPTS, SCRIPT_ALIASES, SCRIPT_EXCEPTIONS_REVERSED}; + +use super::ot_tags::{DFLT_SCRIPT, INDIC_SCRIPTS, NEW_SCRIPT_TAGS, SCRIPT_EXCEPTIONS, USE_SCRIPTS}; + // SAFETY: we can easily verify that neither of these strings contains a nul byte. // (this is the only way we can declare these as constants) pub const COMMON_SCRIPT: UnicodeShortName = @@ -56,8 +60,11 @@ impl CharMap for Vec<(Arc, GlyphId)> { impl ScriptDirection { /// Returns the writing direction for the provided script + // pub(crate) fn for_script(script: &UnicodeShortName) -> Self { match script.as_str() { + // this list from + // "Zyyy" => ScriptDirection::Auto, "Arab" | "Hebr" | "Syrc" | "Thaa" | "Cprt" | "Khar" | "Phnx" | "Nkoo" | "Lydi" | "Avst" | "Armi" | "Phli" | "Prti" | "Sarb" | "Orkh" | "Samr" | "Mand" | "Merc" @@ -200,148 +207,6 @@ pub(crate) fn glyphs_by_bidi_class( ) } -const DFLT_SCRIPT: Tag = Tag::new(b"DFLT"); - -static SCRIPT_ALIASES: &[(Tag, Tag)] = &[(Tag::new(b"jamo"), Tag::new(b"hang"))]; - -static SCRIPT_EXCEPTIONS: &[(&str, Tag)] = &[ - ("Hira", Tag::new(b"kana")), - ("Hrkt", Tag::new(b"kana")), - ("Laoo", Tag::new(b"lao ")), - ("Nkoo", Tag::new(b"nko ")), - ("Vaii", Tag::new(b"vai ")), - ("Yiii", Tag::new(b"yi ")), - ("Zinh", DFLT_SCRIPT), - ("Zmth", Tag::new(b"math")), - ("Zyyy", DFLT_SCRIPT), - ("Zzzz", DFLT_SCRIPT), -]; - -// 'math' is used as a script in opentype features: -// -static SCRIPT_EXCEPTIONS_REVERSED: &[(Tag, &str)] = &[(Tag::new(b"math"), "Zmth")]; - -// I don't know what's going on here, just copying OTTags.py -static NEW_SCRIPTS: &[(Tag, &str)] = &[ - (Tag::new(b"bng2"), "Beng"), - (Tag::new(b"dev2"), "Deva"), - (Tag::new(b"gjr2"), "Gujr"), - (Tag::new(b"gur2"), "Guru"), - (Tag::new(b"knd2"), "Knda"), - (Tag::new(b"mlm2"), "Mlym"), - (Tag::new(b"mym2"), "Mymr"), - (Tag::new(b"ory2"), "Orya"), - (Tag::new(b"tel2"), "Telu"), - (Tag::new(b"tml2"), "Taml"), -]; - -// I don't know what's going on here, just copying OTTags.py -static NEW_SCRIPT_TAGS: &[(&str, Tag)] = &[ - ("Beng", Tag::new(b"bng2")), - ("Deva", Tag::new(b"dev2")), - ("Gujr", Tag::new(b"gjr2")), - ("Guru", Tag::new(b"gur2")), - ("Kana", Tag::new(b"knd2")), - ("Mlym", Tag::new(b"mlm2")), - ("Mymr", Tag::new(b"mym2")), - ("Orya", Tag::new(b"ory2")), - ("Taml", Tag::new(b"tml2")), - ("Telu", Tag::new(b"tel2")), -]; - -static INDIC_SCRIPTS: &[&str] = &[ - "Beng", // Bengali - "Deva", // Devanagari - "Gujr", // Gujarati - "Guru", // Gurmukhi - "Knda", // Kannada - "Mlym", // Malayalam - "Orya", // Oriya - "Sinh", // Sinhala - "Taml", // Tamil - "Telu", // Telugu -]; - -static USE_SCRIPTS: &[&str] = &[ - // Correct as at Unicode 15.0 - "Adlm", // Adlam - "Ahom", // Ahom - "Bali", // Balinese - "Batk", // Batak - "Brah", // Brahmi - "Bugi", // Buginese - "Buhd", // Buhid - "Cakm", // Chakma - "Cham", // Cham - "Chrs", // Chorasmian - "Cpmn", // Cypro Minoan - "Diak", // Dives Akuru - "Dogr", // Dogra - "Dupl", // Duployan - "Egyp", // Egyptian Hieroglyphs - "Elym", // Elymaic - "Gong", // Gunjala Gondi - "Gonm", // Masaram Gondi - "Gran", // Grantha - "Hano", // Hanunoo - "Hmng", // Pahawh Hmong - "Hmnp", // Nyiakeng Puachue Hmong - "Java", // Javanese - "Kali", // Kayah Li - "Kawi", // Kawi - "Khar", // Kharosthi - "Khoj", // Khojki - "Kits", // Khitan Small Script - "Kthi", // Kaithi - "Lana", // Tai Tham - "Lepc", // Lepcha - "Limb", // Limbu - "Mahj", // Mahajani - "Maka", // Makasar - "Mand", // Mandaic - "Mani", // Manichaean - "Marc", // Marchen - "Medf", // Medefaidrin - "Modi", // Modi - "Mong", // Mongolian - "Mtei", // Meetei Mayek - "Mult", // Multani - "Nagm", // Nag Mundari - "Nand", // Nandinagari - "Newa", // Newa - "Nhks", // Bhaiksuki - "Nko ", // Nko - "Ougr", // Old Uyghur - "Phag", // Phags Pa - "Phlp", // Psalter Pahlavi - "Plrd", // Miao - "Rjng", // Rejang - "Rohg", // Hanifi Rohingya - "Saur", // Saurashtra - "Shrd", // Sharada - "Sidd", // Siddham - "Sind", // Khudawadi - "Sogd", // Sogdian - "Sogo", // Old Sogdian - "Soyo", // Soyombo - "Sund", // Sundanese - "Sylo", // Syloti Nagri - "Tagb", // Tagbanwa - "Takr", // Takri - "Tale", // Tai Le - "Tavt", // Tai Viet - "Tfng", // Tifinagh - "Tglg", // Tagalog - "Tibt", // Tibetan - "Tirh", // Tirhuta - "Tnsa", // Tangsa - "Toto", // Toto - "Vith", // Vithkuqi - "Wcho", // Wancho - "Yezi", // Yezidi - "Zanb", // Zanabazar Square -]; - pub(crate) fn dist_feature_enabled_scripts() -> HashSet { INDIC_SCRIPTS .iter() diff --git a/fontbe/src/orchestration.rs b/fontbe/src/orchestration.rs index da189842f..a92d2d441 100644 --- a/fontbe/src/orchestration.rs +++ b/fontbe/src/orchestration.rs @@ -1,7 +1,7 @@ //! Helps coordinate the graph execution for BE use std::{ - collections::{BTreeMap, HashMap, HashSet}, + collections::{BTreeMap, HashSet}, fmt::Display, fs::File, io::{self, BufReader, BufWriter, Read, Write}, @@ -377,7 +377,7 @@ pub struct FeaRsKerns { /// ordered! pub lookups: Vec>, /// each value is a set of lookups, referenced by their order in array above - pub features: HashMap>, + pub features: BTreeMap>, } /// The abstract syntax tree of any user FEA.