From 73b542c1c5b70fc392c6e595bafff7bb6d65ecc3 Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Mon, 6 May 2024 16:16:53 -0400 Subject: [PATCH 1/3] [marks] Get GDEF classes from FEA if necessary --- fontbe/src/features/marks.rs | 58 +++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/fontbe/src/features/marks.rs b/fontbe/src/features/marks.rs index 54148676..911a7070 100644 --- a/fontbe/src/features/marks.rs +++ b/fontbe/src/features/marks.rs @@ -1,8 +1,11 @@ //! Generates a [FeaRsMarks] datastructure to be fed to fea-rs -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; -use fea_rs::compile::{MarkToBaseBuilder, MarkToMarkBuilder}; +use fea_rs::{ + compile::{MarkToBaseBuilder, MarkToMarkBuilder, NopFeatureProvider, NopVariationInfo}, + Opts, +}; use fontdrasil::{ orchestration::{Access, AccessBuilder, Work}, types::GlyphName, @@ -17,7 +20,7 @@ use write_fonts::{ use crate::{ error::Error, - orchestration::{AnyWorkId, BeWork, Context, FeaRsMarks, WorkId}, + orchestration::{AnyWorkId, BeWork, Context, FeaAst, FeaRsMarks, WorkId}, }; use fontir::{ ir::{self, AnchorKind, GlyphAnchors, GlyphOrder, StaticMetadata}, @@ -34,9 +37,9 @@ pub fn create_mark_work() -> Box { /// The canonical name shared for a given mark/base pair, e.g. `top` for `top`/`_top` type MarkGroupName = SmolStr; -#[allow(dead_code)] // a few currently unused fields struct MarkLookupBuilder<'a> { - gdef_classes: Option>, + // extracted from public.openTypeCatgories/GlyphData.xml or FEA + gdef_classes: BTreeMap, // pruned, only the anchors we are using anchors: BTreeMap>, glyph_order: &'a GlyphOrder, @@ -83,7 +86,7 @@ impl<'a> MarkLookupBuilder<'a> { fn new( anchors: Vec<&'a GlyphAnchors>, glyph_order: &'a GlyphOrder, - gdef_classes: Option>, + gdef_classes: BTreeMap, static_metadata: &'a StaticMetadata, ) -> Self { // first we want to narrow our input down to only anchors that are participating. @@ -127,11 +130,11 @@ impl<'a> MarkLookupBuilder<'a> { !anchors.is_empty() }); Self { - gdef_classes, anchors: pruned, glyph_order, fea_scripts: Default::default(), static_metadata, + gdef_classes, } } @@ -264,6 +267,7 @@ impl Work for MarkWork { AccessBuilder::new() .variant(FeWorkId::StaticMetadata) .variant(FeWorkId::GlyphOrder) + .variant(WorkId::FeaturesAst) .variant(FeWorkId::ALL_ANCHORS) .build() } @@ -273,13 +277,15 @@ impl Work for MarkWork { let static_metadata = context.ir.static_metadata.get(); let glyph_order = context.ir.glyph_order.get(); let raw_anchors = context.ir.anchors.all(); + let ast = context.fea_ast.get(); + let gdef_classes = get_gdef_classes(&static_metadata, &ast, &glyph_order); let anchors = raw_anchors .iter() .map(|(_, anchors)| anchors.as_ref()) .collect::>(); - let ctx = MarkLookupBuilder::new(anchors, &glyph_order, None, &static_metadata); + let ctx = MarkLookupBuilder::new(anchors, &glyph_order, gdef_classes, &static_metadata); let all_marks = ctx.build()?; context.fea_rs_marks.set(all_marks); @@ -288,6 +294,42 @@ impl Work for MarkWork { } } +fn get_gdef_classes( + meta: &StaticMetadata, + ast: &FeaAst, + glyph_order: &GlyphOrder, +) -> BTreeMap { + let glyph_map = glyph_order.iter().cloned().collect(); + // if we prefer classes defined in fea, compile the fea and see if we have any + if meta.gdef_categories.prefer_gdef_categories_in_fea { + if let Some(gdef_classes) = + fea_rs::compile::compile::( + &ast.ast, + &glyph_map, + None, + None, + Opts::new().compile_gpos(false), + ) + .ok() + .and_then(|mut comp| comp.0.gdef_classes.take()) + { + return gdef_classes + .into_iter() + .filter_map(|(g, cls)| { + glyph_order + .glyph_name(g.to_u16() as _) + .cloned() + .map(|name| (name, cls)) + }) + .collect(); + } + } + // otherwise (we don't care about FEA or nothing is defined) we use the + // classes in StaticMetadata (which are from public.openTypeCategories or + // GlyphData.xml) + meta.gdef_categories.categories.clone() +} + fn resolve_anchor( anchor: &fontir::ir::Anchor, static_metadata: &StaticMetadata, From b455abb62499b38946acefbce4b223953b825d5d Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Mon, 6 May 2024 16:47:48 -0400 Subject: [PATCH 2/3] [marks] Get scripts from FEA --- fontbe/src/features.rs | 3 +++ fontbe/src/features/kern.rs | 4 +--- fontbe/src/features/marks.rs | 26 +++++++++++++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/fontbe/src/features.rs b/fontbe/src/features.rs index f57ab30e..24f0bfde 100644 --- a/fontbe/src/features.rs +++ b/fontbe/src/features.rs @@ -50,6 +50,9 @@ pub(crate) use common::PendingLookup; pub use kern::{create_gather_ir_kerning_work, create_kern_segment_work, create_kerns_work}; pub use marks::create_mark_work; +const DFLT_SCRIPT: Tag = Tag::new(b"DFLT"); +const DFLT_LANG: Tag = Tag::new(b"dflt"); + #[derive(Debug)] pub struct FeatureParsingWork {} diff --git a/fontbe/src/features/kern.rs b/fontbe/src/features/kern.rs index e78da51b..cee0cd00 100644 --- a/fontbe/src/features/kern.rs +++ b/fontbe/src/features/kern.rs @@ -44,7 +44,7 @@ use crate::{ }, }; -use super::{properties::CharMap, PendingLookup}; +use super::{properties::CharMap, PendingLookup, DFLT_LANG, DFLT_SCRIPT}; /// On Linux it took ~0.01 ms per loop, try to get enough to make fan out worthwhile /// based on empirical testing @@ -53,8 +53,6 @@ const KERN: Tag = Tag::new(b"kern"); // we don't currently compile this feature, but we will, and it is referenced // in places because our impl is based on fonttools. const DIST: Tag = Tag::new(b"dist"); -const DFLT_SCRIPT: Tag = Tag::new(b"DFLT"); -const DFLT_LANG: Tag = Tag::new(b"dflt"); /// Accumulation of all the kerning from IR #[derive(Debug)] diff --git a/fontbe/src/features/marks.rs b/fontbe/src/features/marks.rs index 911a7070..eef250ee 100644 --- a/fontbe/src/features/marks.rs +++ b/fontbe/src/features/marks.rs @@ -4,7 +4,8 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use fea_rs::{ compile::{MarkToBaseBuilder, MarkToMarkBuilder, NopFeatureProvider, NopVariationInfo}, - Opts, + typed::{AstNode, LanguageSystem}, + Opts, ParseTree, }; use fontdrasil::{ orchestration::{Access, AccessBuilder, Work}, @@ -27,6 +28,8 @@ use fontir::{ orchestration::WorkId as FeWorkId, }; +use super::DFLT_SCRIPT; + #[derive(Debug)] struct MarkWork {} @@ -88,6 +91,7 @@ impl<'a> MarkLookupBuilder<'a> { glyph_order: &'a GlyphOrder, gdef_classes: BTreeMap, static_metadata: &'a StaticMetadata, + fea_scripts: HashSet, ) -> Self { // first we want to narrow our input down to only anchors that are participating. let mut pruned = BTreeMap::new(); @@ -132,7 +136,7 @@ impl<'a> MarkLookupBuilder<'a> { Self { anchors: pruned, glyph_order, - fea_scripts: Default::default(), + fea_scripts, static_metadata, gdef_classes, } @@ -285,7 +289,14 @@ impl Work for MarkWork { .map(|(_, anchors)| anchors.as_ref()) .collect::>(); - let ctx = MarkLookupBuilder::new(anchors, &glyph_order, gdef_classes, &static_metadata); + let fea_scripts = get_fea_scripts(&ast.ast); + let ctx = MarkLookupBuilder::new( + anchors, + &glyph_order, + gdef_classes, + &static_metadata, + fea_scripts, + ); let all_marks = ctx.build()?; context.fea_rs_marks.set(all_marks); @@ -294,6 +305,15 @@ impl Work for MarkWork { } } +fn get_fea_scripts(ast: &ParseTree) -> HashSet { + ast.typed_root() + .statements() + .filter_map(LanguageSystem::cast) + .map(|langsys| langsys.script().to_raw()) + .filter(|tag| *tag != DFLT_SCRIPT) + .collect() +} + fn get_gdef_classes( meta: &StaticMetadata, ast: &FeaAst, From ab6f2f94e4616244d3d902922a9dd18ea4833378 Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Wed, 8 May 2024 14:39:28 -0400 Subject: [PATCH 3/3] [marks] Be more judicious in what we consider a mark Specifically this means looking at the actual gdef category information for combining marks (and ignoring spacing marks!) specifically 'cedillacomb' is a combining mark, but 'cedilla' is not. --- fontbe/src/features/marks.rs | 97 ++++++++++++++++++++++++++---------- fontir/src/ir.rs | 4 ++ 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/fontbe/src/features/marks.rs b/fontbe/src/features/marks.rs index eef250ee..8b460a92 100644 --- a/fontbe/src/features/marks.rs +++ b/fontbe/src/features/marks.rs @@ -24,7 +24,7 @@ use crate::{ orchestration::{AnyWorkId, BeWork, Context, FeaAst, FeaRsMarks, WorkId}, }; use fontir::{ - ir::{self, AnchorKind, GlyphAnchors, GlyphOrder, StaticMetadata}, + ir::{self, Anchor, AnchorKind, GlyphAnchors, GlyphOrder, StaticMetadata}, orchestration::WorkId as FeWorkId, }; @@ -44,14 +44,16 @@ struct MarkLookupBuilder<'a> { // extracted from public.openTypeCatgories/GlyphData.xml or FEA gdef_classes: BTreeMap, // pruned, only the anchors we are using - anchors: BTreeMap>, + anchor_lists: BTreeMap>, glyph_order: &'a GlyphOrder, static_metadata: &'a StaticMetadata, - fea_scripts: HashSet, + // we don't currently use this, because just adding lookups to all scripts works? + _fea_scripts: HashSet, + mark_glyphs: BTreeSet, } /// The bases and marks in a particular group, e.g. "top" or "bottom" -#[derive(Default, Clone, PartialEq)] +#[derive(Default, Debug, Clone, PartialEq)] struct MarkGroup<'a> { bases: Vec<(GlyphName, &'a ir::Anchor)>, marks: Vec<(GlyphName, &'a ir::Anchor)>, @@ -94,23 +96,40 @@ impl<'a> MarkLookupBuilder<'a> { fea_scripts: HashSet, ) -> Self { // first we want to narrow our input down to only anchors that are participating. + // in pythonland this is https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66a/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L380 let mut pruned = BTreeMap::new(); - let mut base_names = HashSet::new(); - let mut mark_names = HashSet::new(); + let mut base_groups = HashSet::new(); + let mut mark_groups = HashSet::new(); + let include = gdef_classes + .iter() + .filter_map(|(name, cls)| { + matches!( + *cls, + GlyphClassDef::Base | GlyphClassDef::Mark | GlyphClassDef::Ligature + ) + .then_some(name) + }) + .collect::>(); for anchors in anchors { // skip anchors for non-export glyphs if !glyph_order.contains(&anchors.glyph_name) { continue; } + // skip glyphs that are not mark/lig/base, if we have any defined categories + if !include.is_empty() && !include.contains(&anchors.glyph_name) { + continue; + } for anchor in &anchors.anchors { match &anchor.kind { - AnchorKind::Base(group) => { - base_names.insert(group); + AnchorKind::Base(group) + | AnchorKind::Ligature { + group_name: group, .. + } => { + base_groups.insert(group); } AnchorKind::Mark(group) => { - mark_names.insert(group); + mark_groups.insert(group); } - //TODO: ligatures // skip non base/mark anchors _ => continue, } @@ -121,7 +140,9 @@ impl<'a> MarkLookupBuilder<'a> { } } - let used_groups = base_names.intersection(&mark_names).collect::>(); + let used_groups = base_groups + .intersection(&mark_groups) + .collect::>(); // we don't care about marks with no corresponding bases, & vice-versa see: // pruned.retain(|_, anchors| { @@ -133,12 +154,15 @@ impl<'a> MarkLookupBuilder<'a> { }); !anchors.is_empty() }); + + let mark_glyphs = find_mark_glyphs(&pruned, &gdef_classes); Self { - anchors: pruned, + anchor_lists: pruned, glyph_order, - fea_scripts, + _fea_scripts: fea_scripts, static_metadata, gdef_classes, + mark_glyphs, } } @@ -161,12 +185,8 @@ impl<'a> MarkLookupBuilder<'a> { ) -> Result, Error> { groups .into_iter() + .filter(|(_, group)| !(group.bases.is_empty() || group.marks.is_empty())) .map(|(group_name, group)| { - assert!( - !group.bases.is_empty() && !group.marks.is_empty(), - "prechecked" - ); - let mut builder = T::default(); for (mark_name, anchor) in group.marks { // we already filtered to only things in glyph order @@ -188,15 +208,21 @@ impl<'a> MarkLookupBuilder<'a> { fn make_mark_to_base_groups(&self) -> BTreeMap> { let mut groups = BTreeMap::<_, MarkGroup>::new(); - for (glyph_name, anchors) in &self.anchors { + for (glyph_name, anchors) in &self.anchor_lists { + let is_mark = self.mark_glyphs.contains(glyph_name); + // if we have explicit gdef classes and this is not an expilcit base + let is_not_base = !self.gdef_classes.is_empty() + && (self.gdef_classes.get(glyph_name)) != Some(&GlyphClassDef::Base); + + let treat_as_base = !(is_mark | is_not_base); for anchor in anchors { match &anchor.kind { - fontir::ir::AnchorKind::Base(group) => groups + fontir::ir::AnchorKind::Base(group) if treat_as_base => groups .entry(group.clone()) .or_default() .bases .push((glyph_name.clone(), anchor)), - fontir::ir::AnchorKind::Mark(group) => groups + fontir::ir::AnchorKind::Mark(group) if is_mark => groups .entry(group.clone()) .or_default() .marks @@ -211,12 +237,13 @@ impl<'a> MarkLookupBuilder<'a> { fn make_mark_to_mark_groups(&self) -> BTreeMap> { // first find the set of glyphs that are marks, i.e. have any mark attachment point. let (mark_glyphs, mark_anchors): (BTreeSet<_>, BTreeSet<_>) = self - .anchors + .anchor_lists .iter() + .filter(|(name, _)| self.mark_glyphs.contains(*name)) .flat_map(|(name, anchors)| { - anchors.iter().filter_map(|anchor| { + anchors.iter().filter_map(move |anchor| { if let AnchorKind::Mark(group) = &anchor.kind { - Some((name.clone(), group.clone())) + Some((name, group)) } else { None } @@ -227,7 +254,7 @@ impl<'a> MarkLookupBuilder<'a> { // then iterate again, looking for glyphs that we have identified as marks, // but which also have participating base anchors let mut result = BTreeMap::::new(); - for (glyph, glyph_anchors) in &self.anchors { + for (glyph, glyph_anchors) in &self.anchor_lists { if !mark_glyphs.contains(glyph) { continue; } @@ -248,7 +275,7 @@ impl<'a> MarkLookupBuilder<'a> { // then add the anchors for the mark glyphs, if they exist for mark in mark_glyphs { - let anchors = self.anchors.get(&mark).unwrap(); + let anchors = self.anchor_lists.get(mark).unwrap(); for anchor in anchors { if let AnchorKind::Mark(group) = &anchor.kind { // only add mark glyph if there is at least one base @@ -290,6 +317,8 @@ impl Work for MarkWork { .collect::>(); let fea_scripts = get_fea_scripts(&ast.ast); + // this code is roughly equivalent to what in pythonland happens in + // setContext: https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66a/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L322 let ctx = MarkLookupBuilder::new( anchors, &glyph_order, @@ -305,6 +334,22 @@ impl Work for MarkWork { } } +// in py this is set during _groupMarkGlyphsByAnchor; we try to match that logic +// https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L412 +fn find_mark_glyphs( + anchors: &BTreeMap>, + gdef_classes: &BTreeMap, +) -> BTreeSet { + anchors + .iter() + .filter(|(name, anchors)| { + (!gdef_classes.is_empty() && gdef_classes.get(*name) == Some(&GlyphClassDef::Mark)) + && anchors.iter().any(|a| a.is_mark()) + }) + .map(|(name, _)| name.to_owned()) + .collect() +} + fn get_fea_scripts(ast: &ParseTree) -> HashSet { ast.typed_root() .statements() diff --git a/fontir/src/ir.rs b/fontir/src/ir.rs index 2a1c4553..30029ec9 100644 --- a/fontir/src/ir.rs +++ b/fontir/src/ir.rs @@ -1112,6 +1112,10 @@ impl Anchor { _ => None, } } + + pub fn is_mark(&self) -> bool { + matches!(self.kind, AnchorKind::Mark(_)) + } } /// A type for building glyph anchors, reused by different backends