From ab6f2f94e4616244d3d902922a9dd18ea4833378 Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Wed, 8 May 2024 14:39:28 -0400 Subject: [PATCH] [marks] Be more judicious in what we consider a mark Specifically this means looking at the actual gdef category information for combining marks (and ignoring spacing marks!) specifically 'cedillacomb' is a combining mark, but 'cedilla' is not. --- fontbe/src/features/marks.rs | 97 ++++++++++++++++++++++++++---------- fontir/src/ir.rs | 4 ++ 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/fontbe/src/features/marks.rs b/fontbe/src/features/marks.rs index eef250ee..8b460a92 100644 --- a/fontbe/src/features/marks.rs +++ b/fontbe/src/features/marks.rs @@ -24,7 +24,7 @@ use crate::{ orchestration::{AnyWorkId, BeWork, Context, FeaAst, FeaRsMarks, WorkId}, }; use fontir::{ - ir::{self, AnchorKind, GlyphAnchors, GlyphOrder, StaticMetadata}, + ir::{self, Anchor, AnchorKind, GlyphAnchors, GlyphOrder, StaticMetadata}, orchestration::WorkId as FeWorkId, }; @@ -44,14 +44,16 @@ struct MarkLookupBuilder<'a> { // extracted from public.openTypeCatgories/GlyphData.xml or FEA gdef_classes: BTreeMap, // pruned, only the anchors we are using - anchors: BTreeMap>, + anchor_lists: BTreeMap>, glyph_order: &'a GlyphOrder, static_metadata: &'a StaticMetadata, - fea_scripts: HashSet, + // we don't currently use this, because just adding lookups to all scripts works? + _fea_scripts: HashSet, + mark_glyphs: BTreeSet, } /// The bases and marks in a particular group, e.g. "top" or "bottom" -#[derive(Default, Clone, PartialEq)] +#[derive(Default, Debug, Clone, PartialEq)] struct MarkGroup<'a> { bases: Vec<(GlyphName, &'a ir::Anchor)>, marks: Vec<(GlyphName, &'a ir::Anchor)>, @@ -94,23 +96,40 @@ impl<'a> MarkLookupBuilder<'a> { fea_scripts: HashSet, ) -> Self { // first we want to narrow our input down to only anchors that are participating. + // in pythonland this is https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66a/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L380 let mut pruned = BTreeMap::new(); - let mut base_names = HashSet::new(); - let mut mark_names = HashSet::new(); + let mut base_groups = HashSet::new(); + let mut mark_groups = HashSet::new(); + let include = gdef_classes + .iter() + .filter_map(|(name, cls)| { + matches!( + *cls, + GlyphClassDef::Base | GlyphClassDef::Mark | GlyphClassDef::Ligature + ) + .then_some(name) + }) + .collect::>(); for anchors in anchors { // skip anchors for non-export glyphs if !glyph_order.contains(&anchors.glyph_name) { continue; } + // skip glyphs that are not mark/lig/base, if we have any defined categories + if !include.is_empty() && !include.contains(&anchors.glyph_name) { + continue; + } for anchor in &anchors.anchors { match &anchor.kind { - AnchorKind::Base(group) => { - base_names.insert(group); + AnchorKind::Base(group) + | AnchorKind::Ligature { + group_name: group, .. + } => { + base_groups.insert(group); } AnchorKind::Mark(group) => { - mark_names.insert(group); + mark_groups.insert(group); } - //TODO: ligatures // skip non base/mark anchors _ => continue, } @@ -121,7 +140,9 @@ impl<'a> MarkLookupBuilder<'a> { } } - let used_groups = base_names.intersection(&mark_names).collect::>(); + let used_groups = base_groups + .intersection(&mark_groups) + .collect::>(); // we don't care about marks with no corresponding bases, & vice-versa see: // pruned.retain(|_, anchors| { @@ -133,12 +154,15 @@ impl<'a> MarkLookupBuilder<'a> { }); !anchors.is_empty() }); + + let mark_glyphs = find_mark_glyphs(&pruned, &gdef_classes); Self { - anchors: pruned, + anchor_lists: pruned, glyph_order, - fea_scripts, + _fea_scripts: fea_scripts, static_metadata, gdef_classes, + mark_glyphs, } } @@ -161,12 +185,8 @@ impl<'a> MarkLookupBuilder<'a> { ) -> Result, Error> { groups .into_iter() + .filter(|(_, group)| !(group.bases.is_empty() || group.marks.is_empty())) .map(|(group_name, group)| { - assert!( - !group.bases.is_empty() && !group.marks.is_empty(), - "prechecked" - ); - let mut builder = T::default(); for (mark_name, anchor) in group.marks { // we already filtered to only things in glyph order @@ -188,15 +208,21 @@ impl<'a> MarkLookupBuilder<'a> { fn make_mark_to_base_groups(&self) -> BTreeMap> { let mut groups = BTreeMap::<_, MarkGroup>::new(); - for (glyph_name, anchors) in &self.anchors { + for (glyph_name, anchors) in &self.anchor_lists { + let is_mark = self.mark_glyphs.contains(glyph_name); + // if we have explicit gdef classes and this is not an expilcit base + let is_not_base = !self.gdef_classes.is_empty() + && (self.gdef_classes.get(glyph_name)) != Some(&GlyphClassDef::Base); + + let treat_as_base = !(is_mark | is_not_base); for anchor in anchors { match &anchor.kind { - fontir::ir::AnchorKind::Base(group) => groups + fontir::ir::AnchorKind::Base(group) if treat_as_base => groups .entry(group.clone()) .or_default() .bases .push((glyph_name.clone(), anchor)), - fontir::ir::AnchorKind::Mark(group) => groups + fontir::ir::AnchorKind::Mark(group) if is_mark => groups .entry(group.clone()) .or_default() .marks @@ -211,12 +237,13 @@ impl<'a> MarkLookupBuilder<'a> { fn make_mark_to_mark_groups(&self) -> BTreeMap> { // first find the set of glyphs that are marks, i.e. have any mark attachment point. let (mark_glyphs, mark_anchors): (BTreeSet<_>, BTreeSet<_>) = self - .anchors + .anchor_lists .iter() + .filter(|(name, _)| self.mark_glyphs.contains(*name)) .flat_map(|(name, anchors)| { - anchors.iter().filter_map(|anchor| { + anchors.iter().filter_map(move |anchor| { if let AnchorKind::Mark(group) = &anchor.kind { - Some((name.clone(), group.clone())) + Some((name, group)) } else { None } @@ -227,7 +254,7 @@ impl<'a> MarkLookupBuilder<'a> { // then iterate again, looking for glyphs that we have identified as marks, // but which also have participating base anchors let mut result = BTreeMap::::new(); - for (glyph, glyph_anchors) in &self.anchors { + for (glyph, glyph_anchors) in &self.anchor_lists { if !mark_glyphs.contains(glyph) { continue; } @@ -248,7 +275,7 @@ impl<'a> MarkLookupBuilder<'a> { // then add the anchors for the mark glyphs, if they exist for mark in mark_glyphs { - let anchors = self.anchors.get(&mark).unwrap(); + let anchors = self.anchor_lists.get(mark).unwrap(); for anchor in anchors { if let AnchorKind::Mark(group) = &anchor.kind { // only add mark glyph if there is at least one base @@ -290,6 +317,8 @@ impl Work for MarkWork { .collect::>(); let fea_scripts = get_fea_scripts(&ast.ast); + // this code is roughly equivalent to what in pythonland happens in + // setContext: https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66a/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L322 let ctx = MarkLookupBuilder::new( anchors, &glyph_order, @@ -305,6 +334,22 @@ impl Work for MarkWork { } } +// in py this is set during _groupMarkGlyphsByAnchor; we try to match that logic +// https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L412 +fn find_mark_glyphs( + anchors: &BTreeMap>, + gdef_classes: &BTreeMap, +) -> BTreeSet { + anchors + .iter() + .filter(|(name, anchors)| { + (!gdef_classes.is_empty() && gdef_classes.get(*name) == Some(&GlyphClassDef::Mark)) + && anchors.iter().any(|a| a.is_mark()) + }) + .map(|(name, _)| name.to_owned()) + .collect() +} + fn get_fea_scripts(ast: &ParseTree) -> HashSet { ast.typed_root() .statements() diff --git a/fontir/src/ir.rs b/fontir/src/ir.rs index 2a1c4553..30029ec9 100644 --- a/fontir/src/ir.rs +++ b/fontir/src/ir.rs @@ -1112,6 +1112,10 @@ impl Anchor { _ => None, } } + + pub fn is_mark(&self) -> bool { + matches!(self.kind, AnchorKind::Mark(_)) + } } /// A type for building glyph anchors, reused by different backends