Skip to content

Commit

Permalink
[marks] Be more judicious in what we consider a mark
Browse files Browse the repository at this point in the history
Specifically this means looking at the actual gdef category information
for combining marks (and ignoring spacing marks!) specifically
'cedillacomb' is a combining mark, but 'cedilla' is not.
  • Loading branch information
cmyr committed May 8, 2024
1 parent b455abb commit ab6f2f9
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 26 deletions.
97 changes: 71 additions & 26 deletions fontbe/src/features/marks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::{
orchestration::{AnyWorkId, BeWork, Context, FeaAst, FeaRsMarks, WorkId},
};
use fontir::{
ir::{self, AnchorKind, GlyphAnchors, GlyphOrder, StaticMetadata},
ir::{self, Anchor, AnchorKind, GlyphAnchors, GlyphOrder, StaticMetadata},
orchestration::WorkId as FeWorkId,
};

Expand All @@ -44,14 +44,16 @@ struct MarkLookupBuilder<'a> {
// extracted from public.openTypeCatgories/GlyphData.xml or FEA
gdef_classes: BTreeMap<GlyphName, GlyphClassDef>,
// pruned, only the anchors we are using
anchors: BTreeMap<GlyphName, Vec<&'a ir::Anchor>>,
anchor_lists: BTreeMap<GlyphName, Vec<&'a ir::Anchor>>,
glyph_order: &'a GlyphOrder,
static_metadata: &'a StaticMetadata,
fea_scripts: HashSet<Tag>,
// we don't currently use this, because just adding lookups to all scripts works?
_fea_scripts: HashSet<Tag>,
mark_glyphs: BTreeSet<GlyphName>,
}

/// The bases and marks in a particular group, e.g. "top" or "bottom"
#[derive(Default, Clone, PartialEq)]
#[derive(Default, Debug, Clone, PartialEq)]
struct MarkGroup<'a> {
bases: Vec<(GlyphName, &'a ir::Anchor)>,
marks: Vec<(GlyphName, &'a ir::Anchor)>,
Expand Down Expand Up @@ -94,23 +96,40 @@ impl<'a> MarkLookupBuilder<'a> {
fea_scripts: HashSet<Tag>,
) -> Self {
// first we want to narrow our input down to only anchors that are participating.
// in pythonland this is https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66a/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L380
let mut pruned = BTreeMap::new();
let mut base_names = HashSet::new();
let mut mark_names = HashSet::new();
let mut base_groups = HashSet::new();
let mut mark_groups = HashSet::new();
let include = gdef_classes
.iter()
.filter_map(|(name, cls)| {
matches!(
*cls,
GlyphClassDef::Base | GlyphClassDef::Mark | GlyphClassDef::Ligature
)
.then_some(name)
})
.collect::<HashSet<_>>();
for anchors in anchors {
// skip anchors for non-export glyphs
if !glyph_order.contains(&anchors.glyph_name) {
continue;
}
// skip glyphs that are not mark/lig/base, if we have any defined categories
if !include.is_empty() && !include.contains(&anchors.glyph_name) {
continue;
}
for anchor in &anchors.anchors {
match &anchor.kind {
AnchorKind::Base(group) => {
base_names.insert(group);
AnchorKind::Base(group)
| AnchorKind::Ligature {
group_name: group, ..
} => {
base_groups.insert(group);
}
AnchorKind::Mark(group) => {
mark_names.insert(group);
mark_groups.insert(group);
}
//TODO: ligatures
// skip non base/mark anchors
_ => continue,
}
Expand All @@ -121,7 +140,9 @@ impl<'a> MarkLookupBuilder<'a> {
}
}

let used_groups = base_names.intersection(&mark_names).collect::<HashSet<_>>();
let used_groups = base_groups
.intersection(&mark_groups)
.collect::<HashSet<_>>();
// we don't care about marks with no corresponding bases, & vice-versa see:
// <https://github.com/googlefonts/ufo2ft/blob/6787e37e63530/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L359>
pruned.retain(|_, anchors| {
Expand All @@ -133,12 +154,15 @@ impl<'a> MarkLookupBuilder<'a> {
});
!anchors.is_empty()
});

let mark_glyphs = find_mark_glyphs(&pruned, &gdef_classes);
Self {
anchors: pruned,
anchor_lists: pruned,
glyph_order,
fea_scripts,
_fea_scripts: fea_scripts,
static_metadata,
gdef_classes,
mark_glyphs,
}
}

Expand All @@ -161,12 +185,8 @@ impl<'a> MarkLookupBuilder<'a> {
) -> Result<Vec<T>, Error> {
groups
.into_iter()
.filter(|(_, group)| !(group.bases.is_empty() || group.marks.is_empty()))
.map(|(group_name, group)| {
assert!(
!group.bases.is_empty() && !group.marks.is_empty(),
"prechecked"
);

let mut builder = T::default();
for (mark_name, anchor) in group.marks {
// we already filtered to only things in glyph order
Expand All @@ -188,15 +208,21 @@ impl<'a> MarkLookupBuilder<'a> {

fn make_mark_to_base_groups(&self) -> BTreeMap<MarkGroupName, MarkGroup<'a>> {
let mut groups = BTreeMap::<_, MarkGroup>::new();
for (glyph_name, anchors) in &self.anchors {
for (glyph_name, anchors) in &self.anchor_lists {
let is_mark = self.mark_glyphs.contains(glyph_name);
// if we have explicit gdef classes and this is not an expilcit base
let is_not_base = !self.gdef_classes.is_empty()
&& (self.gdef_classes.get(glyph_name)) != Some(&GlyphClassDef::Base);

let treat_as_base = !(is_mark | is_not_base);
for anchor in anchors {
match &anchor.kind {
fontir::ir::AnchorKind::Base(group) => groups
fontir::ir::AnchorKind::Base(group) if treat_as_base => groups
.entry(group.clone())
.or_default()
.bases
.push((glyph_name.clone(), anchor)),
fontir::ir::AnchorKind::Mark(group) => groups
fontir::ir::AnchorKind::Mark(group) if is_mark => groups
.entry(group.clone())
.or_default()
.marks
Expand All @@ -211,12 +237,13 @@ impl<'a> MarkLookupBuilder<'a> {
fn make_mark_to_mark_groups(&self) -> BTreeMap<MarkGroupName, MarkGroup<'a>> {
// first find the set of glyphs that are marks, i.e. have any mark attachment point.
let (mark_glyphs, mark_anchors): (BTreeSet<_>, BTreeSet<_>) = self
.anchors
.anchor_lists
.iter()
.filter(|(name, _)| self.mark_glyphs.contains(*name))
.flat_map(|(name, anchors)| {
anchors.iter().filter_map(|anchor| {
anchors.iter().filter_map(move |anchor| {
if let AnchorKind::Mark(group) = &anchor.kind {
Some((name.clone(), group.clone()))
Some((name, group))
} else {
None
}
Expand All @@ -227,7 +254,7 @@ impl<'a> MarkLookupBuilder<'a> {
// then iterate again, looking for glyphs that we have identified as marks,
// but which also have participating base anchors
let mut result = BTreeMap::<MarkGroupName, MarkGroup>::new();
for (glyph, glyph_anchors) in &self.anchors {
for (glyph, glyph_anchors) in &self.anchor_lists {
if !mark_glyphs.contains(glyph) {
continue;
}
Expand All @@ -248,7 +275,7 @@ impl<'a> MarkLookupBuilder<'a> {

// then add the anchors for the mark glyphs, if they exist
for mark in mark_glyphs {
let anchors = self.anchors.get(&mark).unwrap();
let anchors = self.anchor_lists.get(mark).unwrap();
for anchor in anchors {
if let AnchorKind::Mark(group) = &anchor.kind {
// only add mark glyph if there is at least one base
Expand Down Expand Up @@ -290,6 +317,8 @@ impl Work<Context, AnyWorkId, Error> for MarkWork {
.collect::<Vec<_>>();

let fea_scripts = get_fea_scripts(&ast.ast);
// this code is roughly equivalent to what in pythonland happens in
// setContext: https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66a/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L322
let ctx = MarkLookupBuilder::new(
anchors,
&glyph_order,
Expand All @@ -305,6 +334,22 @@ impl Work<Context, AnyWorkId, Error> for MarkWork {
}
}

// in py this is set during _groupMarkGlyphsByAnchor; we try to match that logic
// https://github.com/googlefonts/ufo2ft/blob/8e9e6eb66/Lib/ufo2ft/featureWriters/markFeatureWriter.py#L412
fn find_mark_glyphs(
anchors: &BTreeMap<GlyphName, Vec<&Anchor>>,
gdef_classes: &BTreeMap<GlyphName, GlyphClassDef>,
) -> BTreeSet<GlyphName> {
anchors
.iter()
.filter(|(name, anchors)| {
(!gdef_classes.is_empty() && gdef_classes.get(*name) == Some(&GlyphClassDef::Mark))
&& anchors.iter().any(|a| a.is_mark())
})
.map(|(name, _)| name.to_owned())
.collect()
}

fn get_fea_scripts(ast: &ParseTree) -> HashSet<Tag> {
ast.typed_root()
.statements()
Expand Down
4 changes: 4 additions & 0 deletions fontir/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,10 @@ impl Anchor {
_ => None,
}
}

pub fn is_mark(&self) -> bool {
matches!(self.kind, AnchorKind::Mark(_))
}
}

/// A type for building glyph anchors, reused by different backends
Expand Down

0 comments on commit ab6f2f9

Please sign in to comment.