From 0fc1096bfb1bb20a2df53a0510d214f952bc04f1 Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Tue, 25 Jul 2023 14:20:05 -0400 Subject: [PATCH] [designspace] Deserialize lib fields This was about as painful as expected, and requires us to manually glue together the XML data model with the plist model, using serde. To briefly summarize the problem: XML data, by definition, does not have any defined semantics. Semantics are added on top of XML on a per-case basis, by indicating the document type inline (e.g. with a `<!DOCTYPE ...>` declaration). The plist crate does implement the serde traits for plist types, but those implementations are based on the assumption that type information is being provided by some deserializer (e.g. for a specific file format). This is the right thing for the plist crate to do. Unfortunately quick_xml cannot provide this type information, since quick_xml does not know that we are deserializing a plist. To make this all work, I have added a bunch of manual deserialize code. This code will only work when deserializing from XML; if in the future we wish to support other formats, we will need to revise it. Some notes: - This only implements deserialization, not serialization. 
(This is fine, since we do not currently serialize anyway, but we'll have to figure that out eventually) - this does not support the UID type, which is not accepted in XML plists --- Cargo.toml | 1 + src/designspace.rs | 31 +++ src/lib.rs | 1 + src/serde_xml_plist.rs | 467 ++++++++++++++++++++++++++++++++++++++ testdata/wght.designspace | 4 +- 5 files changed, 503 insertions(+), 1 deletion(-) create mode 100644 src/serde_xml_plist.rs diff --git a/Cargo.toml b/Cargo.toml index d54e2ac4..cad1a29f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ rayon = { version = "1.3.0", optional = true } kurbo = { version = "0.9.0", optional = true } thiserror = "1.0" indexmap = {version = "2.0.0", features = ["serde"] } +base64 = "0.21.2" [dependencies.druid] default-features = false diff --git a/src/designspace.rs b/src/designspace.rs index baf729b2..41284719 100644 --- a/src/designspace.rs +++ b/src/designspace.rs @@ -4,7 +4,10 @@ use std::{fs::File, io::BufReader, path::Path}; +use plist::Dictionary; + use crate::error::DesignSpaceLoadError; +use crate::serde_xml_plist as serde_plist; /// A [designspace]. /// @@ -24,6 +27,9 @@ pub struct DesignSpaceDocument { /// One or more instances. #[serde(default, deserialize_with = "serde_impls::deserialize_instances")] pub instances: Vec, + /// Additional arbitrary user data + #[serde(default, deserialize_with = "serde_plist::deserialize_dict")] + pub lib: Dictionary, } /// An [axis]. @@ -131,6 +137,9 @@ pub struct Instance { /// Location in designspace. #[serde(deserialize_with = "serde_impls::deserialize_location")] pub location: Vec, + /// Arbitrary data about this instance + #[serde(default, deserialize_with = "serde_plist::deserialize_dict")] + pub lib: Dictionary, } /// A [design space dimension]. 
@@ -238,6 +247,7 @@ mod serde_impls { mod tests { use std::path::Path; + use plist::Value; use pretty_assertions::assert_eq; use crate::designspace::{AxisMapping, Dimension}; @@ -305,4 +315,25 @@ mod tests { assert!(ds.instances[1].name.starts_with("unnamed_instance_")); assert_ne!(ds.instances[0].name, ds.instances[1].name); } + + #[test] + fn load_lib() { + let loaded = DesignSpaceDocument::load("testdata/wght.designspace").unwrap(); + assert_eq!( + loaded.lib.get("org.linebender.hasLoadedLibCorrectly"), + Some(&Value::String("Absolutely!".into())) + ); + + let params = loaded.instances[0] + .lib + .get("com.schriftgestaltung.customParameters") + .and_then(Value::as_array) + .unwrap(); + assert_eq!(params[0].as_array().unwrap()[0].as_string(), Some("xHeight")); + assert_eq!(params[0].as_array().unwrap()[1].as_string(), Some("536")); + assert_eq!( + params[1].as_array().unwrap()[1].as_array().unwrap()[0].as_unsigned_integer(), + Some(2) + ); + } } diff --git a/src/lib.rs b/src/lib.rs index c656fe6a..aa3a636f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,6 +84,7 @@ mod kerning; mod layer; mod name; mod names; +mod serde_xml_plist; mod shared_types; mod upconversion; pub(crate) mod util; diff --git a/src/serde_xml_plist.rs b/src/serde_xml_plist.rs new file mode 100644 index 00000000..f9477bbe --- /dev/null +++ b/src/serde_xml_plist.rs @@ -0,0 +1,467 @@ +//! Helpers for deserializing plist values from untyped XML +//! +//! This is essentially glue between the `plist` crate and the `quick_xml` crate. +//! It allows plist values, dictionaries and arrays to be used inside types that +//! derive Deserialize/Serialize. 
+ +use std::{fmt::Display, marker::PhantomData, str::FromStr}; + +use base64::{engine::general_purpose::STANDARD as base64_standard, Engine}; +use plist::{Dictionary, Value}; +use serde::{ + de::{Error as DeError, Visitor}, + Deserialize, Deserializer, +}; + +/// Deserialize a plist Dictionary +/// +/// This relies on the specific structure presented by the quick_xml crate and +/// is likely not suited to other formats. +pub fn deserialize_dict<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + deserializer.deserialize_map(ValueVisitor::DictOnly).map(|x| x.into_dictionary().unwrap()) +} + +struct DictWrapper(Dictionary); +struct ValueWrapper(Value); +struct ArrayWrapper(Vec); +struct IntWrapper(plist::Integer); + +/// The literal keyword 'key'. +struct KeyKeywordLiteral; + +/// PLIST value keywords +/// +/// We use types for keywords, with custom deserialize impls, to avoid needing +/// to transiently allocate strings each time we encounter them. :shrug: +enum ValueKeyword { + Dict, + Array, + Integer, + Real, + String, + Data, + Date, + True, + False, +} + +// the logic for deserializing a dict is a subset of the general deser logic, +// so we reuse this type for both cases. 
+enum ValueVisitor { + AnyValue, + DictOnly, +} + +impl<'de> Visitor<'de> for ValueVisitor { + type Value = Value; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + ValueVisitor::AnyValue => formatter.write_str("plist value"), + ValueVisitor::DictOnly => formatter.write_str("plist dictionary"), + } + } + + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + A::Error: DeError, + { + match read_xml_value(&mut map, matches!(self, ValueVisitor::DictOnly)) { + Ok(Some(val)) => Ok(val), + Ok(None) => Err(A::Error::custom("expected value")), + Err(e) => Err(e), + } + } +} + +/// shared helper for deserializing a plist value from the serde map repr used by quick_xml +/// +/// if `dict_only` is true, this will reject values that are not dicts. +fn read_xml_value<'de, A>(map: &mut A, dict_only: bool) -> Result, A::Error> +where + A: serde::de::MapAccess<'de>, + A::Error: DeError, +{ + let value = match map.next_key::()? 
{ + Some(ValueKeyword::Dict) => map.next_value::().map(|x| Value::Dictionary(x.0)), + Some(other) if dict_only => { + Err(A::Error::custom(format!("expected 'dict', found '{other}'"))) + } + Some(ValueKeyword::String) => map.next_value::().map(Value::String), + Some(ValueKeyword::Array) => map.next_value::().map(|x| Value::Array(x.0)), + Some(ValueKeyword::Data) => { + //FIXME: remove this + base64 dep when/if we merge + // + let b64_str = map.next_value::<&str>()?; + base64_standard + .decode(b64_str) + .map(Value::Data) + .map_err(|e| A::Error::custom(format!("Invalid XML data: '{e}'"))) + } + Some(ValueKeyword::Date) => { + let date_str = map.next_value::<&str>()?; + plist::Date::from_xml_format(date_str).map_err(A::Error::custom).map(Value::Date) + } + Some(ValueKeyword::Real) => map.next_value::().map(Value::Real), + Some(ValueKeyword::Integer) => map.next_value::().map(|x| Value::Integer(x.0)), + Some(kw @ ValueKeyword::True | kw @ ValueKeyword::False) => { + // there's no value, but we need to call this to not confuse the parser + let _ = map.next_value::<()>(); + Ok(Value::Boolean(matches!(kw, ValueKeyword::True))) + } + None => return Ok(None), + }; + value.map(Some) +} + +impl<'de> Deserialize<'de> for ValueWrapper { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_any(ValueVisitor::AnyValue).map(ValueWrapper) + } +} + +impl<'de> Deserialize<'de> for DictWrapper { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + // read a key in the form, "SomeKey" + fn read_key<'de, A>(map: &mut A) -> Result, A::Error> + where + A: serde::de::MapAccess<'de>, + A::Error: DeError, + { + match map.next_key::()? 
{ + Some(_) => map.next_value(), + None => Ok(None), + } + } + + struct DictVisitor; + + impl<'de> Visitor<'de> for DictVisitor { + type Value = Dictionary; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("plist dictionary") + } + + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + let mut dict = plist::Dictionary::new(); + // each logical key/value pair is two xml key/value pairs, + // where the first is the key and the second is the value. + while let Some(key) = read_key(&mut map)? { + // if we read a key it's an error for the value to be missing + let value = read_xml_value(&mut map, false)? + .ok_or_else(|| A::Error::custom("expected value"))?; + dict.insert(key, value); + } + Ok(dict) + } + } + + deserializer.deserialize_map(DictVisitor).map(DictWrapper) + } +} + +impl<'de> Deserialize<'de> for ArrayWrapper { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct ArrayVisitor; + + impl<'de> Visitor<'de> for ArrayVisitor { + type Value = Vec; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("plist array") + } + + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + let mut array = Vec::with_capacity(map.size_hint().unwrap_or_default()); + while let Some(value) = read_xml_value(&mut map, false)? { + array.push(value) + } + Ok(array) + } + } + + // NOTE: in quick_xml our arrays are represented as maps, where the key + // is the tag and the content is the value. + deserializer.deserialize_map(ArrayVisitor).map(ArrayWrapper) + } +} + +// a bit of over-engineering to match the semantics of Apple/the plist crate +// +// TL;DR: we deserialize hex values, but always as unsigned values. 
+impl<'de> Deserialize<'de> for IntWrapper { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct IntegerVisitor; + + impl<'de> Visitor<'de> for IntegerVisitor { + type Value = plist::Integer; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("plist integer. NOTE: this currently expects the to only be used with the quick-xml crate, otherwise you'll need to impl more visitor methods") + } + + // taken from the plist crate, under MIT license: + // + fn visit_str(self, s: &str) -> Result + where + E: DeError, + { + if s.starts_with("0x") { + // NetBSD dialect adds the `0x` numeric objects, + // which are always unsigned. + // See the `PROP_NUMBER(3)` man page + let s = s.trim_start_matches("0x"); + u64::from_str_radix(s, 16).map(Into::into).map_err(E::custom) + } else { + // Match Apple's implementation in CFPropertyList.h - always try to parse as an i64 first. + // TODO: Use IntErrorKind once stable and retry parsing on overflow only. 
+ Ok(match s.parse::() { + Ok(v) => v.into(), + Err(_) => s.parse::().map_err(E::custom)?.into(), + }) + } + } + // END MIT license use + } + deserializer.deserialize_str(IntegerVisitor).map(IntWrapper) + } +} + +// visitor impl shared between key/value keywords +struct KeywordVisitor(PhantomData<*const T>); + +impl<'de, T> Visitor<'de> for KeywordVisitor +where + T: FromStr, + T::Err: Display, +{ + type Value = T; + + fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", std::any::type_name::()) + } + + fn visit_str(self, v: &str) -> Result + where + E: DeError, + { + v.parse().map_err(E::custom) + } +} + +impl<'de> Deserialize<'de> for KeyKeywordLiteral { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(KeywordVisitor::(PhantomData)) + } +} + +impl<'de> Deserialize<'de> for ValueKeyword { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(KeywordVisitor::(PhantomData)) + } +} + +impl FromStr for KeyKeywordLiteral { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "key" => Ok(Self), + other => Err(other.to_string()), + } + } +} + +impl FromStr for ValueKeyword { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "dict" => Ok(Self::Dict), + "array" => Ok(Self::Array), + "integer" => Ok(Self::Integer), + "real" => Ok(Self::Real), + "string" => Ok(Self::String), + "data" => Ok(Self::Data), + "date" => Ok(Self::Date), + "true" => Ok(Self::True), + "false" => Ok(Self::False), + other => Err(other.to_string()), + } + } +} + +impl Display for ValueKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + ValueKeyword::Dict => "dict", + ValueKeyword::Array => "array", + ValueKeyword::Integer => "integer", + ValueKeyword::Real => "real", + ValueKeyword::String => "string", + ValueKeyword::Data => "data", + ValueKeyword::Date => 
"date", + ValueKeyword::True => "true", + ValueKeyword::False => "false", + }; + f.write_str(s) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Deserialize)] + struct TestMe { + #[serde(deserialize_with = "deserialize_dict")] + lib: Dictionary, + } + + #[test] + fn deserialize_everything() { + let xml = r#" + + + + + hasLoadedLib + Absolutely! + anArray + + + class + aristocracy + heft + 42.42 + + 6 + + isWorking + + isBroken + + bestBefore + 2345-01-24T23:22:21Z + payload + + dSBnb3QgMHduZWQ= + + + + +"#; + + let readme: TestMe = quick_xml::de::from_str(xml).unwrap(); + assert_eq!(readme.lib.get("hasLoadedLib").unwrap().as_string(), Some("Absolutely!")); + let array = readme.lib.get("anArray").unwrap().as_array().unwrap(); + assert_eq!( + array[0].as_dictionary().and_then(|d| d.get("class")), + Some(&Value::String("aristocracy".into())) + ); + assert_eq!(array[0].as_dictionary().and_then(|d| d.get("heft")), Some(&Value::Real(42.42))); + assert_eq!(array[1].as_signed_integer(), Some(6)); + assert_eq!(readme.lib.get("isWorking"), Some(&Value::Boolean(true))); + assert_eq!(readme.lib.get("isBroken"), Some(&Value::Boolean(false))); + assert_eq!( + readme.lib.get("bestBefore").and_then(Value::as_date).map(|d| d.to_xml_format()), + Some("2345-01-24T23:22:21Z".into()) + ); + assert_eq!( + readme.lib.get("payload").and_then(Value::as_data), + Some("u got 0wned".as_bytes()) + ); + } + + #[test] + fn empty_array_is_a_okay() { + let xml = r#" + + + + + emptyDict + + emptyArray + + emptyString + + + + +"#; + + let readme: TestMe = quick_xml::de::from_str(xml).unwrap(); + assert_eq!( + readme.lib.get("emptyDict").and_then(Value::as_dictionary), + Some(&Dictionary::new()) + ); + assert_eq!(readme.lib.get("emptyArray").and_then(Value::as_array), Some(&Vec::new())); + assert_eq!(readme.lib.get("emptyString").and_then(Value::as_string), Some("")); + } + + #[test] + #[should_panic(expected = "Invalid XML data")] + fn invalid_data() { + let xml = r#" + + + + + 
badData + 💣 + + + +"#; + + let _readme: TestMe = quick_xml::de::from_str(xml).unwrap(); + } + + #[test] + #[should_panic(expected = "date")] + fn invalid_date() { + let xml = r#" + + + + + badDate + yesterday + + + +"#; + + let _readme: TestMe = quick_xml::de::from_str(xml).unwrap(); + } +} diff --git a/testdata/wght.designspace b/testdata/wght.designspace index d5c1c80a..367368b0 100644 --- a/testdata/wght.designspace +++ b/testdata/wght.designspace @@ -90,6 +90,8 @@ + org.linebender.hasLoadedLibCorrectly + Absolutely! com.github.googlei18n.ufo2ft.featureWriters @@ -110,7 +112,7 @@ skip - +