From 250c59f5901f3a2c65599ac8127776c6fee0e215 Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Sat, 7 Sep 2024 00:35:58 +0100 Subject: [PATCH] Make return type of `TreeSink::elem_name` an associated type. This allows this type to be owned or contain data protected by a RefCell or Mutex. It can also simply be `ExpandedName` - the type which was previously the return type of `TreeSink::elem_name` --- html5ever/examples/arena.rs | 7 +- html5ever/examples/noop-tree-builder.rs | 1 + html5ever/examples/print-tree-actions.rs | 16 ++-- html5ever/src/tree_builder/mod.rs | 47 ++++++---- html5ever/src/tree_builder/rules.rs | 18 ++-- markup5ever/interface/mod.rs | 111 ++++++++++++++++++++++- markup5ever/interface/tree_builder.rs | 22 ++++- rcdom/lib.rs | 2 + rcdom/tests/html-tree-sink.rs | 1 + xml5ever/src/tree_builder/mod.rs | 8 +- 10 files changed, 188 insertions(+), 45 deletions(-) diff --git a/html5ever/examples/arena.rs b/html5ever/examples/arena.rs index cfdb0416..e3a6ad59 100644 --- a/html5ever/examples/arena.rs +++ b/html5ever/examples/arena.rs @@ -12,7 +12,7 @@ extern crate typed_arena; use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; use html5ever::tendril::{StrTendril, TendrilSink}; -use html5ever::{parse_document, Attribute, ExpandedName, QualName}; +use html5ever::{parse_document, Attribute, QualName}; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::HashSet; @@ -183,6 +183,7 @@ impl<'arena> Sink<'arena> { impl<'arena> TreeSink for Sink<'arena> { type Handle = Ref<'arena>; type Output = Ref<'arena>; + type ElemName<'a> = &'a QualName where Self : 'a; fn finish(self) -> Ref<'arena> { self.document @@ -202,9 +203,9 @@ impl<'arena> TreeSink for Sink<'arena> { ptr::eq::(*x, *y) } - fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { + fn elem_name(&self, target: &Ref<'arena>) -> Self::ElemName<'_> { match target.data { - NodeData::Element { ref name, .. } => name.expanded(), + NodeData::Element { ref name, .. } => &name, _ => panic!("not an element!"), } } diff --git a/html5ever/examples/noop-tree-builder.rs b/html5ever/examples/noop-tree-builder.rs index ab7fc531..ee09155a 100644 --- a/html5ever/examples/noop-tree-builder.rs +++ b/html5ever/examples/noop-tree-builder.rs @@ -40,6 +40,7 @@ impl Sink { impl TreeSink for Sink { type Handle = usize; type Output = Self; + type ElemName<'a> = ExpandedName<'a>; fn finish(self) -> Self { self } diff --git a/html5ever/examples/print-tree-actions.rs b/html5ever/examples/print-tree-actions.rs index 104d6444..cae76cfc 100644 --- a/html5ever/examples/print-tree-actions.rs +++ b/html5ever/examples/print-tree-actions.rs @@ -11,7 +11,7 @@ extern crate html5ever; use std::borrow::Cow; -use std::cell::{Cell, RefCell}; +use std::cell::{Cell, Ref, RefCell}; use std::collections::HashMap; use std::io; @@ -20,7 +20,7 @@ use html5ever::tendril::*; use html5ever::tree_builder::{ AppendNode, AppendText, ElementFlags, NodeOrText, QuirksMode, TreeSink, }; -use html5ever::{Attribute, ExpandedName, QualName}; +use html5ever::{Attribute, QualName}; struct Sink { next_id: Cell, @@ -38,6 +38,7 @@ impl Sink { impl TreeSink for Sink { type Handle = usize; type Output = Self; + type ElemName<'a> = Ref<'a, QualName>; fn finish(self) -> Self { self } @@ -68,13 +69,10 @@ impl TreeSink for Sink { x == y } - fn elem_name(&self, target: &usize) -> ExpandedName { - self.names - .borrow() - .get(target) - .cloned() - .expect("not an element") - .expanded() + fn elem_name(&self, target: &usize) -> Self::ElemName<'_> { + Ref::map(self.names.borrow(), |map| { + *map.get(target).expect("not an element") + }) } fn create_element(&self, name: QualName, _: Vec, _: ElementFlags) -> usize { diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index 49dfc062..348c211d 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -9,7 +9,9 @@ //! The HTML5 tree builder. -pub use crate::interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink}; +pub use crate::interface::{ + create_element, ElemName, ElementFlags, NextParserState, Tracer, TreeSink, +}; pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText}; pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; @@ -187,7 +189,8 @@ where opts: TreeBuilderOpts, ) -> TreeBuilder { let doc_handle = sink.get_document(); - let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template"); + let context_is_template = + sink.elem_name(&context_elem).expanded() == expanded_name!(html "template"); let template_modes = if context_is_template { RefCell::new(vec![InTemplate]) } else { @@ -231,7 +234,8 @@ where pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State { let context_elem = self.context_elem.borrow(); let elem = context_elem.as_ref().expect("no context element"); - let name = match self.sink.elem_name(elem) { + let elem_name = self.sink.elem_name(elem); + let name = match elem_name.expanded() { ExpandedName { ns: &ns!(html), local, @@ -296,8 +300,8 @@ where print!(" open_elems:"); for node in self.open_elems.borrow().iter() { let name = self.sink.elem_name(node); - match *name.ns { - ns!(html) => print!(" {}", name.local), + match *name.ns() { + ns!(html) => print!(" {}", name.local_name()), _ => panic!(), } } @@ -308,8 +312,8 @@ where &Marker => print!(" Marker"), Element(h, _) => { let name = self.sink.elem_name(h); - match *name.ns { - ns!(html) => print!(" {}", name.local), + match *name.ns() { + ns!(html) => print!(" {}", name.local_name()), _ => panic!(), } }, @@ -541,7 +545,7 @@ where fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool { !self.open_elems.borrow().is_empty() - && self.sink.elem_name(&self.adjusted_current_node()).ns != &ns!(html) + && *self.sink.elem_name(&self.adjusted_current_node()).ns() != ns!(html) } } @@ -689,7 +693,7 @@ where where TagSet: Fn(ExpandedName) -> bool, { - set(self.sink.elem_name(&self.current_node())) + set(self.sink.elem_name(&self.current_node()).expanded()) } // Insert at the "appropriate place for inserting a node". @@ -1014,7 +1018,8 @@ where for elem in self.open_elems.borrow().iter() { let error; { - let name = self.sink.elem_name(elem); + let elem_name = self.sink.elem_name(elem); + let name = elem_name.expanded(); if body_end_ok(name) { continue; } @@ -1041,7 +1046,7 @@ where if pred(node.clone()) { return true; } - if scope(self.sink.elem_name(node)) { + if scope(self.sink.elem_name(node).expanded()) { return false; } } @@ -1055,12 +1060,12 @@ where where TagSet: Fn(ExpandedName) -> bool, { - set(self.sink.elem_name(elem)) + set(self.sink.elem_name(elem).expanded()) } fn html_elem_named(&self, elem: &Handle, name: LocalName) -> bool { - let expanded = self.sink.elem_name(elem); - *expanded.ns == ns!(html) && *expanded.local == name + let elem_name = self.sink.elem_name(elem); + *elem_name.ns() == ns!(html) && *elem_name.local_name() == name } fn in_html_elem_named(&self, name: LocalName) -> bool { @@ -1090,8 +1095,8 @@ where { let open_elems = self.open_elems.borrow(); let elem = unwrap_or_return!(open_elems.last()); - let nsname = self.sink.elem_name(elem); - if !set(nsname) { + let elem_name = self.sink.elem_name(elem); + if !set(elem_name.expanded()) { return; } } @@ -1132,7 +1137,7 @@ where match self.open_elems.borrow_mut().pop() { None => break, Some(elem) => { - if pred(self.sink.elem_name(&elem)) { + if pred(self.sink.elem_name(&elem).expanded()) { break; } }, @@ -1217,7 +1222,8 @@ where if let (true, Some(ctx)) = (last, context_elem.as_ref()) { node = ctx; } - let name = match self.sink.elem_name(node) { + let elem_name = self.sink.elem_name(node); + let name = match elem_name.expanded() { ExpandedName { ns: &ns!(html), local, @@ -1470,7 +1476,8 @@ where } let current = self.adjusted_current_node(); - let name = self.sink.elem_name(¤t); + let elem_name = self.sink.elem_name(¤t); + let name = elem_name.expanded(); if let ns!(html) = *name.ns { return false; } @@ -1681,7 +1688,7 @@ where let current_ns = self .sink .elem_name(&self.adjusted_current_node()) - .ns + .ns() .clone(); match current_ns { ns!(mathml) => self.adjust_mathml_attributes(&mut tag), diff --git a/html5ever/src/tree_builder/rules.rs b/html5ever/src/tree_builder/rules.rs index a8e8d610..9bce3570 100644 --- a/html5ever/src/tree_builder/rules.rs +++ b/html5ever/src/tree_builder/rules.rs @@ -9,15 +9,18 @@ // The tree builder rules, as a single, enormous nested match expression. -use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns}; +use crate::interface::Quirks; use crate::tokenizer::states::{Plaintext, Rawtext, Rcdata, ScriptData}; +use crate::tokenizer::TagKind::{EndTag, StartTag}; use crate::tree_builder::tag_sets::*; use crate::tree_builder::types::*; +use crate::tree_builder::{ + create_element, html_elem, ElemName, NodeOrText::AppendNode, StrTendril, Tag, TreeBuilder, + TreeSink, +}; use crate::QualName; -use crate::tree_builder::{create_element, html_elem, TreeSink, Tag, NodeOrText::AppendNode, StrTendril, TreeBuilder}; -use crate::tokenizer::TagKind::{StartTag, EndTag}; +use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns}; use std::borrow::Cow::Borrowed; -use crate::interface::Quirks; use std::borrow::ToOwned; @@ -402,7 +405,8 @@ where let mut to_close = None; for node in self.open_elems.borrow().iter().rev() { - let name = self.sink.elem_name(node); + let elem_name = self.sink.elem_name(node); + let name = elem_name.expanded(); let can_close = if list { close_list(name) } else { @@ -1441,8 +1445,8 @@ where { let open_elems = self.open_elems.borrow(); let node_name = self.sink.elem_name(&open_elems[stack_idx]); - html = *node_name.ns == ns!(html); - eq = node_name.local.eq_ignore_ascii_case(&tag.name); + html = *node_name.ns() == ns!(html); + eq = node_name.local_name().eq_ignore_ascii_case(&tag.name); } if !first && html { let mode = self.mode.get(); diff --git a/markup5ever/interface/mod.rs b/markup5ever/interface/mod.rs index 0cac03db..b2e4e492 100644 --- a/markup5ever/interface/mod.rs +++ b/markup5ever/interface/mod.rs @@ -8,12 +8,13 @@ // except according to those terms. //! Types for tag and attribute names, and tree-builder functionality. +use std::cell::Ref; use std::fmt; use tendril::StrTendril; pub use self::tree_builder::{create_element, AppendNode, AppendText, ElementFlags, NodeOrText}; +pub use self::tree_builder::{ElemName, NextParserState, Tracer, TreeSink}; pub use self::tree_builder::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; -pub use self::tree_builder::{NextParserState, Tracer, TreeSink}; use super::{LocalName, Namespace, Prefix}; /// An [expanded name], containing the tag and the namespace. @@ -25,6 +26,30 @@ pub struct ExpandedName<'a> { pub local: &'a LocalName, } +impl<'a> ElemName for ExpandedName<'a> { + #[inline(always)] + fn ns(&self) -> &Namespace { + self.ns + } + + #[inline(always)] + fn local_name(&self) -> &LocalName { + self.local + } +} + +impl<'a> ElemName for Ref<'a, ExpandedName<'a>> { + #[inline(always)] + fn ns(&self) -> &Namespace { + self.ns + } + + #[inline(always)] + fn local_name(&self) -> &LocalName { + self.local + } +} + impl<'a> fmt::Debug for ExpandedName<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { if self.ns.is_empty() { @@ -35,6 +60,66 @@ impl<'a> fmt::Debug for ExpandedName<'a> { } } +// impl<'a> AsRef> for ExpandedName<'a> { +// fn as_ref(&self) -> &ExpandedName<'a> { +// self +// } +// } + +// /// A wrapper around an `ExpandedName` which allows it to be borrowed from a RefCell +// pub struct ExpandedNameRef<'a>(pub Ref<'a, ExpandedName<'a>>); + +// impl<'a> AsRef> for ExpandedNameRef<'a> { +// fn as_ref(&self) -> &ExpandedName<'a> { +// &*self.0 +// } +// } + +// impl<'a> fmt::Debug for ExpandedNameRef<'a> { +// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +// write!(f, "{:?}", *self.0.as_ref()) +// } +// } + +// /// A wrapper around an `QualName` which allows it to be borrowed from a RefCell +// pub struct QualNameRef<'a>(pub Ref<'a, &'a QualName>); + +// impl<'a> AsRef> for QualNameRef<'a> { +// fn as_ref(&self) -> &ExpandedName<'a> { +// &self.0.expanded() +// } +// } + +// impl<'a> fmt::Debug for QualNameRef<'a> { +// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +// write!(f, "{:?}", *self.0.as_ref()) +// } +// } + +// /// A wrapper around an `ExpandedName` which allows it to be borrowed from a RefCell +// pub struct LazyExpandedName<'a, T: 'a, Ext: Fn(&T) -> &ExpandedName<'a>> { +// inner: T, +// extractor: Ext, +// } + +// impl<'a, T: 'a, Ext: Fn(&T) -> &ExpandedName<'a>> LazyExpandedName<'a, T, Ext> { +// fn new(inner: T, extractor: Ext) -> Self { +// Self { inner, extractor } +// } +// } + +// impl<'a, T: 'a, Ext: Fn(&T) -> &ExpandedName<'a>> AsRef> for LazyExpandedName<'a, T, Ext> { +// fn as_ref(&self) -> &ExpandedName<'a> { +// (self.extractor)(&self.inner) +// } +// } + +// impl<'a, T: 'a, Ext: Fn(&T) -> &ExpandedName<'a>> fmt::Debug for LazyExpandedName<'a, T, Ext> { +// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +// write!(f, "{:?}", self.as_ref()) +// } +// } + /// Helper to quickly create an expanded name. /// /// Can be used with no namespace as `expanded_name!("", "some_name")` @@ -241,6 +326,30 @@ pub struct QualName { pub local: LocalName, } +impl<'a> ElemName for Ref<'a, QualName> { + #[inline(always)] + fn ns(&self) -> &Namespace { + &self.ns + } + + #[inline(always)] + fn local_name(&self) -> &LocalName { + &self.local + } +} + +impl<'a> ElemName for &'a QualName { + #[inline(always)] + fn ns(&self) -> &Namespace { + &self.ns + } + + #[inline(always)] + fn local_name(&self) -> &LocalName { + &self.local + } +} + impl QualName { /// Basic constructor function. /// diff --git a/markup5ever/interface/tree_builder.rs b/markup5ever/interface/tree_builder.rs index a0d24265..44979274 100644 --- a/markup5ever/interface/tree_builder.rs +++ b/markup5ever/interface/tree_builder.rs @@ -12,7 +12,9 @@ //! It can be used by a parser to create the DOM graph structure in memory. use crate::interface::{Attribute, ExpandedName, QualName}; +use crate::{LocalName, Namespace}; use std::borrow::Cow; +use std::fmt::Debug; use tendril::StrTendril; pub use self::NodeOrText::{AppendNode, AppendText}; @@ -97,6 +99,19 @@ where sink.create_element(name, attrs, flags) } +pub trait ElemName: Debug { + fn ns(&self) -> &Namespace; + fn local_name(&self) -> &LocalName; + + #[inline(always)] + fn expanded(&self) -> ExpandedName { + ExpandedName { + ns: self.ns(), + local: self.local_name(), + } + } +} + /// Methods a parser can use to create the DOM. The DOM provider implements this trait. /// /// Having this as a trait potentially allows multiple implementations of the DOM to be used with @@ -113,6 +128,11 @@ pub trait TreeSink { /// [rust-lang/rust#29661](https://github.com/rust-lang/rust/issues/29661) type Output; + // + type ElemName<'a>: ElemName + where + Self: 'a; + /// Consume this sink and return the overall result of parsing. /// /// TODO:This should default to `fn finish(self) -> Self::Output { self }`, @@ -130,7 +150,7 @@ pub trait TreeSink { /// /// Should never be called on a non-element node; /// feel free to `panic!`. - fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> ExpandedName<'a>; + fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> Self::ElemName<'a>; /// Create an element. /// diff --git a/rcdom/lib.rs b/rcdom/lib.rs index 45553cc1..96f26220 100644 --- a/rcdom/lib.rs +++ b/rcdom/lib.rs @@ -225,6 +225,8 @@ impl TreeSink for RcDom { type Handle = Handle; + type ElemName<'a> = ExpandedName<'a> where Self : 'a; + fn parse_error(&self, msg: Cow<'static, str>) { self.errors.borrow_mut().push(msg); } diff --git a/rcdom/tests/html-tree-sink.rs b/rcdom/tests/html-tree-sink.rs index c5d05ae2..855574a1 100644 --- a/rcdom/tests/html-tree-sink.rs +++ b/rcdom/tests/html-tree-sink.rs @@ -17,6 +17,7 @@ pub struct LineCountingDOM { impl TreeSink for LineCountingDOM { type Output = Self; + type ElemName<'a> = ExpandedName<'a>; fn finish(self) -> Self { self diff --git a/xml5ever/src/tree_builder/mod.rs b/xml5ever/src/tree_builder/mod.rs index b76eff0b..19427277 100644 --- a/xml5ever/src/tree_builder/mod.rs +++ b/xml5ever/src/tree_builder/mod.rs @@ -20,7 +20,7 @@ use std::collections::{BTreeMap, HashSet, VecDeque}; use std::fmt::{Debug, Error, Formatter}; use std::mem; -pub use self::interface::{NextParserState, NodeOrText, Tracer, TreeSink}; +pub use self::interface::{ElemName, NextParserState, NodeOrText, Tracer, TreeSink}; use self::types::*; use crate::interface::{self, create_element, AppendNode, Attribute, QualName}; use crate::interface::{AppendText, ExpandedName}; @@ -535,7 +535,7 @@ where self.open_elems .borrow() .iter() - .any(|a| self.sink.elem_name(a) == tag.name.expanded()) + .any(|a| self.sink.elem_name(a).expanded() == tag.name.expanded()) } // Pop elements until an element from the set has been popped. Returns the @@ -557,7 +557,7 @@ where TagSet: Fn(ExpandedName) -> bool, { // FIXME: take namespace into consideration: - set(self.sink.elem_name(&self.current_node())) + set(self.sink.elem_name(&self.current_node()).expanded()) } fn close_tag(&self, tag: Tag) -> XmlProcessResult { @@ -567,7 +567,7 @@ where &tag.name ); - if *self.sink.elem_name(&self.current_node()).local != tag.name.local { + if *self.sink.elem_name(&self.current_node()).local_name() != tag.name.local { self.sink .parse_error(Borrowed("Current node doesn't match tag")); }