From 0257b6ec0fe1f107f54a83bda67616e67d50001c Mon Sep 17 00:00:00 2001 From: Mingun Date: Mon, 4 Sep 2023 22:30:27 +0500 Subject: [PATCH 1/3] Move namespace buffer into NamespaceResolver No need to store it outside of resolver, because its lifetime is the same as resolver lifetime --- src/name.rs | 197 ++++++++++++++++++---------------------- src/reader/ns_reader.rs | 32 ++----- 2 files changed, 97 insertions(+), 132 deletions(-) diff --git a/src/name.rs b/src/name.rs index 07d261ab..57154788 100644 --- a/src/name.rs +++ b/src/name.rs @@ -392,6 +392,9 @@ impl NamespaceEntry { /// Holds all internal logic to push/pop namespaces with their levels. #[derive(Debug, Default, Clone)] pub(crate) struct NamespaceResolver { + /// Buffer that contains names of namespace prefixes (the part between `xmlns:` + /// and an `=`) and namespace values. + buffer: Vec, /// A stack of namespace bindings to prefixes that currently in scope bindings: Vec, /// The number of open tags at the moment. We need to keep track of this to know which namespace @@ -404,7 +407,7 @@ impl NamespaceResolver { /// the specified start element. /// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl - pub fn push(&mut self, start: &BytesStart, buffer: &mut Vec) { + pub fn push(&mut self, start: &BytesStart) { self.nesting_level += 1; let level = self.nesting_level; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' @@ -413,8 +416,8 @@ impl NamespaceResolver { if let Ok(Attribute { key: k, value: v }) = a { match k.as_namespace_binding() { Some(PrefixDeclaration::Default) => { - let start = buffer.len(); - buffer.extend_from_slice(&v); + let start = self.buffer.len(); + self.buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { start, prefix_len: 0, @@ -423,9 +426,9 @@ impl NamespaceResolver { }); } Some(PrefixDeclaration::Named(prefix)) => { - let start = buffer.len(); - buffer.extend_from_slice(prefix); - buffer.extend_from_slice(&v); + let start = self.buffer.len(); + self.buffer.extend_from_slice(prefix); + self.buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { start, prefix_len: prefix.len(), @@ -445,20 +448,20 @@ impl NamespaceResolver { /// last call to [`Self::push()`]. /// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl - pub fn pop(&mut self, buffer: &mut Vec) { + pub fn pop(&mut self) { self.nesting_level -= 1; let current_level = self.nesting_level; // from the back (most deeply nested scope), look for the first scope that is still valid match self.bindings.iter().rposition(|n| n.level <= current_level) { // none of the namespaces are valid, remove all of them None => { - buffer.clear(); + self.buffer.clear(); self.bindings.clear(); } // drop all namespaces past the last valid namespace Some(last_valid_pos) => { if let Some(len) = self.bindings.get(last_valid_pos + 1).map(|n| n.start) { - buffer.truncate(len); + self.buffer.truncate(len); self.bindings.truncate(last_valid_pos + 1); } } @@ -478,50 +481,39 @@ impl NamespaceResolver { /// # Lifetimes /// /// - `'n`: lifetime of an attribute or an element name - /// - `'ns`: lifetime of a namespaces buffer, where all found namespaces are stored #[inline] - pub fn resolve<'n, 'ns>( + pub fn resolve<'n>( &self, name: QName<'n>, - buffer: &'ns [u8], use_default: bool, - ) -> (ResolveResult<'ns>, LocalName<'n>) { + ) -> (ResolveResult, LocalName<'n>) { let (local_name, prefix) = name.decompose(); - (self.resolve_prefix(prefix, buffer, use_default), local_name) + (self.resolve_prefix(prefix, use_default), local_name) } /// Finds a [namespace name] for a given qualified **element name**, borrow - /// it from the specified buffer. + /// it from the internal buffer. /// /// Returns `None`, if: /// - name is unqualified /// - prefix not found in the current scope /// - prefix was [unbound] using `xmlns:prefix=""` /// - /// # Lifetimes - /// - /// - `'ns`: lifetime of a namespaces buffer, where all found namespaces are stored - /// /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName /// [unbound]: https://www.w3.org/TR/xml-names11/#scoping #[inline] - pub fn find<'ns>(&self, element_name: QName, buffer: &'ns [u8]) -> ResolveResult<'ns> { - self.resolve_prefix(element_name.prefix(), buffer, true) + pub fn find(&self, element_name: QName) -> ResolveResult { + self.resolve_prefix(element_name.prefix(), true) } - fn resolve_prefix<'ns>( - &self, - prefix: Option, - buffer: &'ns [u8], - use_default: bool, - ) -> ResolveResult<'ns> { + fn resolve_prefix(&self, prefix: Option, use_default: bool) -> ResolveResult { self.bindings .iter() // Find the last defined binding that corresponds to the given prefix .rev() - .find_map(|n| match (n.prefix(buffer), prefix) { + .find_map(|n| match (n.prefix(&self.buffer), prefix) { // This is default namespace definition and name has no explicit prefix - (None, None) if use_default => Some(n.namespace(buffer)), + (None, None) if use_default => Some(n.namespace(&self.buffer)), (None, None) => Some(ResolveResult::Unbound), // One part has prefix but other is not -> skip @@ -534,7 +526,7 @@ impl NamespaceResolver { // Prefixes the same, entry defines binding reset (corresponds to `xmlns:p=""`) _ if n.value_len == 0 => Some(Self::maybe_unknown(prefix)), // Prefixes the same, returns corresponding namespace - _ => Some(n.namespace(buffer)), + _ => Some(n.namespace(&self.buffer)), }) .unwrap_or_else(|| Self::maybe_unknown(prefix)) } @@ -572,29 +564,25 @@ mod namespaces { let ns = Namespace(b"default"); let mut resolver = NamespaceResolver::default(); - let mut buffer = Vec::new(); - resolver.push( - &BytesStart::from_content(" xmlns='default'", 0), - &mut buffer, - ); - assert_eq!(buffer, b"default"); + resolver.push(&BytesStart::from_content(" xmlns='default'", 0)); + assert_eq!(resolver.buffer, b"default"); // Check that tags without namespaces does not change result - resolver.push(&BytesStart::from_content("", 0), &mut buffer); - assert_eq!(buffer, b"default"); - resolver.pop(&mut buffer); + resolver.push(&BytesStart::from_content("", 0)); + assert_eq!(resolver.buffer, b"default"); + resolver.pop(); - assert_eq!(buffer, b"default"); + assert_eq!(resolver.buffer, b"default"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Bound(ns), LocalName(b"simple")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); - assert_eq!(resolver.find(name, &buffer), Bound(ns)); + assert_eq!(resolver.find(name), Bound(ns)); } /// Test adding a second level of namespaces, which replaces the previous binding @@ -605,33 +593,32 @@ mod namespaces { let new_ns = Namespace(b"new"); let mut resolver = NamespaceResolver::default(); - let mut buffer = Vec::new(); - resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer); - resolver.push(&BytesStart::from_content(" xmlns='new'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns='old'", 0)); + resolver.push(&BytesStart::from_content(" xmlns='new'", 0)); - assert_eq!(buffer, b"oldnew"); + assert_eq!(resolver.buffer, b"oldnew"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Bound(new_ns), LocalName(b"simple")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); - assert_eq!(resolver.find(name, &buffer), Bound(new_ns)); + assert_eq!(resolver.find(name), Bound(new_ns)); - resolver.pop(&mut buffer); - assert_eq!(buffer, b"old"); + resolver.pop(); + assert_eq!(resolver.buffer, b"old"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Bound(old_ns), LocalName(b"simple")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); - assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); + assert_eq!(resolver.find(name), Bound(old_ns)); } /// Test adding a second level of namespaces, which reset the previous binding @@ -644,33 +631,32 @@ mod namespaces { let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); - let mut buffer = Vec::new(); - resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer); - resolver.push(&BytesStart::from_content(" xmlns=''", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns='old'", 0)); + resolver.push(&BytesStart::from_content(" xmlns=''", 0)); - assert_eq!(buffer, b"old"); + assert_eq!(resolver.buffer, b"old"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Unbound, LocalName(b"simple")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); - assert_eq!(resolver.find(name, &buffer), Unbound); + assert_eq!(resolver.find(name), Unbound); - resolver.pop(&mut buffer); - assert_eq!(buffer, b"old"); + resolver.pop(); + assert_eq!(resolver.buffer, b"old"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Bound(old_ns), LocalName(b"simple")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); - assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); + assert_eq!(resolver.find(name), Bound(old_ns)); } } @@ -685,29 +671,25 @@ mod namespaces { let ns = Namespace(b"default"); let mut resolver = NamespaceResolver::default(); - let mut buffer = Vec::new(); - resolver.push( - &BytesStart::from_content(" xmlns:p='default'", 0), - &mut buffer, - ); - assert_eq!(buffer, b"pdefault"); + resolver.push(&BytesStart::from_content(" xmlns:p='default'", 0)); + assert_eq!(resolver.buffer, b"pdefault"); // Check that tags without namespaces does not change result - resolver.push(&BytesStart::from_content("", 0), &mut buffer); - assert_eq!(buffer, b"pdefault"); - resolver.pop(&mut buffer); + resolver.push(&BytesStart::from_content("", 0)); + assert_eq!(resolver.buffer, b"pdefault"); + resolver.pop(); - assert_eq!(buffer, b"pdefault"); + assert_eq!(resolver.buffer, b"pdefault"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Bound(ns), LocalName(b"with-declared-prefix")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Bound(ns), LocalName(b"with-declared-prefix")) ); - assert_eq!(resolver.find(name, &buffer), Bound(ns)); + assert_eq!(resolver.find(name), Bound(ns)); } /// Test adding a second level of namespaces, which replaces the previous binding @@ -718,33 +700,32 @@ mod namespaces { let new_ns = Namespace(b"new"); let mut resolver = NamespaceResolver::default(); - let mut buffer = Vec::new(); - resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer); - resolver.push(&BytesStart::from_content(" xmlns:p='new'", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0)); + resolver.push(&BytesStart::from_content(" xmlns:p='new'", 0)); - assert_eq!(buffer, b"poldpnew"); + assert_eq!(resolver.buffer, b"poldpnew"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Bound(new_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Bound(new_ns), LocalName(b"with-declared-prefix")) ); - assert_eq!(resolver.find(name, &buffer), Bound(new_ns)); + assert_eq!(resolver.find(name), Bound(new_ns)); - resolver.pop(&mut buffer); - assert_eq!(buffer, b"pold"); + resolver.pop(); + assert_eq!(resolver.buffer, b"pold"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); - assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); + assert_eq!(resolver.find(name), Bound(old_ns)); } /// Test adding a second level of namespaces, which reset the previous binding @@ -757,33 +738,32 @@ mod namespaces { let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); - let mut buffer = Vec::new(); - resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer); - resolver.push(&BytesStart::from_content(" xmlns:p=''", 0), &mut buffer); + resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0)); + resolver.push(&BytesStart::from_content(" xmlns:p=''", 0)); - assert_eq!(buffer, b"poldp"); + assert_eq!(resolver.buffer, b"poldp"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix")) ); - assert_eq!(resolver.find(name, &buffer), Unknown(b"p".to_vec())); + assert_eq!(resolver.find(name), Unknown(b"p".to_vec())); - resolver.pop(&mut buffer); - assert_eq!(buffer, b"pold"); + resolver.pop(); + assert_eq!(resolver.buffer, b"pold"); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); - assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); + assert_eq!(resolver.find(name), Bound(old_ns)); } } @@ -792,18 +772,17 @@ mod namespaces { let name = QName(b"unknown:prefix"); let resolver = NamespaceResolver::default(); - let buffer = Vec::new(); - assert_eq!(buffer, b""); + assert_eq!(resolver.buffer, b""); assert_eq!( - resolver.resolve(name, &buffer, true), + resolver.resolve(name, true), (Unknown(b"unknown".to_vec()), LocalName(b"prefix")) ); assert_eq!( - resolver.resolve(name, &buffer, false), + resolver.resolve(name, false), (Unknown(b"unknown".to_vec()), LocalName(b"prefix")) ); - assert_eq!(resolver.find(name, &buffer), Unknown(b"unknown".to_vec())); + assert_eq!(resolver.find(name), Unknown(b"unknown".to_vec())); } /// Checks how the QName is decomposed to a prefix and a local name diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 09457f28..35502be3 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -21,9 +21,6 @@ use crate::reader::{Reader, Span, XmlSource}; pub struct NsReader { /// An XML reader pub(super) reader: Reader, - /// Buffer that contains names of namespace prefixes (the part between `xmlns:` - /// and an `=`) and namespace values. - buffer: Vec, /// A buffer to manage namespaces ns_resolver: NamespaceResolver, /// We cannot pop data from the namespace stack until returned `Empty` or `End` @@ -49,7 +46,6 @@ impl NsReader { fn new(reader: Reader) -> Self { Self { reader, - buffer: Vec::new(), ns_resolver: NamespaceResolver::default(), pending_pop: false, } @@ -66,7 +62,7 @@ impl NsReader { pub(super) fn pop(&mut self) { if self.pending_pop { - self.ns_resolver.pop(&mut self.buffer); + self.ns_resolver.pop(); self.pending_pop = false; } } @@ -74,11 +70,11 @@ impl NsReader { pub(super) fn process_event<'i>(&mut self, event: Result>) -> Result> { match event { Ok(Event::Start(e)) => { - self.ns_resolver.push(&e, &mut self.buffer); + self.ns_resolver.push(&e); Ok(Event::Start(e)) } Ok(Event::Empty(e)) => { - self.ns_resolver.push(&e, &mut self.buffer); + self.ns_resolver.push(&e); // notify next `read_event_impl()` invocation that it needs to pop this // namespace scope self.pending_pop = true; @@ -99,19 +95,9 @@ impl NsReader { event: Result>, ) -> Result<(ResolveResult, Event<'i>)> { match event { - Ok(Event::Start(e)) => Ok(( - self.ns_resolver.find(e.name(), &self.buffer), - Event::Start(e), - )), - Ok(Event::Empty(e)) => Ok(( - self.ns_resolver.find(e.name(), &self.buffer), - Event::Empty(e), - )), - Ok(Event::End(e)) => Ok(( - // Comment that prevent cargo rmt - self.ns_resolver.find(e.name(), &self.buffer), - Event::End(e), - )), + Ok(Event::Start(e)) => Ok((self.ns_resolver.find(e.name()), Event::Start(e))), + Ok(Event::Empty(e)) => Ok((self.ns_resolver.find(e.name()), Event::Empty(e))), + Ok(Event::End(e)) => Ok((self.ns_resolver.find(e.name()), Event::End(e))), Ok(e) => Ok((ResolveResult::Unbound, e)), Err(e) => Err(e), } @@ -169,7 +155,7 @@ impl NsReader { /// [`resolve_element()`]: Self::resolve_element() #[inline] pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) { - self.ns_resolver.resolve(name, &self.buffer, !attribute) + self.ns_resolver.resolve(name, !attribute) } /// Resolves a potentially qualified **element name** into _(namespace name, local name)_. @@ -225,7 +211,7 @@ impl NsReader { /// [`read_resolved_event()`]: Self::read_resolved_event #[inline] pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { - self.ns_resolver.resolve(name, &self.buffer, true) + self.ns_resolver.resolve(name, true) } /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_. @@ -296,7 +282,7 @@ impl NsReader { /// [`Unknown`]: ResolveResult::Unknown #[inline] pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { - self.ns_resolver.resolve(name, &self.buffer, false) + self.ns_resolver.resolve(name, false) } } From c9a124537816ad8210de4c7f25a98f2cbf4afab0 Mon Sep 17 00:00:00 2001 From: Mingun Date: Mon, 4 Sep 2023 23:51:58 +0500 Subject: [PATCH 2/3] Make namespaces tests independent from initial buffer content --- src/name.rs | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/name.rs b/src/name.rs index 57154788..531f818a 100644 --- a/src/name.rs +++ b/src/name.rs @@ -564,16 +564,17 @@ mod namespaces { let ns = Namespace(b"default"); let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); resolver.push(&BytesStart::from_content(" xmlns='default'", 0)); - assert_eq!(resolver.buffer, b"default"); + assert_eq!(&resolver.buffer[s..], b"default"); // Check that tags without namespaces does not change result resolver.push(&BytesStart::from_content("", 0)); - assert_eq!(resolver.buffer, b"default"); + assert_eq!(&resolver.buffer[s..], b"default"); resolver.pop(); - assert_eq!(resolver.buffer, b"default"); + assert_eq!(&resolver.buffer[s..], b"default"); assert_eq!( resolver.resolve(name, true), (Bound(ns), LocalName(b"simple")) @@ -593,11 +594,12 @@ mod namespaces { let new_ns = Namespace(b"new"); let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); resolver.push(&BytesStart::from_content(" xmlns='old'", 0)); resolver.push(&BytesStart::from_content(" xmlns='new'", 0)); - assert_eq!(resolver.buffer, b"oldnew"); + assert_eq!(&resolver.buffer[s..], b"oldnew"); assert_eq!( resolver.resolve(name, true), (Bound(new_ns), LocalName(b"simple")) @@ -609,7 +611,7 @@ mod namespaces { assert_eq!(resolver.find(name), Bound(new_ns)); resolver.pop(); - assert_eq!(resolver.buffer, b"old"); + assert_eq!(&resolver.buffer[s..], b"old"); assert_eq!( resolver.resolve(name, true), (Bound(old_ns), LocalName(b"simple")) @@ -631,11 +633,12 @@ mod namespaces { let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); resolver.push(&BytesStart::from_content(" xmlns='old'", 0)); resolver.push(&BytesStart::from_content(" xmlns=''", 0)); - assert_eq!(resolver.buffer, b"old"); + assert_eq!(&resolver.buffer[s..], b"old"); assert_eq!( resolver.resolve(name, true), (Unbound, LocalName(b"simple")) @@ -647,7 +650,7 @@ mod namespaces { assert_eq!(resolver.find(name), Unbound); resolver.pop(); - assert_eq!(resolver.buffer, b"old"); + assert_eq!(&resolver.buffer[s..], b"old"); assert_eq!( resolver.resolve(name, true), (Bound(old_ns), LocalName(b"simple")) @@ -671,16 +674,17 @@ mod namespaces { let ns = Namespace(b"default"); let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); resolver.push(&BytesStart::from_content(" xmlns:p='default'", 0)); - assert_eq!(resolver.buffer, b"pdefault"); + assert_eq!(&resolver.buffer[s..], b"pdefault"); // Check that tags without namespaces does not change result resolver.push(&BytesStart::from_content("", 0)); - assert_eq!(resolver.buffer, b"pdefault"); + assert_eq!(&resolver.buffer[s..], b"pdefault"); resolver.pop(); - assert_eq!(resolver.buffer, b"pdefault"); + assert_eq!(&resolver.buffer[s..], b"pdefault"); assert_eq!( resolver.resolve(name, true), (Bound(ns), LocalName(b"with-declared-prefix")) @@ -700,11 +704,12 @@ mod namespaces { let new_ns = Namespace(b"new"); let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0)); resolver.push(&BytesStart::from_content(" xmlns:p='new'", 0)); - assert_eq!(resolver.buffer, b"poldpnew"); + assert_eq!(&resolver.buffer[s..], b"poldpnew"); assert_eq!( resolver.resolve(name, true), (Bound(new_ns), LocalName(b"with-declared-prefix")) @@ -716,7 +721,7 @@ mod namespaces { assert_eq!(resolver.find(name), Bound(new_ns)); resolver.pop(); - assert_eq!(resolver.buffer, b"pold"); + assert_eq!(&resolver.buffer[s..], b"pold"); assert_eq!( resolver.resolve(name, true), (Bound(old_ns), LocalName(b"with-declared-prefix")) @@ -738,11 +743,12 @@ mod namespaces { let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0)); resolver.push(&BytesStart::from_content(" xmlns:p=''", 0)); - assert_eq!(resolver.buffer, b"poldp"); + assert_eq!(&resolver.buffer[s..], b"poldp"); assert_eq!( resolver.resolve(name, true), (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix")) @@ -754,7 +760,7 @@ mod namespaces { assert_eq!(resolver.find(name), Unknown(b"p".to_vec())); resolver.pop(); - assert_eq!(resolver.buffer, b"pold"); + assert_eq!(&resolver.buffer[s..], b"pold"); assert_eq!( resolver.resolve(name, true), (Bound(old_ns), LocalName(b"with-declared-prefix")) From c05fda1753580285681de43dfc3a4894c41128f7 Mon Sep 17 00:00:00 2001 From: Wren Turkal Date: Wed, 25 Jan 2023 12:58:32 +0500 Subject: [PATCH 3/3] Add reserved namespace bindings. This adds xml and xmlns namespace bindings. These are defined at https://www.w3.org/TR/xml-names11/#xmlReserved. --- Changelog.md | 9 +- src/errors.rs | 23 +++ src/name.rs | 345 ++++++++++++++++++++++++++++++++++++++-- src/reader/ns_reader.rs | 4 +- 4 files changed, 362 insertions(+), 19 deletions(-) diff --git a/Changelog.md b/Changelog.md index 7b17a8e8..ad6d8bd6 100644 --- a/Changelog.md +++ b/Changelog.md @@ -12,13 +12,18 @@ ### New Features +- [#545]: Resolve well-known namespaces (`xml` and `xmlns`) to their appropriate URIs. + Also, enforce namespace constraints related to these well-known namespaces. + ### Bug Fixes ### Misc Changes -- [#643] Bumped MSRV to 1.56. In practice the previous MSRV was incorrect in many cases. -- [#643] Adopted Rust 2021 edition. +- [#643]: Bumped MSRV to 1.56. In practice the previous MSRV was incorrect in many cases. +- [#643]: Adopted Rust 2021 edition. +- [#545]: Add new `Error` variant -- `Error::InvalidPrefixBind` +[#545]: https://github.com/tafia/quick-xml/pull/545 [#643]: https://github.com/tafia/quick-xml/pull/643 diff --git a/src/errors.rs b/src/errors.rs index 14cd7a5c..47c89564 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -47,6 +47,22 @@ pub enum Error { EscapeError(EscapeError), /// Specified namespace prefix is unknown, cannot resolve namespace for it UnknownPrefix(Vec), + /// Error for when a reserved namespace is set incorrectly. + /// + /// This error returned in following cases: + /// - the XML document attempts to bind `xml` prefix to something other than + /// `http://www.w3.org/XML/1998/namespace` + /// - the XML document attempts to bind `xmlns` prefix + /// - the XML document attempts to bind some prefix (except `xml`) to + /// `http://www.w3.org/XML/1998/namespace` + /// - the XML document attempts to bind some prefix to + /// `http://www.w3.org/2000/xmlns/` + InvalidPrefixBind { + /// The prefix that is tried to be bound + prefix: Vec, + /// Namespace to which prefix tried to be bound + namespace: Vec, + }, } impl From for Error { @@ -121,6 +137,13 @@ impl fmt::Display for Error { write_byte_string(f, prefix)?; f.write_str("'") } + Error::InvalidPrefixBind { prefix, namespace } => { + f.write_str("The namespace prefix '")?; + write_byte_string(f, prefix)?; + f.write_str("' cannot be bound to '")?; + write_byte_string(f, namespace)?; + f.write_str("'") + } } } } diff --git a/src/name.rs b/src/name.rs index 531f818a..a6128686 100644 --- a/src/name.rs +++ b/src/name.rs @@ -390,7 +390,7 @@ impl NamespaceEntry { /// A namespace management buffer. /// /// Holds all internal logic to push/pop namespaces with their levels. -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] pub(crate) struct NamespaceResolver { /// Buffer that contains names of namespace prefixes (the part between `xmlns:` /// and an `=`) and namespace values. @@ -402,12 +402,62 @@ pub(crate) struct NamespaceResolver { nesting_level: i32, } +/// That constant define the one of [reserved namespaces] for the xml standard. +/// +/// The prefix `xml` is by definition bound to the namespace name +/// `http://www.w3.org/XML/1998/namespace`. It may, but need not, be declared, and must not be +/// undeclared or bound to any other namespace name. Other prefixes must not be bound to this +/// namespace name, and it must not be declared as the default namespace. +/// +/// [reserved namespaces]: https://www.w3.org/TR/xml-names11/#xmlReserved +const RESERVED_NAMESPACE_XML: (Prefix, Namespace) = ( + Prefix(b"xml"), + Namespace(b"http://www.w3.org/XML/1998/namespace"), +); +/// That constant define the one of [reserved namespaces] for the xml standard. +/// +/// The prefix `xmlns` is used only to declare namespace bindings and is by definition bound +/// to the namespace name `http://www.w3.org/2000/xmlns/`. It must not be declared or +/// undeclared. Other prefixes must not be bound to this namespace name, and it must not be +/// declared as the default namespace. Element names must not have the prefix `xmlns`. +/// +/// [reserved namespaces]: https://www.w3.org/TR/xml-names11/#xmlReserved +const RESERVED_NAMESPACE_XMLNS: (Prefix, Namespace) = ( + Prefix(b"xmlns"), + Namespace(b"http://www.w3.org/2000/xmlns/"), +); + +impl Default for NamespaceResolver { + fn default() -> Self { + let mut buffer = Vec::new(); + let mut bindings = Vec::new(); + for ent in &[RESERVED_NAMESPACE_XML, RESERVED_NAMESPACE_XMLNS] { + let prefix = ent.0.into_inner(); + let uri = ent.1.into_inner(); + bindings.push(NamespaceEntry { + start: buffer.len(), + prefix_len: prefix.len(), + value_len: uri.len(), + level: 0, + }); + buffer.extend(prefix); + buffer.extend(uri); + } + + Self { + buffer, + bindings, + nesting_level: 0, + } + } +} + impl NamespaceResolver { /// Begins a new scope and add to it all [namespace bindings] that found in /// the specified start element. /// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl - pub fn push(&mut self, start: &BytesStart) { + pub fn push(&mut self, start: &BytesStart) -> Result<()> { self.nesting_level += 1; let level = self.nesting_level; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' @@ -425,7 +475,35 @@ impl NamespaceResolver { level, }); } + Some(PrefixDeclaration::Named(b"xml")) => { + if Namespace(&v) != RESERVED_NAMESPACE_XML.1 { + // error, `xml` prefix explicitly set to different value + return Err(Error::InvalidPrefixBind { + prefix: b"xml".to_vec(), + namespace: v.to_vec(), + }); + } + // don't add another NamespaceEntry for the `xml` namespace prefix + } + Some(PrefixDeclaration::Named(b"xmlns")) => { + // error, `xmlns` prefix explicitly set + return Err(Error::InvalidPrefixBind { + prefix: b"xmlns".to_vec(), + namespace: v.to_vec(), + }); + } Some(PrefixDeclaration::Named(prefix)) => { + let ns = Namespace(&v); + + if ns == RESERVED_NAMESPACE_XML.1 || ns == RESERVED_NAMESPACE_XMLNS.1 { + // error, non-`xml` prefix set to xml uri + // error, non-`xmlns` prefix set to xmlns uri + return Err(Error::InvalidPrefixBind { + prefix: prefix.to_vec(), + namespace: v.to_vec(), + }); + } + let start = self.buffer.len(); self.buffer.extend_from_slice(prefix); self.buffer.extend_from_slice(&v); @@ -442,6 +520,7 @@ impl NamespaceResolver { break; } } + Ok(()) } /// Ends a top-most scope by popping all [namespace binding], that was added by @@ -566,11 +645,13 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - resolver.push(&BytesStart::from_content(" xmlns='default'", 0)); + resolver + .push(&BytesStart::from_content(" xmlns='default'", 0)) + .unwrap(); assert_eq!(&resolver.buffer[s..], b"default"); // Check that tags without namespaces does not change result - resolver.push(&BytesStart::from_content("", 0)); + resolver.push(&BytesStart::from_content("", 0)).unwrap(); assert_eq!(&resolver.buffer[s..], b"default"); resolver.pop(); @@ -596,8 +677,12 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - resolver.push(&BytesStart::from_content(" xmlns='old'", 0)); - resolver.push(&BytesStart::from_content(" xmlns='new'", 0)); + resolver + .push(&BytesStart::from_content(" xmlns='old'", 0)) + .unwrap(); + resolver + .push(&BytesStart::from_content(" xmlns='new'", 0)) + .unwrap(); assert_eq!(&resolver.buffer[s..], b"oldnew"); assert_eq!( @@ -635,8 +720,12 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - resolver.push(&BytesStart::from_content(" xmlns='old'", 0)); - resolver.push(&BytesStart::from_content(" xmlns=''", 0)); + resolver + .push(&BytesStart::from_content(" xmlns='old'", 0)) + .unwrap(); + resolver + .push(&BytesStart::from_content(" xmlns=''", 0)) + .unwrap(); assert_eq!(&resolver.buffer[s..], b"old"); assert_eq!( @@ -676,11 +765,13 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - resolver.push(&BytesStart::from_content(" xmlns:p='default'", 0)); + resolver + .push(&BytesStart::from_content(" xmlns:p='default'", 0)) + .unwrap(); assert_eq!(&resolver.buffer[s..], b"pdefault"); // Check that tags without namespaces does not change result - resolver.push(&BytesStart::from_content("", 0)); + resolver.push(&BytesStart::from_content("", 0)).unwrap(); assert_eq!(&resolver.buffer[s..], b"pdefault"); resolver.pop(); @@ -706,8 +797,12 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0)); - resolver.push(&BytesStart::from_content(" xmlns:p='new'", 0)); + resolver + .push(&BytesStart::from_content(" xmlns:p='old'", 0)) + .unwrap(); + resolver + .push(&BytesStart::from_content(" xmlns:p='new'", 0)) + .unwrap(); assert_eq!(&resolver.buffer[s..], b"poldpnew"); assert_eq!( @@ -745,8 +840,12 @@ mod namespaces { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); - resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0)); - resolver.push(&BytesStart::from_content(" xmlns:p=''", 0)); + resolver + .push(&BytesStart::from_content(" xmlns:p='old'", 0)) + .unwrap(); + resolver + .push(&BytesStart::from_content(" xmlns:p=''", 0)) + .unwrap(); assert_eq!(&resolver.buffer[s..], b"poldp"); assert_eq!( @@ -773,13 +872,229 @@ mod namespaces { } } + /// Tests for `xml` and `xmlns` built-in prefixes. + /// + /// See + mod builtin_prefixes { + use super::*; + + mod xml { + use super::*; + use pretty_assertions::assert_eq; + + /// `xml` prefix are always defined, it is not required to define it explicitly. + #[test] + fn undeclared() { + let name = QName(b"xml:random"); + let namespace = RESERVED_NAMESPACE_XML.1; + + let resolver = NamespaceResolver::default(); + + assert_eq!( + resolver.resolve(name, true), + (Bound(namespace), LocalName(b"random")) + ); + + assert_eq!( + resolver.resolve(name, false), + (Bound(namespace), LocalName(b"random")) + ); + assert_eq!(resolver.find(name), Bound(namespace)); + } + + /// `xml` prefix can be declared but it must be bound to the value + /// `http://www.w3.org/XML/1998/namespace` + #[test] + fn rebound_to_correct_ns() { + let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); + resolver.push( + &BytesStart::from_content( + " xmlns:xml='http://www.w3.org/XML/1998/namespace'", + 0, + ), + ).expect("`xml` prefix should be possible to bound to `http://www.w3.org/XML/1998/namespace`"); + assert_eq!(&resolver.buffer[s..], b""); + } + + /// `xml` prefix cannot be re-declared to another namespace + #[test] + fn rebound_to_incorrect_ns() { + let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); + match resolver.push(&BytesStart::from_content( + " xmlns:xml='not_correct_namespace'", + 0, + )) { + Err(Error::InvalidPrefixBind { prefix, namespace }) => { + assert_eq!(prefix, b"xml"); + assert_eq!(namespace, b"not_correct_namespace"); + } + x => panic!( + "Expected `Error::ReservedNamespaceError`, but found {:?}", + x + ), + } + assert_eq!(&resolver.buffer[s..], b""); + } + + /// `xml` prefix cannot be unbound + #[test] + fn unbound() { + let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); + match resolver.push(&BytesStart::from_content(" xmlns:xml=''", 0)) { + Err(Error::InvalidPrefixBind { prefix, namespace }) => { + assert_eq!(prefix, b"xml"); + assert_eq!(namespace, b""); + } + x => panic!( + "Expected `Error::ReservedNamespaceError`, but found {:?}", + x + ), + } + assert_eq!(&resolver.buffer[s..], b""); + } + + /// Other prefix cannot be bound to `xml` namespace + #[test] + fn other_prefix_bound_to_xml_namespace() { + let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); + match resolver.push(&BytesStart::from_content( + " xmlns:not_xml='http://www.w3.org/XML/1998/namespace'", + 0, + )) { + Err(Error::InvalidPrefixBind { prefix, namespace }) => { + assert_eq!(prefix, b"not_xml"); + assert_eq!(namespace, b"http://www.w3.org/XML/1998/namespace"); + } + x => panic!( + "Expected `Error::ReservedNamespaceError`, but found {:?}", + x + ), + } + assert_eq!(&resolver.buffer[s..], b""); + } + } + + mod xmlns { + use super::*; + use pretty_assertions::assert_eq; + + /// `xmlns` prefix are always defined, it is forbidden to define it explicitly + #[test] + fn undeclared() { + let name = QName(b"xmlns:random"); + let namespace = RESERVED_NAMESPACE_XMLNS.1; + + let resolver = NamespaceResolver::default(); + + assert_eq!( + resolver.resolve(name, true), + (Bound(namespace), LocalName(b"random")) + ); + + assert_eq!( + resolver.resolve(name, false), + (Bound(namespace), LocalName(b"random")) + ); + assert_eq!(resolver.find(name), Bound(namespace)); + } + + /// `xmlns` prefix cannot be re-declared event to its own namespace + #[test] + fn rebound_to_correct_ns() { + let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); + match resolver.push(&BytesStart::from_content( + " xmlns:xmlns='http://www.w3.org/2000/xmlns/'", + 0, + )) { + Err(Error::InvalidPrefixBind { prefix, namespace }) => { + assert_eq!(prefix, b"xmlns"); + assert_eq!(namespace, b"http://www.w3.org/2000/xmlns/"); + } + x => panic!( + "Expected `Error::ReservedNamespaceError`, but found {:?}", + x + ), + } + assert_eq!(&resolver.buffer[s..], b""); + } + + /// `xmlns` prefix cannot be re-declared + #[test] + fn rebound_to_incorrect_ns() { + let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); + match resolver.push(&BytesStart::from_content( + " xmlns:xmlns='not_correct_namespace'", + 0, + )) { + Err(Error::InvalidPrefixBind { prefix, namespace }) => { + assert_eq!(prefix, b"xmlns"); + assert_eq!(namespace, b"not_correct_namespace"); + } + x => panic!( + "Expected `Error::ReservedNamespaceError`, but found {:?}", + x + ), + } + assert_eq!(&resolver.buffer[s..], b""); + } + + /// `xmlns` prefix cannot be unbound + #[test] + fn unbound() { + let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); + match resolver.push(&BytesStart::from_content(" xmlns:xmlns=''", 0)) { + Err(Error::InvalidPrefixBind { prefix, namespace }) => { + assert_eq!(prefix, b"xmlns"); + assert_eq!(namespace, b""); + } + x => panic!( + "Expected `Error::ReservedNamespaceError`, but found {:?}", + x + ), + } + assert_eq!(&resolver.buffer[s..], b""); + } + + /// Other prefix cannot be bound to `xmlns` namespace + #[test] + fn other_prefix_bound_to_xmlns_namespace() { + let mut resolver = NamespaceResolver::default(); + let s = resolver.buffer.len(); + match resolver.push(&BytesStart::from_content( + " xmlns:not_xmlns='http://www.w3.org/2000/xmlns/'", + 0, + )) { + Err(Error::InvalidPrefixBind { prefix, namespace }) => { + assert_eq!(prefix, b"not_xmlns"); + assert_eq!(namespace, b"http://www.w3.org/2000/xmlns/"); + } + x => panic!( + "Expected `Error::ReservedNamespaceError`, but found {:?}", + x + ), + } + assert_eq!(&resolver.buffer[s..], b""); + } + } + } + #[test] fn undeclared_prefix() { let name = QName(b"unknown:prefix"); let resolver = NamespaceResolver::default(); - assert_eq!(resolver.buffer, b""); + assert_eq!( + resolver.buffer, + b"xmlhttp://www.w3.org/XML/1998/namespacexmlnshttp://www.w3.org/2000/xmlns/" + ); assert_eq!( resolver.resolve(name, true), (Unknown(b"unknown".to_vec()), LocalName(b"prefix")) diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 35502be3..1c05faa0 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -70,11 +70,11 @@ impl NsReader { pub(super) fn process_event<'i>(&mut self, event: Result>) -> Result> { match event { Ok(Event::Start(e)) => { - self.ns_resolver.push(&e); + self.ns_resolver.push(&e)?; Ok(Event::Start(e)) } Ok(Event::Empty(e)) => { - self.ns_resolver.push(&e); + self.ns_resolver.push(&e)?; // notify next `read_event_impl()` invocation that it needs to pop this // namespace scope self.pending_pop = true;