diff --git a/Cargo.toml b/Cargo.toml index a1fe074d..ef2f9ae9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ encoding_rs = { version = "0.8", optional = true } serde = { version = "1.0.100", optional = true } tokio = { version = "1.0", optional = true, default-features = false, features = ["io-util"] } memchr = "2.0" +once_cell = "1.17.0" [dev-dependencies] criterion = "0.4" diff --git a/src/name.rs b/src/name.rs index ebf2e130..21de8f9c 100644 --- a/src/name.rs +++ b/src/name.rs @@ -8,6 +8,8 @@ use crate::events::attributes::Attribute; use crate::events::BytesStart; use crate::utils::write_byte_string; use memchr::memchr; +use once_cell::sync::Lazy; +use std::collections::HashMap; use std::convert::TryFrom; use std::fmt::{self, Debug, Formatter}; @@ -399,6 +401,32 @@ pub(crate) struct NamespaceResolver { nesting_level: i32, } +/// This map defines the [reserved namespaces] of the XML standard. +/// +/// The prefix `xml` is by definition bound to the namespace name +/// `http://www.w3.org/XML/1998/namespace`. It may, but need not, be declared, and must not be +/// undeclared or bound to any other namespace name. Other prefixes must not be bound to this +/// namespace name, and it must not be declared as the default namespace. +/// +/// The prefix `xmlns` is used only to declare namespace bindings and is by definition bound +/// to the namespace name `http://www.w3.org/2000/xmlns/`. It must not be declared or +/// undeclared. Other prefixes must not be bound to this namespace name, and it must not be +/// declared as the default namespace. Element names must not have the prefix `xmlns`. 
+/// +/// [reserved namespaces]: https://www.w3.org/TR/xml-names11/#xmlReserved +static WELL_KNOWN_NAMESPACES: Lazy> = Lazy::new(|| { + let mut m = HashMap::new(); + m.insert( + Prefix(b"xml"), + Namespace(b"http://www.w3.org/XML/1998/namespace"), + ); + m.insert( + Prefix(b"xmlns"), + Namespace(b"http://www.w3.org/2000/xmlns/"), + ); + m +}); + impl NamespaceResolver { /// Begins a new scope and add to it all [namespace bindings] that found in /// the specified start element. @@ -542,7 +570,10 @@ impl NamespaceResolver { #[inline] fn maybe_unknown(prefix: Option) -> ResolveResult<'static> { match prefix { - Some(p) => ResolveResult::Unknown(p.into_inner().to_vec()), + Some(p) => WELL_KNOWN_NAMESPACES.get(&p).map_or_else( + || ResolveResult::Unknown(p.into_inner().to_vec()), + |p| ResolveResult::Bound(*p), + ), None => ResolveResult::Unbound, } } @@ -806,6 +837,27 @@ mod namespaces { assert_eq!(resolver.find(name, &buffer), Unknown(b"unknown".to_vec())); } + #[test] + fn undeclared_reserved_prefixes() { + let resolver = NamespaceResolver::default(); + let tag = b"random"; + + for (prefix, namespace) in WELL_KNOWN_NAMESPACES.iter() { + let name_buf = [prefix.into_inner(), tag].join(&b":"[..]); + let name = QName(&name_buf); + + assert_eq!( + resolver.resolve(name, b"", true), + (Bound(*namespace), LocalName(tag)) + ); + assert_eq!( + resolver.resolve(name.clone(), b"", false), + (Bound(*namespace), LocalName(tag)) + ); + assert_eq!(resolver.find(name.clone(), b""), Bound(*namespace)); + } + } + /// Checks how the QName is decomposed to a prefix and a local name #[test] fn prefix_and_local_name() {