Skip to content

Commit

Permalink
Make escape / unescape consistent throughout API
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Jul 15, 2022
1 parent a4a0f8d commit a4d50a5
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 29 deletions.
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@
- [#415]: Renamed many functions following the pattern `unescape_and_decode*` to `decode_and_unescape*`
to better communicate their function. Renamed functions following the pattern `*_with_custom_entities`
to `decode_and_unescape_with` to be more consistent across the API.
- [#415]: `BytesText::escaped()` renamed to `BytesText::escape()`, `BytesText::unescaped()` renamed to
`BytesText::unescape()`, `BytesText::unescaped_with()` renamed to `BytesText::unescape_with()`,
`Attribute::escaped_value()` renamed to `Attribute::escape_value()`, and `Attribute::escaped_value_with()`
renamed to `Attribute::escape_value_with()` for consistency across the API.
- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
added to all events

Expand Down
4 changes: 2 additions & 2 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
match r.read_event()? {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.unescaped_value()?);
criterion::black_box(attr?.unescape_value()?);
}
}
Event::Text(e) => {
criterion::black_box(e.unescaped()?);
criterion::black_box(e.unescape()?);
}
Event::CData(e) => {
criterion::black_box(e.into_inner());
Expand Down
2 changes: 1 addition & 1 deletion benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ fn one_event(c: &mut Criterion) {
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::Comment(ref e)) => nbtxt += e.unescape().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

Expand Down
4 changes: 2 additions & 2 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -618,14 +618,14 @@ where
allow_start: bool,
) -> Result<BytesCData<'de>, DeError> {
match self.next()? {
DeEvent::Text(e) if unescape => e.unescape().map_err(Into::into),
DeEvent::Text(e) if unescape => e.unescape_as_cdata().map_err(Into::into),
DeEvent::Text(e) => Ok(BytesCData::new(e.into_inner())),
DeEvent::CData(e) => Ok(e),
DeEvent::Start(e) if allow_start => {
// allow one nested level
let inner = self.next()?;
let t = match inner {
DeEvent::Text(t) if unescape => t.unescape()?,
DeEvent::Text(t) if unescape => t.unescape_as_cdata()?,
DeEvent::Text(t) => BytesCData::new(t.into_inner()),
DeEvent::CData(t) => t,
DeEvent::Start(s) => {
Expand Down
22 changes: 11 additions & 11 deletions src/events/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ use std::{borrow::Cow, ops::Range};
/// A struct representing a key/value XML attribute.
///
/// Field `value` stores raw bytes, possibly containing escape-sequences. Most users will likely
/// want to access the value using one of the [`unescaped_value`] and [`decode_and_unescape_value`]
/// want to access the value using one of the [`unescape_value`] and [`decode_and_unescape_value`]
/// functions.
///
/// [`unescaped_value`]: Self::unescaped_value
/// [`unescape_value`]: Self::unescape_value
/// [`decode_and_unescape_value`]: Self::decode_and_unescape_value
#[derive(Clone, PartialEq)]
pub struct Attribute<'a> {
Expand All @@ -37,9 +37,9 @@ impl<'a> Attribute<'a> {
///
/// This will allocate if the value contains any escape sequences.
///
/// See also [`unescaped_value_with()`](Self::unescaped_value_with)
pub fn unescaped_value(&self) -> XmlResult<Cow<[u8]>> {
self.unescaped_value_with(|_| None)
/// See also [`unescape_value_with()`](Self::unescape_value_with)
pub fn unescape_value(&self) -> XmlResult<Cow<[u8]>> {
self.unescape_value_with(|_| None)
}

/// Returns the unescaped value, using custom entities.
Expand All @@ -51,12 +51,12 @@ impl<'a> Attribute<'a> {
///
/// This will allocate if the value contains any escape sequences.
///
/// See also [`unescaped_value()`](Self::unescaped_value)
/// See also [`unescape_value()`](Self::unescape_value)
///
/// # Pre-condition
///
/// The implementation of `resolve_entity` is expected to operate over UTF-8 inputs.
pub fn unescaped_value_with<'s, 'entity>(
pub fn unescape_value_with<'s, 'entity>(
&'s self,
resolve_entity: impl Fn(&[u8]) -> Option<&'entity str>,
) -> XmlResult<Cow<'s, [u8]>> {
Expand All @@ -69,9 +69,9 @@ impl<'a> Attribute<'a> {
/// instead use one of:
///
/// * [`Reader::decoder().decode()`], as it only allocates when the decoding can't be performed otherwise.
/// * [`unescaped_value()`], as it doesn't allocate when no escape sequences are used.
/// * [`unescape_value()`], as it doesn't allocate when no escape sequences are used.
///
/// [`unescaped_value()`]: Self::unescaped_value
/// [`unescape_value()`]: Self::unescape_value
/// [`Reader::decoder().decode()`]: crate::reader::Decoder::decode
pub fn decode_and_unescape_value<B>(&self, reader: &Reader<B>) -> XmlResult<String> {
self.decode_and_unescape_value_with(reader, |_| None)
Expand All @@ -83,9 +83,9 @@ impl<'a> Attribute<'a> {
/// instead use one of:
///
/// * [`Reader::decoder().decode()`], as it only allocates when the decoding can't be performed otherwise.
/// * [`unescaped_value_with()`], as it doesn't allocate when no escape sequences are used.
/// * [`unescape_value_with()`], as it doesn't allocate when no escape sequences are used.
///
/// [`unescaped_value_with()`]: Self::unescaped_value_with
/// [`unescape_value_with()`]: Self::unescape_value_with
/// [`Reader::decoder().decode()`]: crate::reader::Decoder::decode
///
/// # Pre-condition
Expand Down
14 changes: 7 additions & 7 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ impl<'a> BytesText<'a> {
/// Returns unescaped version of the text content, that can be written
/// as CDATA in XML
#[cfg(feature = "serialize")]
pub(crate) fn unescape(self) -> std::result::Result<BytesCData<'a>, EscapeError> {
pub(crate) fn unescape_as_cdata(self) -> std::result::Result<BytesCData<'a>, EscapeError> {
//TODO: need to think about better API instead of dozens similar functions
// Maybe use builder pattern. After that expose function as public API
//FIXME: need to take into account entities defined in the document
Expand All @@ -752,9 +752,9 @@ impl<'a> BytesText<'a> {
/// Searches for '&' into content and try to escape the coded character if possible
/// returns Malformed error with index within element if '&' is not followed by ';'
///
/// See also [`unescaped_with()`](Self::unescaped_with)
pub fn unescaped(&self) -> Result<Cow<[u8]>> {
self.unescaped_with(|_| None)
/// See also [`unescape_with()`](Self::unescape_with)
pub fn unescape(&self) -> Result<Cow<[u8]>> {
self.unescape_with(|_| None)
}

/// gets escaped content with custom entities
Expand All @@ -767,8 +767,8 @@ impl<'a> BytesText<'a> {
///
/// The implementation of `resolve_entity` is expected to operate over UTF-8 inputs.
///
/// See also [`unescaped()`](Self::unescaped)
pub fn unescaped_with<'s, 'entity>(
/// See also [`unescape()`](Self::unescape)
pub fn unescape_with<'s, 'entity>(
&'s self,
resolve_entity: impl Fn(&[u8]) -> Option<&'entity str>,
) -> Result<Cow<'s, [u8]>> {
Expand Down Expand Up @@ -807,7 +807,7 @@ impl<'a> BytesText<'a> {
}

/// Gets escaped content.
pub fn escaped(&self) -> &[u8] {
pub fn escape(&self) -> &[u8] {
self.content.as_ref()
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ impl<W: Write> Writer<W> {
Event::Empty(ref e) => self.write_wrapped(b"<", e, b"/>"),
Event::Text(ref e) => {
next_should_line_break = false;
self.write(&e.escaped())
self.write(&e.escape())
}
Event::Comment(ref e) => self.write_wrapped(b"<!--", e, b"-->"),
Event::CData(ref e) => {
Expand Down
4 changes: 2 additions & 2 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,13 +157,13 @@ fn fuzz_101() {
match reader.read_event_into(&mut buf) {
Ok(Start(ref e)) | Ok(Empty(ref e)) => {
for a in e.attributes() {
if a.ok().map_or(true, |a| a.unescaped_value().is_err()) {
if a.ok().map_or(true, |a| a.unescape_value().is_err()) {
break;
}
}
}
Ok(Text(ref e)) => {
if e.unescaped().is_err() {
if e.unescape().is_err() {
break;
}
}
Expand Down
4 changes: 2 additions & 2 deletions tests/unit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ fn test_escaped_content() {
"content unexpected: expecting '&lt;test&gt;', got '{:?}'",
from_utf8(&*e)
);
match e.unescaped() {
match e.unescape() {
Ok(ref c) => assert_eq!(
&**c,
b"<test>",
Expand Down Expand Up @@ -620,7 +620,7 @@ fn test_read_write_roundtrip_escape() -> Result<()> {
match reader.read_event_into(&mut buf)? {
Eof => break,
Text(e) => {
let t = e.escaped();
let t = e.escape();
assert!(writer
.write_event(Text(BytesText::from_escaped(t.to_vec())))
.is_ok());
Expand Down
2 changes: 1 addition & 1 deletion tests/xmlrs_reader_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ fn xmlrs_display(opt_event: Result<(ResolveResult, Event)>, decoder: Decoder) ->
Ok((_, Event::CData(e))) => format!("CData({})", decoder.decode(&e).unwrap()),
Ok((_, Event::Text(e))) => match unescape(decoder.decode(&e).unwrap().as_bytes()) {
Ok(c) => format!("Characters({})", from_utf8(c.as_ref()).unwrap()),
Err(err) => format!("FailedUnescape({:?}; {})", e.escaped(), err),
Err(err) => format!("FailedUnescape({:?}; {})", e.escape(), err),
},
Ok((_, Event::Decl(e))) => {
let version_cow = e.version().unwrap();
Expand Down

0 comments on commit a4d50a5

Please sign in to comment.