From 101745157937a2cb55a9bda42eb77c3062de9cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Sun, 7 Jul 2024 08:31:29 +0200 Subject: [PATCH] Speed-up HTML escaping by using equal sized entities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By using codepoint entities like `'&'` → `"&#38;"`, we have a much smaller lookup table (58 bytes instead of 29× pointer size ~= 232 bytes). This makes the cache happy, and the benchmark runs about ~20% faster. ```text $ cargo bench --bench escape Escaping time: [3.4087 µs 3.4126 µs 3.4168 µs] change: [-19.790% -19.580% -19.354%] (p = 0.00 < 0.05) Performance has improved. ``` --- rinja/src/filters/escape.rs | 73 ++++++++++++++++++++++++----------- testing/tests/filter_block.rs | 2 +- testing/tests/filters.rs | 14 +++---- testing/tests/simple.rs | 4 +- testing/tests/whitespace.rs | 2 +- 5 files changed, 61 insertions(+), 34 deletions(-) diff --git a/rinja/src/filters/escape.rs b/rinja/src/filters/escape.rs index 70f372e4..446f24f0 100644 --- a/rinja/src/filters/escape.rs +++ b/rinja/src/filters/escape.rs @@ -1,5 +1,6 @@ use std::convert::Infallible; use std::fmt::{self, Display, Formatter, Write}; +use std::num::NonZeroU8; use std::str; /// Marks a string (or other `Display` type) as safe @@ -59,37 +60,63 @@ pub fn e(text: impl fmt::Display, escaper: impl Escaper) -> Result -/// * `<` => `&lt;` -/// * `>` => `&gt;` -/// * `&` => `&amp;` -/// * `"` => `&quot;` -/// * `'` => `&#x27;` +/// * `"` => `&#34;` +/// * `&` => `&#38;` +/// * `'` => `&#39;` +/// * `<` => `&#60;` +/// * `>` => `&#62;` #[derive(Debug, Clone, Copy, Default)] pub struct Html; impl Escaper for Html { fn write_escaped_str<W: Write>(&self, mut fmt: W, string: &str) -> fmt::Result { + let mut escaped_buf = *b"&#__;"; let mut last = 0; + for (index, byte) in string.bytes().enumerate() { const MIN_CHAR: u8 = b'"'; const MAX_CHAR: u8 = b'>'; - const TABLE: [Option<&&str>; (MAX_CHAR - MIN_CHAR + 1) as usize] = { - let mut table = [None; (MAX_CHAR - MIN_CHAR + 1) as 
usize]; - table[(b'<' - MIN_CHAR) as usize] = Some(&"&lt;"); - table[(b'>' - MIN_CHAR) as usize] = Some(&"&gt;"); - table[(b'&' - MIN_CHAR) as usize] = Some(&"&amp;"); - table[(b'"' - MIN_CHAR) as usize] = Some(&"&quot;"); - table[(b'\'' - MIN_CHAR) as usize] = Some(&"&#x27;"); + + struct Table { + _align: [usize; 0], + lookup: [Option<[NonZeroU8; 2]>; (MAX_CHAR - MIN_CHAR + 1) as usize], + } + + const TABLE: Table = { + const fn n(c: u8) -> Option<[NonZeroU8; 2]> { + let n0 = match NonZeroU8::new(c / 10 + b'0') { + Some(n) => n, + None => panic!(), + }; + let n1 = match NonZeroU8::new(c % 10 + b'0') { + Some(n) => n, + None => panic!(), + }; + Some([n0, n1]) + } + + let mut table = Table { + _align: [], + lookup: [None; (MAX_CHAR - MIN_CHAR + 1) as usize], + }; + + table.lookup[(b'"' - MIN_CHAR) as usize] = n(b'"'); + table.lookup[(b'&' - MIN_CHAR) as usize] = n(b'&'); + table.lookup[(b'\'' - MIN_CHAR) as usize] = n(b'\''); + table.lookup[(b'<' - MIN_CHAR) as usize] = n(b'<'); + table.lookup[(b'>' - MIN_CHAR) as usize] = n(b'>'); table }; let escaped = match byte { - MIN_CHAR..=MAX_CHAR => TABLE[(byte - MIN_CHAR) as usize], + MIN_CHAR..=MAX_CHAR => TABLE.lookup[(byte - MIN_CHAR) as usize], _ => None, }; if let Some(escaped) = escaped { + escaped_buf[2] = escaped[0].get(); + escaped_buf[3] = escaped[1].get(); fmt.write_str(&string[last..index])?; - fmt.write_str(escaped)?; + fmt.write_str(unsafe { std::str::from_utf8_unchecked(escaped_buf.as_slice()) })?; last = index + 1; } } @@ -98,11 +125,11 @@ impl Escaper for Html { fn write_escaped_char<W: Write>(&self, mut fmt: W, c: char) -> fmt::Result { fmt.write_str(match (c.is_ascii(), c as u8) { - (true, b'<') => "&lt;", - (true, b'>') => "&gt;", - (true, b'&') => "&amp;", - (true, b'"') => "&quot;", - (true, b'\'') => "&#x27;", + (true, b'"') => "&#34;", + (true, b'&') => "&#38;", + (true, b'\'') => "&#39;", + (true, b'<') => "&#60;", + (true, b'>') => "&#62;", _ => return fmt.write_char(c), }) } @@ -136,10 +163,10 @@ pub trait Escaper: Copy { #[test] fn test_escape() { 
assert_eq!(escape("", Html).unwrap().to_string(), ""); - assert_eq!(escape("<&>", Html).unwrap().to_string(), "&lt;&amp;&gt;"); - assert_eq!(escape("bla&", Html).unwrap().to_string(), "bla&amp;"); - assert_eq!(escape("", Html).unwrap().to_string(), "<&>"); + assert_eq!(escape("bla&", Html).unwrap().to_string(), "bla&#38;"); + assert_eq!(escape("", Text).unwrap().to_string(), "<&>"); diff --git a/testing/tests/filter_block.rs b/testing/tests/filter_block.rs index bde29738..3d24c09c 100644 --- a/testing/tests/filter_block.rs +++ b/testing/tests/filter_block.rs @@ -115,7 +115,7 @@ struct D; #[test] fn filter_block_html_escape() { let template = D; - assert_eq!(template.render().unwrap(), r#"&lt;block&gt;"#); + assert_eq!(template.render().unwrap(), r#"&#60;block&#62;"#); } // This test ensures that it is not escaped if it is not HTML. diff --git a/testing/tests/filters.rs b/testing/tests/filters.rs index 3a2bc065..96b82f15 100644 --- a/testing/tests/filters.rs +++ b/testing/tests/filters.rs @@ -19,8 +19,8 @@ fn filter_escape() { }; assert_eq!( s.render().unwrap(), - "// my &lt;html&gt; is &quot;unsafe&quot; &amp; \ - should be &#x27;escaped&#x27;" + "// my &#60;html&#62; is &#34;unsafe&#34; &#38; \ + should be &#39;escaped&#39;" ); } @@ -42,7 +42,7 @@ fn filter_opt_escaper_none() { assert_eq!( t.render().unwrap(), r#"

Foo Bar

-&lt;h1 class=&quot;title&quot;&gt;Foo Bar&lt;/h1&gt; +&#60;h1 class=&#34;title&#34;&#62;Foo Bar&#60;/h1&#62;

Foo Bar

Foo Bar

"# @@ -67,9 +67,9 @@ fn filter_opt_escaper_html() { assert_eq!( t.render().unwrap(), r#"

Foo Bar

-&lt;h1 class=&quot;title&quot;&gt;Foo Bar&lt;/h1&gt; -&lt;h1 class=&quot;title&quot;&gt;Foo Bar&lt;/h1&gt; -&lt;h1 class=&quot;title&quot;&gt;Foo Bar&lt;/h1&gt; +&#60;h1 class=&#34;title&#34;&#62;Foo Bar&#60;/h1&#62; +&#60;h1 class=&#34;title&#34;&#62;Foo Bar&#60;/h1&#62; +&#60;h1 class=&#34;title&#34;&#62;Foo Bar&#60;/h1&#62; "# ); } @@ -329,7 +329,7 @@ fn test_json_attribute() { }; assert_eq!( t.render().unwrap(), - r#"
  • "# + r#"
  • "# ); } diff --git a/testing/tests/simple.rs b/testing/tests/simple.rs index 669db891..e4b22fc9 100644 --- a/testing/tests/simple.rs +++ b/testing/tests/simple.rs @@ -39,7 +39,7 @@ struct EscapeTemplate<'a> { fn test_escape() { let s = EscapeTemplate { name: "<>&\"'" }; - assert_eq!(s.render().unwrap(), "Hello, &lt;&gt;&amp;&quot;&#x27;!"); + assert_eq!(s.render().unwrap(), "Hello, &#60;&#62;&#38;&#34;&#39;!"); } #[derive(Template)] @@ -155,7 +155,7 @@ fn test_literals_escape() { let s = LiteralsEscapeTemplate {}; assert_eq!( s.render().unwrap(), - "A\n\r\t\\\0♥&#x27;&quot;&quot;\nA\n\r\t\\\0♥&#x27;&quot;&#x27;" + "A\n\r\t\\\0♥&#39;&#34;&#34;\nA\n\r\t\\\0♥&#39;&#34;&#39;" ); } diff --git a/testing/tests/whitespace.rs b/testing/tests/whitespace.rs index 5301dbca..63e40da3 100644 --- a/testing/tests/whitespace.rs +++ b/testing/tests/whitespace.rs @@ -41,7 +41,7 @@ fn test_extra_whitespace() { template.nested_1.nested_2.hash.insert("key", "value"); assert_eq!( template.render().unwrap(), - "\n0\n0\n0\n0\n\n\n\n0\n0\n0\n0\n0\n\na0\na1\nvalue\n\n\n\n\n\n[\n &quot;a0&quot;,\n &quot;a1&quot;,\n &quot;a2&quot;,\n &quot;a3&quot;\n]\n[\n &quot;a0&quot;,\n &quot;a1&quot;,\n &quot;a2&quot;,\n &quot;a3&quot;\n][\n &quot;a0&quot;,\n &quot;a1&quot;,\n &quot;a2&quot;,\n &quot;a3&quot;\n]\n[\n &quot;a1&quot;\n][\n &quot;a1&quot;\n]\n[\n &quot;a1&quot;,\n &quot;a2&quot;\n][\n &quot;a1&quot;,\n &quot;a2&quot;\n]\n[\n &quot;a1&quot;\n][\n &quot;a1&quot;\n]1-1-1\n3333 3\n2222 2\n0000 0\n3333 3\n\ntruefalse\nfalsefalsefalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "\n0\n0\n0\n0\n\n\n\n0\n0\n0\n0\n0\n\na0\na1\nvalue\n\n\n\n\n\n[\n &#34;a0&#34;,\n &#34;a1&#34;,\n &#34;a2&#34;,\n &#34;a3&#34;\n]\n[\n &#34;a0&#34;,\n &#34;a1&#34;,\n &#34;a2&#34;,\n &#34;a3&#34;\n][\n &#34;a0&#34;,\n &#34;a1&#34;,\n &#34;a2&#34;,\n &#34;a3&#34;\n]\n[\n &#34;a1&#34;\n][\n &#34;a1&#34;\n]\n[\n &#34;a1&#34;,\n &#34;a2&#34;\n][\n &#34;a1&#34;,\n &#34;a2&#34;\n]\n[\n &#34;a1&#34;\n][\n &#34;a1&#34;\n]1-1-1\n3333 3\n2222 2\n0000 0\n3333 3\n\ntruefalse\nfalsefalsefalse\n\n\n\n\n\n\n\n\n\n\n\n\n\n", ); }