From c3cc7db29bb48bc8ce545298e7e2790b3916c20d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Sun, 7 Jul 2024 08:31:29 +0200 Subject: [PATCH] Speed-up HTML escaping by using equal sized entities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By using codepoint entities like `'&'` → `"&#38;"`, we have a much smaller lookup table (58 bytes instead of 29× pointer size ~= 232 bytes). This makes the cache happy, and the benchmark runs about 20% faster. ```text $ cargo bench --bench escape Escaping time: [3.4087 µs 3.4126 µs 3.4168 µs] change: [-19.790% -19.580% -19.354%] (p = 0.00 < 0.05) Performance has improved. ``` --- rinja/src/filters/escape.rs | 73 +++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/rinja/src/filters/escape.rs b/rinja/src/filters/escape.rs index 70f372e4..446f24f0 100644 --- a/rinja/src/filters/escape.rs +++ b/rinja/src/filters/escape.rs @@ -1,5 +1,6 @@ use std::convert::Infallible; use std::fmt::{self, Display, Formatter, Write}; +use std::num::NonZeroU8; use std::str; /// Marks a string (or other `Display` type) as safe @@ -59,37 +60,63 @@ pub fn e(text: impl fmt::Display, escaper: impl Escaper) -> Result `<` -/// * `>` => `>` -/// * `&` => `&` -/// * `"` => `"` -/// * `'` => `'` +/// * `"` => `"` +/// * `&` => `&` +/// * `'` => `'` +/// * `<` => `<` +/// * `>` => `>` #[derive(Debug, Clone, Copy, Default)] pub struct Html; impl Escaper for Html { fn write_escaped_str(&self, mut fmt: W, string: &str) -> fmt::Result { + let mut escaped_buf = *b"&#__;"; let mut last = 0; + for (index, byte) in string.bytes().enumerate() { const MIN_CHAR: u8 = b'"'; const MAX_CHAR: u8 = b'>'; - const TABLE: [Option<&&str>; (MAX_CHAR - MIN_CHAR + 1) as usize] = { - let mut table = [None; (MAX_CHAR - MIN_CHAR + 1) as usize]; - table[(b'<' - MIN_CHAR) as usize] = Some(&"<"); - table[(b'>' - MIN_CHAR) as usize] = Some(&">"); - table[(b'&' - MIN_CHAR) as usize] = 
Some(&"&"); - table[(b'"' - MIN_CHAR) as usize] = Some(&"""); - table[(b'\'' - MIN_CHAR) as usize] = Some(&"'"); + + struct Table { + _align: [usize; 0], + lookup: [Option<[NonZeroU8; 2]>; (MAX_CHAR - MIN_CHAR + 1) as usize], + } + + const TABLE: Table = { + const fn n(c: u8) -> Option<[NonZeroU8; 2]> { + let n0 = match NonZeroU8::new(c / 10 + b'0') { + Some(n) => n, + None => panic!(), + }; + let n1 = match NonZeroU8::new(c % 10 + b'0') { + Some(n) => n, + None => panic!(), + }; + Some([n0, n1]) + } + + let mut table = Table { + _align: [], + lookup: [None; (MAX_CHAR - MIN_CHAR + 1) as usize], + }; + + table.lookup[(b'"' - MIN_CHAR) as usize] = n(b'"'); + table.lookup[(b'&' - MIN_CHAR) as usize] = n(b'&'); + table.lookup[(b'\'' - MIN_CHAR) as usize] = n(b'\''); + table.lookup[(b'<' - MIN_CHAR) as usize] = n(b'<'); + table.lookup[(b'>' - MIN_CHAR) as usize] = n(b'>'); table }; let escaped = match byte { - MIN_CHAR..=MAX_CHAR => TABLE[(byte - MIN_CHAR) as usize], + MIN_CHAR..=MAX_CHAR => TABLE.lookup[(byte - MIN_CHAR) as usize], _ => None, }; if let Some(escaped) = escaped { + escaped_buf[2] = escaped[0].get(); + escaped_buf[3] = escaped[1].get(); fmt.write_str(&string[last..index])?; - fmt.write_str(escaped)?; + fmt.write_str(unsafe { std::str::from_utf8_unchecked(escaped_buf.as_slice()) })?; last = index + 1; } } @@ -98,11 +125,11 @@ impl Escaper for Html { fn write_escaped_char(&self, mut fmt: W, c: char) -> fmt::Result { fmt.write_str(match (c.is_ascii(), c as u8) { - (true, b'<') => "<", - (true, b'>') => ">", - (true, b'&') => "&", - (true, b'"') => """, - (true, b'\'') => "'", + (true, b'"') => """, + (true, b'&') => "&", + (true, b'\'') => "'", + (true, b'<') => "<", + (true, b'>') => ">", _ => return fmt.write_char(c), }) } @@ -136,10 +163,10 @@ pub trait Escaper: Copy { #[test] fn test_escape() { assert_eq!(escape("", Html).unwrap().to_string(), ""); - assert_eq!(escape("<&>", Html).unwrap().to_string(), "<&>"); - assert_eq!(escape("bla&", 
Html).unwrap().to_string(), "bla&"); - assert_eq!(escape("", Html).unwrap().to_string(), "<&>"); + assert_eq!(escape("bla&", Html).unwrap().to_string(), "bla&"); + assert_eq!(escape("", Text).unwrap().to_string(), "<&>");