From a966a83fa00464d01e8ede3e9760abb5712817d7 Mon Sep 17 00:00:00 2001 From: backwardspy Date: Sat, 27 Apr 2024 10:37:52 +0100 Subject: [PATCH] support alternative template file encodings (#179) --- Cargo.lock | 71 +++++++++++------- whiskers/Cargo.toml | 7 +- whiskers/src/main.rs | 21 ++++-- whiskers/tests/cli.rs | 38 +++++++++- whiskers/tests/encodings.rs | 36 +++++++++ whiskers/tests/fixtures/encodings/README.md | 5 ++ .../tests/fixtures/encodings/utf16be.tera | Bin 0 -> 96 bytes .../tests/fixtures/encodings/utf16le.tera | Bin 0 -> 96 bytes whiskers/tests/fixtures/encodings/utf8.tera | 5 ++ .../tests/fixtures/encodings/utf8bom.tera | 5 ++ 10 files changed, 149 insertions(+), 39 deletions(-) create mode 100644 whiskers/tests/encodings.rs create mode 100644 whiskers/tests/fixtures/encodings/README.md create mode 100644 whiskers/tests/fixtures/encodings/utf16be.tera create mode 100644 whiskers/tests/fixtures/encodings/utf16le.tera create mode 100644 whiskers/tests/fixtures/encodings/utf8.tera create mode 100644 whiskers/tests/fixtures/encodings/utf8bom.tera diff --git a/Cargo.lock b/Cargo.lock index efc18f97..5a3ac44f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,9 +91,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" [[package]] name = "assert_cmd" @@ -171,9 +171,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.4" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "byteorder" @@ -219,6 +219,7 @@ dependencies = [ "clap", "clap-stdin", "css-colors", + "encoding_rs_io", "indexmap", "itertools", "lzma-rust", @@ -235,9 +236,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.90" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" +checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41" [[package]] name = "cfg-if" @@ -312,9 +313,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.1" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "885e4d7d5af40bfb99ae6f9433e292feac98d452dcb3ec3d25dfe7552b77da8c" +checksum = "dd79504325bf38b10165b02e89b4347300f855f273c4cb30c4a3209e6583275e" dependencies = [ "clap", ] @@ -455,6 +456,24 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +[[package]] +name = "encoding_rs" +version = "0.8.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "encoding_rs_io" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" +dependencies = [ + "encoding_rs", +] + [[package]] name = "equivalent" version = "1.0.1" @@ -537,9 +556,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", "js-sys", @@ -724,9 +743,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lzma-rust" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f798132166cc040cb70dbab4ccbb89643a6966a4ac33f0b312e76a8238673a5" +checksum = "d5edcf5d1f4d78221ea7861fb69899afd15c42601751f92f09a06f7b051fb289" dependencies = [ "byteorder", ] @@ -806,9 +825,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.8" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f8023d0fb78c8e03784ea1c7f3fa36e68a723138990b8d5a47d916b651e7a8" +checksum = "311fb059dee1a7b802f036316d790138c613a4e8b180c822e3925a662e9f0c95" dependencies = [ "memchr", "thiserror", @@ -817,9 +836,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.7.8" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d24f72393fd16ab6ac5738bc33cdb6a9aa73f8b902e8fe29cf4e67d7dd1026" +checksum = "f73541b156d32197eecda1a4014d7f868fd2bcb3c550d5386087cfba442bf69c" dependencies = [ "pest", "pest_generator", @@ -827,9 +846,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.8" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc17e2a6c7d0a492f0158d7a4bd66cc17280308bbaff78d5bef566dca35ab80" +checksum = "c35eeed0a3fab112f75165fdc026b3913f4183133f19b49be773ac9ea966e8bd" dependencies = [ "pest", "pest_meta", @@ -840,9 +859,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.7.8" +version = "2.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "934cd7631c050f4674352a6e835d5f6711ffbfb9345c2fc0107155ac495ae293" +checksum = "2adbf29bb9776f28caece835398781ab24435585fe0d4dc1374a61db5accedca" dependencies = [ "once_cell", "pest", @@ -953,9 +972,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -1176,15 +1195,15 @@ dependencies = [ [[package]] name = "strsim" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.57" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", diff --git a/whiskers/Cargo.toml b/whiskers/Cargo.toml index a5de5d54..3bd3f6c8 100644 --- a/whiskers/Cargo.toml +++ b/whiskers/Cargo.toml @@ -36,17 +36,18 @@ anyhow = "1.0" base64 = "0.22" catppuccin = { version = "2.2", features = ["serde", "css-colors"] } clap = { version = "4.5", features = ["derive"] } -clap-stdin = "0.4.0" +clap-stdin = "0.4" css-colors = "1.0" +encoding_rs_io = "0.1" indexmap = { version = "2.2", features = ["serde"] } itertools = "0.12" lzma-rust = "0.1" rmp-serde = "1.2" -semver = { version = "1.0.22", features = ["serde"] } +semver = { version = "1.0", features = ["serde"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" -tempfile = "3.10.1" +tempfile = "3.10" tera = { version = "1.19", features = ["preserve_order"] } thiserror = "1.0" diff --git a/whiskers/src/main.rs b/whiskers/src/main.rs index 020aa070..ace4a9b4 100644 --- a/whiskers/src/main.rs +++ b/whiskers/src/main.rs @@ -1,7 +1,7 @@ use std::{ collections::{hash_map::Entry, HashMap}, env, - io::Write as _, + io::{Read, Write as _}, path::{Path, PathBuf}, process, }; @@ -9,6 +9,7 @@ use std::{ use anyhow::{anyhow, Context as _}; use catppuccin::FlavorName; use clap::Parser as _; +use encoding_rs_io::DecodeReaderBytes; use itertools::Itertools; use whiskers::{ cli::{Args, OutputFormat}, @@ -81,12 +82,18 @@ fn main() -> anyhow::Result<()> { .expect("args.template is guaranteed by clap to be set"); let template_from_stdin = matches!(template.source, clap_stdin::Source::Stdin); let template_name = template_name(&template); - let doc = frontmatter::parse( - &template - .contents() - .context("Template contents could not be read")?, - ) - .context("Frontmatter is invalid")?; + + let mut decoder = DecodeReaderBytes::new( + template + .into_reader() + .context("Failed to open template file")?, + ); + let mut template = String::new(); + decoder + .read_to_string(&mut template) + .context("Template could not be read")?; + + let doc = frontmatter::parse(&template).context("Frontmatter is invalid")?; let mut template_opts = TemplateOptions::from_frontmatter(&doc.frontmatter, args.flavor.map(Into::into)) .context("Could not get template options from frontmatter")?; diff --git a/whiskers/tests/cli.rs b/whiskers/tests/cli.rs index 1eab3cc9..119009b5 100644 --- a/whiskers/tests/cli.rs +++ b/whiskers/tests/cli.rs @@ -36,6 +36,38 @@ mod happy_path { "catppuccin-macchiato-yellow-no-italics.ini", )); } + + /// Test that the CLI can render a UTF-8 template file + #[test] + fn test_utf8() { + let mut cmd = Command::cargo_bin("whiskers").expect("binary exists"); + let assert = cmd.args(["tests/fixtures/encodings/utf8.tera"]).assert(); + assert.success().stdout("it worked!"); + } + + /// Test that the CLI can render a UTF-8 with BOM template file + #[test] + fn test_utf8_bom() { + let mut cmd = Command::cargo_bin("whiskers").expect("binary exists"); + let assert = cmd.args(["tests/fixtures/encodings/utf8bom.tera"]).assert(); + assert.success().stdout("it worked!"); + } + + /// Test that the CLI can render a UTF-16 BE template file + #[test] + fn test_utf16be() { + let mut cmd = Command::cargo_bin("whiskers").expect("binary exists"); + let assert = cmd.args(["tests/fixtures/encodings/utf16be.tera"]).assert(); + assert.success().stdout("it worked!"); + } + + /// Test that the CLI can render a UTF-16 LE template file + #[test] + fn test_utf16le() { + let mut cmd = Command::cargo_bin("whiskers").expect("binary exists"); + let assert = cmd.args(["tests/fixtures/encodings/utf16le.tera"]).assert(); + assert.success().stdout("it worked!"); + } } #[cfg(test)] @@ -47,9 +79,9 @@ mod sad_path { fn nonexistent_template_file() { let mut cmd = Command::cargo_bin("whiskers").expect("binary exists"); cmd.arg("test/file/doesnt/exist"); - cmd.assert().failure().stderr(predicate::str::contains( - "Template contents could not be read", - )); + cmd.assert() + .failure() + .stderr(predicate::str::contains("Failed to open template file")); } #[test] diff --git a/whiskers/tests/encodings.rs b/whiskers/tests/encodings.rs new file mode 100644 index 00000000..78dd676e --- /dev/null +++ b/whiskers/tests/encodings.rs @@ -0,0 +1,36 @@ +//! tests that ensure the special encoding fixtures are left untouched +#[test] +fn utf8() { + let bytes = &include_bytes!("fixtures/encodings/utf8.tera")[..3]; + assert_eq!( + bytes, b"---", + "fixtures/encodings/utf8.tera needs to be re-encoded to UTF-8" + ); +} + +#[test] +fn utf8bom() { + let bytes = &include_bytes!("fixtures/encodings/utf8bom.tera")[..6]; + assert_eq!( + bytes, b"\xEF\xBB\xBF---", + "fixtures/encodings/utf8bom.tera needs to be re-encoded to UTF-8 with BOM" + ); +} + +#[test] +fn utf16be() { + let bytes = &include_bytes!("fixtures/encodings/utf16be.tera")[..2]; + assert_eq!( + bytes, b"\xFE\xFF", + "fixtures/encodings/utf16be.tera needs to be re-encoded to UTF-16 BE" + ); +} + +#[test] +fn utf16le() { + let bytes = &include_bytes!("fixtures/encodings/utf16le.tera")[..2]; + assert_eq!( + bytes, b"\xFF\xFE", + "fixtures/encodings/utf16le.tera needs to be re-encoded to UTF-16 LE" + ); +} diff --git a/whiskers/tests/fixtures/encodings/README.md b/whiskers/tests/fixtures/encodings/README.md new file mode 100644 index 00000000..0c6ab973 --- /dev/null +++ b/whiskers/tests/fixtures/encodings/README.md @@ -0,0 +1,5 @@ +The fixtures in this directory are encoded in various formats to test the encoding detection and decoding capabilities of Whiskers. + +Some text editors like to normalize the encoding of files when saving them. Please be careful not to change them unintentionally. + +There are tests in `tests/encodings.rs` that ensure these fixtures are not unintentionally changed. \ No newline at end of file diff --git a/whiskers/tests/fixtures/encodings/utf16be.tera b/whiskers/tests/fixtures/encodings/utf16be.tera new file mode 100644 index 0000000000000000000000000000000000000000..d1de7a9995260ced9ffc61221b580fc64aee028f GIT binary patch literal 96 zcmezOpFtN4xfseBG8i%$iW#yQQW=VXv=svvg8~$mLB%o|@)`1gG71bz3`Puk3 literal 0 HcmV?d00001 diff --git a/whiskers/tests/fixtures/encodings/utf16le.tera b/whiskers/tests/fixtures/encodings/utf16le.tera new file mode 100644 index 0000000000000000000000000000000000000000..cab6764a1013d2cf3707be276c9415ed874ec500 GIT binary patch literal 96 zcmezWPnQ7%xfseBG8i%$iW#yQQW=VXv=vZH0Se2YVwnv240%8q1qLMsBL+PN10c-> X)(