Skip to content

Commit

Permalink
Implement a split filter (#517)
Browse files Browse the repository at this point in the history
  • Loading branch information
wolfv committed Jun 11, 2024
1 parent 3a4aed3 commit d47f4ba
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ All notable changes to MiniJinja are documented here.
This makes things such as `dict.keys` work. Also adds a new
`--py-compat` flag to `minijinja-cli` that enables it. This improves
the compatibility with Python based templates. #521
- Added a new `|split` filter that works like the `.split` method in Python. #517

## 2.0.1

Expand Down
9 changes: 9 additions & 0 deletions minijinja-contrib/src/pycompat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use minijinja::{Error, ErrorKind, State, Value};
/// * `str.lstrip`
/// * `str.replace`
/// * `str.rstrip`
/// * `str.split`
/// * `str.strip`
/// * `str.title`
/// * `str.upper`
Expand Down Expand Up @@ -114,6 +115,14 @@ fn string_methods(value: &Value, method: &str, args: &[Value]) -> Result<Value,
// privileged.
Ok(Value::from(minijinja::filters::title(s.into())))
}
"split" => {
let (sep, maxsplits) = from_args(args)?;
// one shall not call into these filters. However we consider ourselves
// privileged.
Ok(minijinja::filters::split(s.into(), sep, maxsplits)
.try_iter()?
.collect::<Value>())
}
"capitalize" => {
from_args(args)?;
// one shall not call into these filters. However we consider ourselves
Expand Down
4 changes: 4 additions & 0 deletions minijinja-contrib/tests/pycompat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ fn test_string_methods() {
);
assert_eq!(eval_expr("'foo barooo'.count('oo')").as_usize(), Some(2));
assert_eq!(eval_expr("'foo barooo'.find('oo')").as_usize(), Some(1));
assert!(eval_expr("'a b c'.split() == ['a', 'b', 'c']").is_true());
assert!(eval_expr("'a b c'.split() == ['a', 'b', 'c']").is_true());
assert!(eval_expr("'a b c'.split(none, 1) == ['a', 'b c']").is_true());
assert!(eval_expr("'abcbd'.split('b', 1) == ['a', 'cbd']").is_true());
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions minijinja/src/defaults.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ pub(crate) fn get_builtin_filters() -> BTreeMap<Cow<'static, str>, filters::Boxe
rv.insert("reverse".into(), BoxedFilter::new(filters::reverse));
rv.insert("trim".into(), BoxedFilter::new(filters::trim));
rv.insert("join".into(), BoxedFilter::new(filters::join));
rv.insert("split".into(), BoxedFilter::new(filters::split));
rv.insert("default".into(), BoxedFilter::new(filters::default));
rv.insert("round".into(), BoxedFilter::new(filters::round));
rv.insert("abs".into(), BoxedFilter::new(filters::abs));
Expand Down
30 changes: 30 additions & 0 deletions minijinja/src/filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ mod builtins {
use super::*;

use crate::error::ErrorKind;
use crate::utils::splitn_whitespace;
use crate::value::ops::as_f64;
use crate::value::{Kwargs, ValueKind, ValueRepr};
use std::borrow::Cow;
Expand Down Expand Up @@ -557,6 +558,35 @@ mod builtins {
}
}

/// Split a string into its substrings, using `split` as the separator string.
///
/// If `split` is not provided or `none`, the string is split on whitespace:
/// consecutive whitespace characters count as a single separator and no
/// empty strings are produced.
///
/// The `maxsplits` parameter defines the maximum number of splits
/// (starting from the left).  Note that this follows Python conventions
/// rather than Rust ones so `1` means one split and two resulting items.
/// A negative `maxsplits` (or one too large to be represented on the
/// current platform) means that the number of splits is not limited.
///
/// ```jinja
/// {{ "hello world"|split|list }}
///   -> ["hello", "world"]
///
/// {{ "c,s,v"|split(",")|list }}
///   -> ["c", "s", "v"]
///
/// {{ "bird and dinosaur"|split(none, 1)|list }}
///   -> ["bird", "and dinosaur"]
/// ```
#[cfg_attr(docsrs, doc(cfg(feature = "builtins")))]
pub fn split(s: Arc<str>, split: Option<Arc<str>>, maxsplits: Option<i64>) -> Value {
    // Python counts splits, Rust's `splitn` counts resulting items, hence
    // the `+ 1`.  Values that are negative, or that would not fit into a
    // `usize` after the increment (relevant on targets where `usize` is
    // narrower than `i64`), are treated as "no limit" instead of being
    // truncated or overflowing.
    let maxsplits = maxsplits
        .filter(|&x| x >= 0 && (x as u64) < usize::MAX as u64)
        .map(|x| x as usize + 1);

    // The iterable re-creates the split iterator from the captured strings
    // every time it is iterated.
    Value::make_object_iterable((s, split), move |(s, split)| match (split, maxsplits) {
        (None, None) => Box::new(s.split_whitespace().map(Value::from)),
        (Some(split), None) => Box::new(s.split(split as &str).map(Value::from)),
        (None, Some(n)) => Box::new(splitn_whitespace(s, n).map(Value::from)),
        (Some(split), Some(n)) => Box::new(s.splitn(n, split as &str).map(Value::from)),
    })
}

/// If the value is undefined it will return the passed default value,
/// otherwise the value of the variable:
///
Expand Down
57 changes: 57 additions & 0 deletions minijinja/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,46 @@ impl<F: FnOnce()> Drop for OnDrop<F> {
}
}

#[cfg(feature = "builtins")]
/// Splits `s` on runs of whitespace into at most `maxsplits` items.
///
/// Despite its name, `maxsplits` is the maximum number of *items* yielded
/// (like the `n` of `str::splitn`), not the number of splits.  A value of
/// `0` is handled like `1`.
///
/// Leading whitespace is always skipped.  Once the item limit is reached,
/// the final item runs from its first non-whitespace character to the end
/// of the string and keeps any whitespace it contains.  If the string ends
/// before the limit is reached, trailing whitespace is discarded, so a
/// string consisting only of whitespace yields no items.
pub fn splitn_whitespace(s: &str, maxsplits: usize) -> impl Iterator<Item = &str> + '_ {
    // Number of items that have been started so far (the first one counts).
    let mut splits = 1;
    // Byte offset of the item currently being scanned, if one was started.
    let mut split_start: Option<usize> = None;
    let mut chars = s.char_indices();

    std::iter::from_fn(move || {
        for (idx, c) in chars.by_ref() {
            match (split_start, c.is_whitespace()) {
                // The last permitted item has started: swallow everything
                // up to the end of the string, whitespace included.
                (Some(_), _) if splits >= maxsplits => {}
                // Whitespace terminates the current item.
                (Some(start), true) => {
                    split_start = None;
                    splits += 1;
                    return Some(&s[start..idx]);
                }
                // First non-whitespace character after a gap starts an item.
                (None, false) => split_start = Some(idx),
                // Whitespace between items or characters inside an item.
                _ => {}
            }
        }

        // Only an item that was started but not yet emitted forms the tail.
        // Whitespace after the last emitted item must not become an item of
        // its own.  `take` makes all later calls return `None`.
        split_start.take().map(|start| &s[start..])
    })
}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -362,4 +402,21 @@ mod tests {
assert_eq!(unescape("foobarbaz").unwrap(), "foobarbaz");
assert_eq!(unescape(r"\ud83d\udca9").unwrap(), "💩");
}

#[test]
#[cfg(feature = "builtins")]
fn test_splitn_whitespace() {
    // Each row is (input, item limit passed to `splitn_whitespace`, expected items).
    let cases: [(&str, usize, &[&str]); 8] = [
        ("a b c", 1, &["a b c"]),
        ("a b c", 2, &["a", "b c"]),
        ("a b c", 2, &["a", "b c"]),
        ("a b c ", 2, &["a", "b c "]),
        ("a b c", 3, &["a", "b", "c"]),
        ("a b c", 4, &["a", "b", "c"]),
        (" a b c", 3, &["a", "b", "c"]),
        (" a b c", 4, &["a", "b", "c"]),
    ];

    for (input, limit, expected) in cases.iter() {
        let pieces: Vec<&str> = splitn_whitespace(input, *limit).collect();
        assert_eq!(pieces, *expected);
    }
}
}
5 changes: 5 additions & 0 deletions minijinja/tests/inputs/filters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,8 @@ unique-filter: {{ [1, 1, 1, 4, 3, 0, 0, 5]|unique }}
pprint-filter: {{ objects|pprint }}
int-filter: {{ true|int }}, {{ "42"|int }}, {{ "-23"|int }}, {{ 42.0|int }}
float-filter: {{ true|float }}, {{ "42"|float }}, {{ "-23.5"|float }}, {{ 42.5|float }}
split: {{ three_words|split|list }}
split-at-and: {{ three_words|split(" and ")|list }}
split-n-ws: {{ three_words|split(none, 1)|list }}
split-n-d: {{ three_words|split("d", 1)|list }}
split-n-ws-filter-empty: {{ " foo bar baz "|split(none, 1)|list }}
1 change: 1 addition & 0 deletions minijinja/tests/snapshots/test_templates__vm@debug.txt.snap
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ State {
"selectattr",
"slice",
"sort",
"split",
"title",
"tojson",
"trim",
Expand Down
20 changes: 12 additions & 8 deletions minijinja/tests/snapshots/test_templates__vm@filters.txt.snap
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
---
source: minijinja/tests/test_templates.rs
description: "lower: {{ word|lower }}\nupper: {{ word|upper }}\ntitle: {{ word|title }}\ntitle-sentence: {{ \"the bIrd, is The:word\"|title }}\ntitle-three-words: {{ three_words|title }}\ncapitalize: {{ word|capitalize }}\ncapitalize-three-words: {{ three_words|capitalize }}\nreplace: {{ word|replace(\"B\", \"th\") }}\nescape: {{ \"<\"|escape }}\ne: {{ \"<\"|e }}\ndouble-escape: {{ \"<\"|escape|escape }}\nsafe: {{ \"<\"|safe|escape }}\nlist-length: {{ list|length }}\nlist-from-list: {{ list|list }}\nlist-from-map: {{ map|list }}\nlist-from-word: {{ word|list }}\nlist-from-undefined: {{ undefined|list }}\nbool-empty-string: {{ \"\"|bool }}\nbool-non-empty-string: {{ \"hello\"|bool }}\nbool-empty-list: {{ []|bool }}\nbool-non-empty-list: {{ [42]|bool }}\nbool-undefined: {{ undefined|bool }}\nmap-length: {{ map|length }}\nstring-length: {{ word|length }}\nstring-count: {{ word|count }}\nreverse-list: {{ list|reverse }}\nreverse-string: {{ word|reverse }}\ntrim: |{{ word_with_spaces|trim }}|\ntrim-bird: {{ word|trim(\"Bd\") }}\njoin-default: {{ list|join }}\njoin-pipe: {{ list|join(\"|\") }}\njoin_string: {{ word|join('-') }}\ndefault: {{ undefined|default == \"\" }}\ndefault-value: {{ undefined|default(42) }}\nfirst-list: {{ list|first }}\nfirst-word: {{ word|first }}\nfirst-undefined: {{ []|first is undefined }}\nlast-list: {{ list|last }}\nlast-word: {{ word|last }}\nlast-undefined: {{ []|first is undefined }}\nmin: {{ other_list|min }}\nmax: {{ other_list|max }}\nsort: {{ other_list|sort }}\nsort-reverse: {{ other_list|sort(reverse=true) }}\nsort-case-insensitive: {{ [\"B\", \"a\", \"C\", \"z\"]|sort }}\nsort-case-sensitive: {{ [\"B\", \"a\", \"C\", \"z\"]|sort(case_sensitive=true) }}\nsort-case-insensitive-mixed: {{ [0, 1, \"true\", \"false\", \"True\", \"False\", true, false]|sort }}\nsort-case-sensitive-mixed: {{ [0, 1, \"true\", \"false\", \"True\", \"False\", true, false]|sort(case_sensitive=true) }}\nsort-attribute {{ objects|sort(attribute=\"name\") }}\nd: 
{{ undefined|d == \"\" }}\njson: {{ map|tojson }}\njson-pretty: {{ map|tojson(true) }}\njson-scary-html: {{ scary_html|tojson }}\nurlencode: {{ \"hello world/foo-bar_baz.txt\"|urlencode }}\nurlencode-kv: {{ dict(a=\"x y\", b=2, c=3)|urlencode }}\nbatch: {{ range(10)|batch(3) }}\nbatch-fill: {{ range(10)|batch(3, '-') }}\nslice: {{ range(10)|slice(3) }}\nslice-fill: {{ range(10)|slice(3, '-') }}\nitems: {{ dict(a=1)|items }}\nindent: {{ \"foo\\nbar\\nbaz\"|indent(2)|tojson }}\nindent-first-line: {{ \"foo\\nbar\\nbaz\"|indent(2, true)|tojson }}\nint-abs: {{ -42|abs }}\nfloat-abs: {{ -42.5|abs }}\nint-round: {{ 42|round }}\nfloat-round: {{ 42.5|round }}\nfloat-round-prec2: {{ 42.512345|round(2) }}\nselect-odd: {{ [1, 2, 3, 4, 5, 6]|select(\"odd\") }}\nselect-truthy: {{ [undefined, null, 0, 42, 23, \"\", \"aha\"]|select }}\nreject-truthy: {{ [undefined, null, 0, 42, 23, \"\", \"aha\"]|reject }}\nreject-odd: {{ [1, 2, 3, 4, 5, 6]|reject(\"odd\") }}\nselect-attr: {{ [dict(active=true, key=1), dict(active=false, key=2)]|selectattr(\"active\") }}\nreject-attr: {{ [dict(active=true, key=1), dict(active=false, key=2)]|rejectattr(\"active\") }}\nselect-attr: {{ [dict(active=true, key=1), dict(active=false, key=2)]|selectattr(\"key\", \"even\") }}\nreject-attr: {{ [dict(active=true, key=1), dict(active=false, key=2)]|rejectattr(\"key\", \"even\") }}\nmap-maps: {{ [-1, -2, 3, 4, -5]|map(\"abs\") }}\nmap-attr: {{ [dict(a=1), dict(a=2), {}]|map(attribute='a', default=None) }}\nmap-attr-undefined: {{ [dict(a=1), dict(a=2), {}]|map(attribute='a', default=definitely_undefined) }}\nmap-attr-deep: {{ [dict(a=[1]), dict(a=[2]), dict(a=[])]|map(attribute='a.0', default=None) }}\nmap-attr-int: {{ [[1], [1, 2]]|map(attribute=1, default=999) }}\nattr-filter: {{ map|attr(\"a\") }}\nunique-filter: {{ [1, 1, 1, 4, 3, 0, 0, 5]|unique }}\npprint-filter: {{ objects|pprint }}\nint-filter: {{ true|int }}, {{ \"42\"|int }}, {{ \"-23\"|int }}, {{ 42.0|int }}\nfloat-filter: {{ true|float }}, {{ 
\"42\"|float }}, {{ \"-23.5\"|float }}, {{ 42.5|float }}"
description: "lower: {{ word|lower }}\nupper: {{ word|upper }}\ntitle: {{ word|title }}\ntitle-sentence: {{ \"the bIrd, is The:word\"|title }}\ntitle-three-words: {{ three_words|title }}\ncapitalize: {{ word|capitalize }}\ncapitalize-three-words: {{ three_words|capitalize }}\nreplace: {{ word|replace(\"B\", \"th\") }}\nescape: {{ \"<\"|escape }}\ne: {{ \"<\"|e }}\ndouble-escape: {{ \"<\"|escape|escape }}\nsafe: {{ \"<\"|safe|escape }}\nlist-length: {{ list|length }}\nlist-from-list: {{ list|list }}\nlist-from-map: {{ map|list }}\nlist-from-word: {{ word|list }}\nlist-from-undefined: {{ undefined|list }}\nbool-empty-string: {{ \"\"|bool }}\nbool-non-empty-string: {{ \"hello\"|bool }}\nbool-empty-list: {{ []|bool }}\nbool-non-empty-list: {{ [42]|bool }}\nbool-undefined: {{ undefined|bool }}\nmap-length: {{ map|length }}\nstring-length: {{ word|length }}\nstring-count: {{ word|count }}\nreverse-list: {{ list|reverse }}\nreverse-string: {{ word|reverse }}\ntrim: |{{ word_with_spaces|trim }}|\ntrim-bird: {{ word|trim(\"Bd\") }}\njoin-default: {{ list|join }}\njoin-pipe: {{ list|join(\"|\") }}\njoin_string: {{ word|join('-') }}\ndefault: {{ undefined|default == \"\" }}\ndefault-value: {{ undefined|default(42) }}\nfirst-list: {{ list|first }}\nfirst-word: {{ word|first }}\nfirst-undefined: {{ []|first is undefined }}\nlast-list: {{ list|last }}\nlast-word: {{ word|last }}\nlast-undefined: {{ []|first is undefined }}\nmin: {{ other_list|min }}\nmax: {{ other_list|max }}\nsort: {{ other_list|sort }}\nsort-reverse: {{ other_list|sort(reverse=true) }}\nsort-case-insensitive: {{ [\"B\", \"a\", \"C\", \"z\"]|sort }}\nsort-case-sensitive: {{ [\"B\", \"a\", \"C\", \"z\"]|sort(case_sensitive=true) }}\nsort-case-insensitive-mixed: {{ [0, 1, \"true\", \"false\", \"True\", \"False\", true, false]|sort }}\nsort-case-sensitive-mixed: {{ [0, 1, \"true\", \"false\", \"True\", \"False\", true, false]|sort(case_sensitive=true) }}\nsort-attribute {{ objects|sort(attribute=\"name\") }}\nd: 
{{ undefined|d == \"\" }}\njson: {{ map|tojson }}\njson-pretty: {{ map|tojson(true) }}\njson-scary-html: {{ scary_html|tojson }}\nurlencode: {{ \"hello world/foo-bar_baz.txt\"|urlencode }}\nurlencode-kv: {{ dict(a=\"x y\", b=2, c=3, d=None)|urlencode }}\nbatch: {{ range(10)|batch(3) }}\nbatch-fill: {{ range(10)|batch(3, '-') }}\nslice: {{ range(10)|slice(3) }}\nslice-fill: {{ range(10)|slice(3, '-') }}\nitems: {{ dict(a=1)|items }}\nindent: {{ \"foo\\nbar\\nbaz\"|indent(2)|tojson }}\nindent-first-line: {{ \"foo\\nbar\\nbaz\"|indent(2, true)|tojson }}\nint-abs: {{ -42|abs }}\nfloat-abs: {{ -42.5|abs }}\nint-round: {{ 42|round }}\nfloat-round: {{ 42.5|round }}\nfloat-round-prec2: {{ 42.512345|round(2) }}\nselect-odd: {{ [1, 2, 3, 4, 5, 6]|select(\"odd\") }}\nselect-truthy: {{ [undefined, null, 0, 42, 23, \"\", \"aha\"]|select }}\nreject-truthy: {{ [undefined, null, 0, 42, 23, \"\", \"aha\"]|reject }}\nreject-odd: {{ [1, 2, 3, 4, 5, 6]|reject(\"odd\") }}\nselect-attr: {{ [dict(active=true, key=1), dict(active=false, key=2)]|selectattr(\"active\") }}\nreject-attr: {{ [dict(active=true, key=1), dict(active=false, key=2)]|rejectattr(\"active\") }}\nselect-attr: {{ [dict(active=true, key=1), dict(active=false, key=2)]|selectattr(\"key\", \"even\") }}\nreject-attr: {{ [dict(active=true, key=1), dict(active=false, key=2)]|rejectattr(\"key\", \"even\") }}\nmap-maps: {{ [-1, -2, 3, 4, -5]|map(\"abs\") }}\nmap-attr: {{ [dict(a=1), dict(a=2), {}]|map(attribute='a', default=None) }}\nmap-attr-undefined: {{ [dict(a=1), dict(a=2), {}]|map(attribute='a', default=definitely_undefined) }}\nmap-attr-deep: {{ [dict(a=[1]), dict(a=[2]), dict(a=[])]|map(attribute='a.0', default=None) }}\nmap-attr-int: {{ [[1], [1, 2]]|map(attribute=1, default=999) }}\nattr-filter: {{ map|attr(\"a\") }}\nunique-filter: {{ [1, 1, 1, 4, 3, 0, 0, 5]|unique }}\npprint-filter: {{ objects|pprint }}\nint-filter: {{ true|int }}, {{ \"42\"|int }}, {{ \"-23\"|int }}, {{ 42.0|int }}\nfloat-filter: {{ true|float }}, 
{{ \"42\"|float }}, {{ \"-23.5\"|float }}, {{ 42.5|float }}\nsplit: {{ three_words|split|list }}\nsplit-at-and: {{ three_words|split(\" and \")|list }}\nsplit-n-ws: {{ three_words|split(none, 1)|list }}\nsplit-n-d: {{ three_words|split(\"d\", 1)|list }}\nsplit-n-ws-filter-empty: {{ \" foo bar baz \"|split(none, 1)|list }}"
info:
word: Bird
word_with_spaces: " Spacebird\n"
three_words: bird and dinosaur
list:
- 1
- 2
- 3
map:
a: b
c: d
objects:
- name: b
- name: a
Expand All @@ -18,10 +18,10 @@ info:
- 2
- 9
- 111
map:
a: b
c: d
scary_html: "<>&'"
three_words: bird and dinosaur
word: Bird
word_with_spaces: " Spacebird\n"
input_file: minijinja/tests/inputs/filters.txt
---
lower: bird
Expand Down Expand Up @@ -119,4 +119,8 @@ pprint-filter: [
]
int-filter: 1, 42, -23, 42
float-filter: 1.0, 42.0, -23.5, 42.5

split: ["bird", "and", "dinosaur"]
split-at-and: ["bird", "dinosaur"]
split-n-ws: ["bird", "and dinosaur"]
split-n-d: ["bir", " and dinosaur"]
split-n-ws-filter-empty: ["foo", "bar baz "]

0 comments on commit d47f4ba

Please sign in to comment.