Skip to content

Commit

Permalink
api: fix for split() not returning last ""
Browse files Browse the repository at this point in the history
Corrects `/-/.split("a-")` to return `["a", ""]` instead of `["a"]`.
(`/-/` is shorthand for `Regex::new("-").unwrap()`.)

This adds tests for both `split()` and `splitn()` covering a variety of
edge cases. One test is commented out because it is failing due to #521.
A future commit will fix it.

Note that the `split2` and `split3` tests were passing incorrectly
before this change. I have fixed them to expect the correct values.

Fixes #627
  • Loading branch information
danielparks authored and BurntSushi committed Jan 9, 2020
1 parent 2f3fd66 commit b9d1bb5
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Bug fixes:

* [BUG #594](https://github.com/rust-lang/regex/pull/594):
Improve error reporting when writing `\p\`.
* [BUG #627](https://github.com/rust-lang/regex/issues/627):
Corrects `re.split("a-")` to return `["a", ""]` instead of `["a"]`.
* [BUG #633](https://github.com/rust-lang/regex/pull/633):
Squash deprecation warnings for the `std::error::Error::description` method.

Expand Down
4 changes: 2 additions & 2 deletions src/re_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -739,11 +739,11 @@ impl<'r, 't> Iterator for Split<'r, 't> {
let text = self.finder.0.text();
match self.finder.next() {
None => {
if self.last >= text.len() {
if self.last > text.len() {
None
} else {
let s = &text[self.last..];
self.last = text.len();
self.last = text.len() + 1; // Next call will return None
Some(s)
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/re_unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,11 +779,11 @@ impl<'r, 't> Iterator for Split<'r, 't> {
let text = self.finder.0.text();
match self.finder.next() {
None => {
if self.last >= text.len() {
if self.last > text.len() {
None
} else {
let s = &text[self.last..];
self.last = text.len();
self.last = text.len() + 1; // Next call will return None
Some(s)
}
}
Expand Down
18 changes: 16 additions & 2 deletions tests/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,20 @@ split!(
split2,
r"(?-u)\b",
"a b c",
&[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c")]
&[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c"), t!("")]
);
split!(split3, r"a$", "a", &[t!("")]);
split!(split3, r"a$", "a", &[t!(""), t!("")]);
// No separator in the text: the whole input comes back as a single piece.
split!(split_none, r"-", r"a", &[t!("a")]);
// A trailing separator produces a final empty piece.
split!(split_trailing_blank, r"-", r"a-", &[t!("a"), t!("")]);
// Two trailing separators produce two empty pieces after "a".
split!(split_trailing_blanks, r"-", r"a--", &[t!("a"), t!(""), t!("")]);
// Empty input produces exactly one empty piece.
split!(split_empty, r"-", r"", &[t!("")]);

// Disabled for now: this case is tracked by
// https://github.com/rust-lang/regex/issues/521
// splitn!(splitn_below_limit, r"-", r"a", 2, &[t!("a")]);

// The number of pieces equals the limit, so every separator is consumed.
splitn!(splitn_at_limit, r"-", r"a-b", 2, &[t!("a"), t!("b")]);
// More pieces are available than the limit: the last piece is the unsplit remainder.
splitn!(splitn_above_limit, r"-", r"a-b-c", 2, &[t!("a"), t!("b-c")]);
// A limit of zero yields no pieces at all.
splitn!(splitn_zero_limit, r"-", r"a-b", 0, empty_vec!());
// A trailing separator within the limit still produces the final empty piece.
splitn!(splitn_trailing_blank, r"-", r"a-", 2, &[t!("a"), t!("")]);
// With limit 2 the second separator is left unsplit in the remainder.
splitn!(splitn_trailing_separator, r"-", r"a--", 2, &[t!("a"), t!("-")]);
// Empty input with limit 1 produces one empty piece.
splitn!(splitn_empty, r"-", r"", 1, &[t!("")]);
11 changes: 11 additions & 0 deletions tests/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,14 @@ macro_rules! split {
}
}
}

// Generates a `#[test]` function called `$name`.
//
// The test compiles `$re` with `regex!`, calls `splitn` on `t!($text)` with
// `$limit`, collects the yielded pieces into a `Vec`, and asserts that they
// equal `$expected`.
macro_rules! splitn {
    ($name:ident, $re:expr, $text:expr, $limit:expr, $expected:expr) => {
        #[test]
        fn $name() {
            let pattern = regex!($re);
            let pieces = pattern.splitn(t!($text), $limit).collect::<Vec<_>>();
            // `&*pieces` views the vector as a slice for the comparison.
            assert_eq!($expected, &*pieces);
        }
    };
}
1 change: 1 addition & 0 deletions tests/macros_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ macro_rules! text { ($text:expr) => { $text.as_bytes() } }
// Shorthand that forwards its argument to `text!`; `text!` must be in scope
// wherever this macro is expanded.
macro_rules! t {
    ($input:expr) => {
        text!($input)
    };
}
// Views the argument as bytes by calling `.as_bytes()` on it.
macro_rules! match_text {
    ($matched:expr) => {
        $matched.as_bytes()
    };
}
// Emits `use regex::bytes::<path>;` for the given path tokens.
macro_rules! use_ {
    ($($segment:tt)*) => {
        use regex::bytes::$($segment)*;
    };
}
// Produces an empty `Vec<&[u8]>`.
macro_rules! empty_vec {
    () => {
        <Vec<&[u8]>>::new()
    };
}

// Passes its argument through unchanged.
macro_rules! bytes {
    ($input:expr) => {
        $input
    };
}

Expand Down
1 change: 1 addition & 0 deletions tests/macros_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ macro_rules! text { ($text:expr) => { $text } }
// Shorthand that forwards its argument to `text!`; `text!` must be in scope
// wherever this macro is expanded.
macro_rules! t {
    ($input:expr) => {
        text!($input)
    };
}
// Views the argument as a string slice by calling `.as_str()` on it.
macro_rules! match_text {
    ($matched:expr) => {
        $matched.as_str()
    };
}
// Emits `use regex::<path>;` for the given path tokens.
macro_rules! use_ {
    ($($segment:tt)*) => {
        use regex::$($segment)*;
    };
}
// Produces an empty `Vec<&str>`.
macro_rules! empty_vec {
    () => {
        <Vec<&str>>::new()
    };
}

macro_rules! no_expand {
($text:expr) => {{
Expand Down

0 comments on commit b9d1bb5

Please sign in to comment.