diff --git a/CHANGELOG.md b/CHANGELOG.md index 86f1faa..364b536 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,9 @@ # Yii Strings Change Log -## 2.3.2 under development +## 2.4.0 under development -- no changes in this release. +- New #118: Add `findBetween()`, `findBetweenFirst()` and `findBetweenLast()` methods to `StringHelper` to retrieve + a substring that lies between two strings (@salehhashemi1992) ## 2.3.1 October 30, 2023 diff --git a/README.md b/README.md index 00ba426..e6ca88e 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,9 @@ Overall the helper has the following method groups. - startsWithIgnoringCase - endsWith - endsWithIgnoringCase +- findBetween +- findBetweenFirst +- findBetweenLast ### Truncation diff --git a/src/StringHelper.php b/src/StringHelper.php index afebcb2..1715d0a 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -619,6 +619,104 @@ public static function rtrim(string|array $string, string $pattern = self::DEFAU return preg_replace("#[$pattern]+$#uD", '', $string); } + /** + * Returns the portion of the string that lies between the first occurrence of the `$start` string + * and the last occurrence of the `$end` string after that. + * + * @param string $string The input string. + * @param string $start The string marking the start of the portion to extract. + * @param string|null $end The string marking the end of the portion to extract. + * If the `$end` string is not provided, it defaults to the value of the `$start` string. + * @return string|null The portion of the string between the first occurrence of + * `$start` and the last occurrence of `$end`, or null if either `$start` or `$end` cannot be found. 
+ */ + public static function findBetween(string $string, string $start, ?string $end = null): ?string + { + if ($end === null) { + $end = $start; + } + + $startPos = mb_strpos($string, $start); + + if ($startPos === false) { + return null; + } + + $startPos += mb_strlen($start); + $endPos = mb_strrpos($string, $end, $startPos); + + if ($endPos === false) { + return null; + } + + return mb_substr($string, $startPos, $endPos - $startPos); + } + + /** + * Returns the portion of the string that lies between the first occurrence of the `$start` string + * and the first occurrence of the `$end` string after that. + * + * @param string $string The input string. + * @param string $start The string marking the start of the portion to extract. + * @param string|null $end The string marking the end of the portion to extract. + * If the `$end` string is not provided, it defaults to the value of the `$start` string. + * @return string|null The extracted portion, or null if `$start` is not found or `$end` does not occur after it. + */ + public static function findBetweenFirst(string $string, string $start, ?string $end = null): ?string + { + if ($end === null) { + $end = $start; + } + + $startPos = mb_strpos($string, $start); + + if ($startPos === false) { + return null; + } + + $startPos += mb_strlen($start); + $endPos = mb_strpos($string, $end, $startPos); + + if ($endPos === false) { + return null; + } + + return mb_substr($string, $startPos, $endPos - $startPos); + } + + /** + * Returns the portion of the string that ends at the last occurrence of the `$end` string + * and starts after the last occurrence of the `$start` string before that. + * + * @param string $string The input string. + * @param string $start The string marking the start of the portion to extract. + * @param string|null $end The string marking the end of the portion to extract. + * If the `$end` string is not provided, it defaults to the value of the `$start` string. + * @return string|null The extracted portion, or null if `$end` is not found or `$start` does not occur before it. 
+ */ + public static function findBetweenLast(string $string, string $start, ?string $end = null): ?string + { + if ($end === null) { + $end = $start; + } + + $endPos = mb_strrpos($string, $end); + + if ($endPos === false) { + return null; + } + + $startPos = mb_strrpos(mb_substr($string, 0, $endPos), $start); + + if ($startPos === false) { + return null; + } + + $startPos += mb_strlen($start); + + return mb_substr($string, $startPos, $endPos - $startPos); + } + /** * Ensure the input string is a valid UTF-8 string. * diff --git a/tests/StringHelperTest.php b/tests/StringHelperTest.php index 506fdb0..7895078 100644 --- a/tests/StringHelperTest.php +++ b/tests/StringHelperTest.php @@ -763,4 +763,92 @@ public function testInvalidTrimPattern(): void StringHelper::trim('string', "\xC3\x28"); } + + /** + * @dataProvider dataProviderFindBetween + */ + public function testFindBetween(string $string, string $start, ?string $end, ?string $expectedResult): void + { + $this->assertSame($expectedResult, StringHelper::findBetween($string, $start, $end)); + } + + public function dataProviderFindBetween(): array + { + return [ + ['hello world hello', ' hello', ' world', null], // end before start + ['This is a sample string', ' is ', ' string', 'a sample'], // normal case + ['startendstart', 'start', 'end', ''], // end immediately follows start, then another start + ['startmiddleend', 'start', 'end', 'middle'], // normal case + ['startend', 'start', 'end', ''], // end immediately follows start + ['multiple start start end end', 'start ', ' end', 'start end'], // multiple starts and ends + ['', 'start', 'end', null], // empty string + ['no delimiters here', 'start', 'end', null], // no start and end + ['start only', 'start', 'end', null], // start found but no end + ['end only', 'start', 'end', null], // end found but no start + ['a1a2a3a', 'a', 'a', '1a2a3'], // same start and end + ['a1a2a3a', 'a', null, '1a2a3'], // end is null + ['spécial !@#$%^&*()', 'spé', '&*()', 'cial !@#$%^'], // Special characters + 
['من صالح هاشمی هستم', 'من ', ' هستم', 'صالح هاشمی'], // other languages + ]; + } + + /** + * @dataProvider dataProviderFindBetweenFirst + */ + public function testFindBetweenFirst(string $string, string $start, ?string $end, ?string $expectedResult): void + { + $this->assertSame($expectedResult, StringHelper::findBetweenFirst($string, $start, $end)); + } + + public function dataProviderFindBetweenFirst(): array + { + return [ + ['[a][b][c]', '[', ']', 'a'], // normal case + ['[a]m[b]n[c]', '[', ']', 'a'], // normal case + ['hello world hello', ' hello', ' world', null], // end before start + ['This is a sample string string', ' is ', ' string', 'a sample'], // normal case + ['startendstartend', 'start', 'end', ''], // first end immediately follows first start + ['startmiddleend', 'start', 'end', 'middle'], // normal case + ['startend', 'start', 'end', ''], // end immediately follows start + ['multiple start start end end', 'start ', ' end', 'start'], // multiple starts and ends + ['', 'start', 'end', null], // empty string + ['no delimiters here', 'start', 'end', null], // no start and end + ['start only', 'start', 'end', null], // start found but no end + ['end only', 'start', 'end', null], // end found but no start + ['a1a2a3a', 'a', 'a', '1'], // same start and end + ['a1a2a3a', 'a', null, '1'], // end is null + ['spécial !@#$%^&*()', 'spé', '&*()', 'cial !@#$%^'], // Special characters + ['من صالح هاشمی هستم هستم', 'من ', ' هستم', 'صالح هاشمی'], // other languages + ]; + } + + /** + * @dataProvider dataProviderFindBetweenLast + */ + public function testFindBetweenLast(string $string, string $start, ?string $end, ?string $expectedResult): void + { + $this->assertSame($expectedResult, StringHelper::findBetweenLast($string, $start, $end)); + } + + public function dataProviderFindBetweenLast(): array + { + return [ + ['[a][b][c]', '[', ']', 'c'], // normal case + ['[a]m[b]n[c]', '[', ']', 'c'], // normal case + ['hello world hello', ' hello', ' world', null], // end before start + ['This 
is is a sample string string', ' is ', ' string', 'a sample string'], // normal case + ['startendstartend', 'start', 'end', ''], // last end immediately follows last start + ['startmiddleend', 'start', 'end', 'middle'], // normal case + ['startend', 'start', 'end', ''], // end immediately follows start + ['multiple start start end end', 'start ', ' end', 'end'], // multiple starts and ends + ['', 'start', 'end', null], // empty string + ['no delimiters here', 'start', 'end', null], // no start and end + ['start only', 'start', 'end', null], // start found but no end + ['end only', 'start', 'end', null], // end found but no start + ['a1a2a3a', 'a', 'a', '3'], // same start and end + ['a1a2a3a', 'a', null, '3'], // end is null + ['spécial !@#$%^&*()', 'spé', '&*()', 'cial !@#$%^'], // Special characters + ['من صالح هاشمی هستم هستم', 'من ', ' هستم', 'صالح هاشمی هستم'], // other languages + ]; + } }