From 5d6e65b1af61d95a793f5d4814e647bc82bc8dd4 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Sun, 4 Aug 2024 09:34:12 +0200 Subject: [PATCH 1/2] Improve FragmentFinder public API --- CHANGELOG.md | 21 ++ docs/9.0/reader/statement.md | 81 ++++- docs/9.0/reader/tabular-data-reader.md | 1 + src/Fragment/Expression.php | 407 +++++++++++++++++++++++-- src/Fragment/ExpressionTest.php | 198 ++++++++++++ src/Fragment/Selection.php | 145 +++++---- src/Fragment/Type.php | 5 +- src/FragmentFinder.php | 168 ++-------- src/Reader.php | 9 +- src/ResultSet.php | 9 +- src/TabularDataReaderTestCase.php | 31 +- 11 files changed, 820 insertions(+), 255 deletions(-) create mode 100644 src/Fragment/ExpressionTest.php diff --git a/CHANGELOG.md b/CHANGELOG.md index 5582f3ca..0d925c40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,27 @@ All Notable changes to `Csv` will be documented in this file +## [Next] - TBD + +### Added + +- `League\Csv\Fragment\Expression` +- `League\Csv\Fragment\Selection` (internal class) + +### Deprecated + +- `League\Csv\FragmentFinder::findAll` use `League\Csv\Fragment\Expression::fragment` instead + +### Fixed + +- `Cast*` methods accept more input types. +- `FragmentFinder` now removes duplicate selections. +- `TabularDataReader::matching` will return an empty `Iterable` instance when no selection is valid; previously an empty `TabularDataReader` instance was returned as the unique item of the returned iterable. + +### Removed + +- None + ## [9.16.0](https://github.com/thephpleague/csv/compare/9.15.0...9.16.0) - 2024-05-24 ### Added diff --git a/docs/9.0/reader/statement.md b/docs/9.0/reader/statement.md index dfeb3fde..8ad9a0da 100644 --- a/docs/9.0/reader/statement.md +++ b/docs/9.0/reader/statement.md @@ -434,16 +434,17 @@ Here are some selection example: - `cell=5,2-8,9` : will select the cells located between row `4` and column `1` and row `7` and column `8`; Of note, the RFC allows for multiple selections, separated by a `;`. 
which are translated -as `OR` expressions. To strictly cover The RFC the class exposes the `findAll` method +as `OR` expressions. To strictly cover The RFC the class exposes the `find` method which returns an iterable containing the results of all found fragments as distinct `TabulatDataReader` instances. -

If some selections are invalid no error is returned; the invalid -selection is skipped from the returned value.

+

This find method is introduced with version 9.17.0.

+

The findAll method is deprecated; use find instead.

+

If some selections are invalid, no error is returned; each invalid selection is simply omitted from the returned value.

To restrict the returned values you may use the `findFirst` and `findFirstOrFail` methods. Both methods return on success a `TabularDataReader` instance. While the `first` method -always return the first selection found or `null`; `firstOrFail` **MUST** return a +always return the first selection found if it is not empty or `null`; `firstOrFail` **MUST** return a non-empty `TabularDataReader` instance or throw. It will also throw if the expression syntax is invalid while all the other methods just ignore the error. @@ -456,15 +457,83 @@ use League\Csv\FragmentFinder; $reader = Reader::createFromPath('/path/to/file.csv'); $finder = FragmentFinder::create(); -$finder->findAll('row=7-5;8-9', $reader); // return an Iterator +$finder->find('row=7-5;8-9', $reader); // return an Iterator $finder->findFirst('row=7-5;8-9', $reader); // return an TabularDataReader $finder->findFirstOrFail('row=7-5;8-9', $reader); // will throw ``` -- `FragmentFinder::findAll` returns an Iterator containing a single `TabularDataReader` because the first selection +- `FragmentFinder::find` returns an Iterator containing a single `TabularDataReader` because the first selection is invalid; - `FragmentFinder::findFirst` returns the single valid `TabularDataReader` - `FragmentFinder::findFirstOrFail` throws a `SyntaxError`. Both classes, `FragmentFinder` and `Statement` returns an instance that implements the `TabularDataReader` interface which returns the found data in a consistent way. + +### Fragment Expression builder + +

This mechanism is introduced with version 9.17.0.

+ +The `Expression` class provides an immutable, fluent interface to create valid expressions. +We can rewrite the previous example as follows: + +```php +use League\Csv\Reader; +use League\Csv\Fragment\Expression; +use League\Csv\FragmentFinder; + +$reader = Reader::createFromPath('/path/to/file.csv'); +$finder = FragmentFinder::create(); +$expression = Expression::fromRow('7-5', '8-9'); + +$finder->find($expression, $reader); // return an Iterator +$finder->findFirst($expression, $reader); // return a TabularDataReader +$finder->findFirstOrFail($expression, $reader); // will throw +``` + +The `Expression` validates that your selections are valid according to the selection scheme chosen. +The class exposes methods to create fragment expressions for: + +- cell selection using `Expression::fromCell`; +- row selection using `Expression::fromRow`; +- column selection using `Expression::fromColumn`; + +```php +use League\Csv\Fragment\Expression; + +$expression = Expression::fromRow('7-5', '8-9'); +echo $expression; +// returns 'row=8-9' and removes `7-5` because it is an invalid selection +``` + +You can even gradually create your expression using a fluent and immutable API +using the `push`, `unshift` and `remove` methods. There are also convenient methods to +inspect the class to know how many selections are present and to select them according +to their indices using the `get` and `hasKey` methods. You are also able to tell if a specific +selection is present via the `contains` method. + +```php +use League\Csv\Fragment\Expression; + +$expression = Expression::fromRow() + ->push('5-8') + ->unshift('12-15') + ->replace('5-8', '12-*') + ->remove('12-15'); + +echo $expression->toString(); +// or +echo $expression; +// returns 'row=12-*' +``` + +You can use the `Expression` to directly query a `TabularDataReader` using the `fragment` method. 
+The result will be an iterable structure containing `TabularDataReader` instances whose index represents the +selection used to generate the data. The `firstFragment` method can be used to return the first +`TabularDataReader` instance regardless of whether it contains data or not. + +```php +$document = Reader::createFromPath('/path/to/file.csv'); +Expression::fromRow('7-5', '8-9')->fragment($document); // returns an iterable +Expression::fromRow('7-5', '8-9')->firstFragment($document); // returns the first selection found regardless of whether it contains data or not +``` diff --git a/docs/9.0/reader/tabular-data-reader.md b/docs/9.0/reader/tabular-data-reader.md index b83c01d2..0ec3ceda 100644 --- a/docs/9.0/reader/tabular-data-reader.md +++ b/docs/9.0/reader/tabular-data-reader.md @@ -506,6 +506,7 @@ $reader->matchingFirstOrFail('row=3-1;4-6'); // will throw

Wraps the functionality of FragmentFinder class.

Added in version 9.12.0 for Reader and ResultSet.

+

In addition to using a string expression, starting with version 9.17.0 you can alternatively use an `Expression` object.

### chunkBy diff --git a/src/Fragment/Expression.php b/src/Fragment/Expression.php index fc6ab51f..0a744df7 100644 --- a/src/Fragment/Expression.php +++ b/src/Fragment/Expression.php @@ -13,34 +13,84 @@ namespace League\Csv\Fragment; +use Countable; +use IteratorAggregate; +use League\Csv\Exception; use League\Csv\FragmentNotFound; -use function preg_match; -use function explode; +use League\Csv\InvalidArgument; +use League\Csv\Statement; +use League\Csv\SyntaxError; +use League\Csv\TabularDataReader; +use ReflectionException; +use Stringable; +use Traversable; + use function array_map; +use function explode; +use function implode; +use function preg_match; -final class Expression +/** + * @implements IteratorAggregate + */ +final class Expression implements Stringable, Countable, IteratorAggregate { private const REGEXP_URI_FRAGMENT = ',^(?row|cell|col)=(?.*)$,i'; + private readonly Type $type; /** @param array $selections */ - private function __construct( - public readonly Type $type, - public readonly array $selections - ) {} + private readonly array $selections; - public static function tryFrom(string $expression): self + /** + * @param array $selections + */ + private function __construct(Type $type, array $selections) { - try { - return self::from($expression); - } catch (FragmentNotFound $fragmentNotFound) { - return self::fromUnknown(); + $this->type = $type; + $this->selections = self::removeDuplicates($selections); + } + + /** + * @param array $selections + * + * @return array + */ + private static function removeDuplicates(array $selections): array + { + $sorted = []; + foreach ($selections as $selection) { + if (null !== $selection) { + $sorted[$selection->toString()] = $selection; + } } + + return array_values($sorted); + } + + /** + * @param array $selections1 + * @param array $selections2 + */ + private static function isEqualSelection(array $selections1, array $selections2): bool + { + $toString = static fn (Selection $selection): string => 
$selection->toString(); + $selectionsA = array_map($toString, $selections1); + $selectionsB = array_map($toString, $selections2); + + sort($selectionsA); + sort($selectionsB); + + return $selectionsB === $selectionsA; } - public static function from(string $expression): self + public static function from(Stringable|string $expression): self { - if (1 !== preg_match(self::REGEXP_URI_FRAGMENT, $expression, $matches)) { - throw new FragmentNotFound('The submitted expression `'.$expression.'` is invalid.'); + if ($expression instanceof self) { + return $expression; + } + + if (1 !== preg_match(self::REGEXP_URI_FRAGMENT, (string) $expression, $matches)) { + throw new FragmentNotFound('The expression "' . $expression . '" does not match the CSV fragment Identifier specification.'); } $selections = explode(';', $matches['selections']); @@ -49,27 +99,336 @@ public static function from(string $expression): self Type::Row => self::fromRow(...$selections), Type::Column => self::fromColumn(...$selections), Type::Cell => self::fromCell(...$selections), - default => throw new FragmentNotFound('The submitted expression `'.$expression.'` is invalid.'), }; } - public static function fromUnknown(): self - { - return new self(Type::Unknown, [Selection::fromUnknown()]); - } - public static function fromCell(string ...$selections): self { - return new self(Type::Cell, array_map(Selection::fromCell(...), $selections)); + return new self(Type::Cell, array_filter(array_map(Selection::fromCell(...), $selections))); } public static function fromColumn(string ...$selections): self { - return new self(Type::Column, array_map(Selection::fromColumn(...), $selections)); + return new self(Type::Column, array_filter(array_map(Selection::fromColumn(...), $selections))); } public static function fromRow(string ...$selections): self { - return new self(Type::Row, array_map(Selection::fromRow(...), $selections)); + return new self(Type::Row, array_filter(array_map(Selection::fromRow(...), 
$selections))); + } + + public function type(): Type + { + return $this->type; + } + + public function isEmpty(): bool + { + return [] === $this->selections; + } + + public function isNotEmpty(): bool + { + return ! $this->isEmpty(); + } + + public function __toString(): string + { + return $this->toString(); + } + + public function toString(): string + { + return $this->type->value .'='.implode( + ';', + array_map(fn (Selection $selection): string => $selection->toString(), $this->selections) + ); + } + + public function count(): int + { + return count($this->selections); + } + + public function getIterator(): Traversable + { + foreach ($this->selections as $selection) { + yield $selection->toString(); + } + } + + public function get(int $key): string + { + return $this->selections[ + $this->filterIndex($key) ?? throw new FragmentNotFound('No selection found for the given key `'.$key.'`.') + ]->toString(); + } + + public function hasKey(int ...$keys): bool + { + $max = count($this->selections); + foreach ($keys as $offset) { + if (null === $this->filterIndex($offset, $max)) { + return false; + } + } + + return [] !== $keys; + } + + public function has(string ...$selections): bool + { + foreach ($selections as $selection) { + if (null === $this->contains($selection)) { + return false; + } + } + + return [] !== $selections; + } + + public function contains(string $selection): ?int + { + if ([] === $this->selections) { + return null; + } + + $selection = (match ($this->type) { + Type::Row => Selection::fromRow($selection), + Type::Column => Selection::fromColumn($selection), + Type::Cell => Selection::fromCell($selection), + })?->toString(); + + if (null === $selection) { + return null; + } + + foreach ($this->selections as $offset => $innerSelection) { + if ($selection === $innerSelection->toString()) { + return $offset; + } + } + + return null; + } + + private function filterIndex(int $index, ?int $max = null): ?int + { + $max ??= count($this->selections); + + 
return match (true) { + [] === $this->selections, 0 > $max + $index, 0 > $max - $index - 1 => null, + 0 > $index => $max + $index, + default => $index, + }; + } + + public function push(string ...$selections): self + { + if ([] === $selections) { + return $this; + } + + $selections = array_filter(match ($this->type) { + Type::Row => array_map(Selection::fromRow(...), $selections), + Type::Column => array_map(Selection::fromColumn(...), $selections), + Type::Cell => array_map(Selection::fromCell(...), $selections), + }); + + $selections = self::removeDuplicates($selections); + if ([] === $selections || self::isEqualSelection($this->selections, $selections)) { + return $this; + } + + return new self($this->type, [...$this->selections, ...$selections]); + } + + public function unshift(string ...$selections): self + { + if ([] === $selections) { + return $this; + } + + $selections = array_filter(match ($this->type) { + Type::Row => array_map(Selection::fromRow(...), $selections), + Type::Column => array_map(Selection::fromColumn(...), $selections), + Type::Cell => array_map(Selection::fromCell(...), $selections), + }); + + $selections = self::removeDuplicates($selections); + if ([] === $selections || self::isEqualSelection($this->selections, $selections)) { + return $this; + } + + return new self($this->type, [...$selections, ...$this->selections]); + } + + public function replace(string $oldSelection, string $newSelection): self + { + $offset = $this->contains($oldSelection); + if (null === $offset) { + throw new FragmentNotFound('The selection `'.$oldSelection.'` used for replace is not valid'); + } + + $newSelectionObject = match ($this->type) { + Type::Row => Selection::fromRow($newSelection), + Type::Column => Selection::fromColumn($newSelection), + Type::Cell => Selection::fromCell($newSelection), + }; + + if (null === $newSelectionObject) { + throw new FragmentNotFound('The selection `'.$newSelection.'` used for replace is not valid'); + } + + if (null === 
$this->contains($newSelectionObject->toString())) { + return $this; + } + + return match ($newSelectionObject->toString()) { + $oldSelection => $this, + default => new self($this->type, array_replace($this->selections, [$offset => $newSelectionObject])), + }; + } + + public function remove(string ...$selections): self + { + if (in_array([], [$this->selections, $selections], true)) { + return $this; + } + + $keys = array_filter(array_map($this->contains(...), $selections), fn (int|null $key): bool => null !== $key); + + return match (true) { + [] === $keys => $this, + count($keys) === count($this->selections) => new self($this->type, []), + default => new self($this->type, array_values( + array_filter( + $this->selections, + fn (int $key): bool => !in_array($key, $keys, true), + ARRAY_FILTER_USE_KEY + ) + )), + }; + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws ReflectionException + * @throws SyntaxError + */ + public function firstFragment(TabularDataReader $tabularDataReader): ?TabularDataReader + { + foreach ($this->fragment($tabularDataReader) as $tabularData) { + return $tabularData; + } + + return null; + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws ReflectionException + * @throws SyntaxError + * + * @return iterable + */ + public function fragment(TabularDataReader $tabularDataReader): iterable + { + $statements = [] === $this->selections ? 
[] : match ($this->type) { + Type::Row => $this->queryByRows(), + Type::Column => $this->queryByColumns($tabularDataReader), + Type::Cell => $this->queryByCells($tabularDataReader), + }; + + foreach ($statements as $selection => $statement) { + yield $selection => $statement->process($tabularDataReader); + } + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws SyntaxError + * @throws ReflectionException + * + * @return iterable + */ + private function queryByRows(): iterable + { + $predicate = fn (array $record, int $offset): bool => [] !== array_filter( + $this->selections, + fn (Selection $selection): bool => $offset >= $selection->rowStart && + (null === $selection->rowEnd || $offset <= $selection->rowEnd) + ); + + yield $this->toString() => Statement::create()->where($predicate); + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws ReflectionException + * @throws SyntaxError + * + * @return iterable + */ + private function queryByColumns(TabularDataReader $tabularDataReader): iterable + { + $nbColumns = $this->getTabularDataColumnCount($tabularDataReader); + $columns = array_reduce( + $this->selections, + fn (array $columns, Selection $selection): array => [ + ...$columns, + ...match (($columnRange = $selection->columnRange())) { + null => range($selection->columnStart, $nbColumns - 1), + default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? 
range($selection->columnStart, $nbColumns - 1) : $columnRange, + } + ], + [] + ); + + if ([] !== $columns) { + yield $this->toString() => Statement::create()->select(...$columns); + } + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws ReflectionException + * @throws SyntaxError + * + * @return iterable + */ + private function queryByCells(TabularDataReader $tabularDataReader): iterable + { + $nbColumns = $this->getTabularDataColumnCount($tabularDataReader); + $mapper = fn (Selection $selection): Statement => Statement::create() + ->where( + fn (array $record, int $offset): bool => $offset >= $selection->rowStart && + (null === $selection->rowEnd || $offset <= $selection->rowEnd) + ) + ->select( + ...match (($columnRange = $selection->columnRange())) { + null => range($selection->columnStart, $nbColumns - 1), + default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? range($selection->columnStart, $nbColumns - 1) : $columnRange, + } + ); + + foreach ($this->selections as $selection) { + yield Type::Cell->value.'='.$selection->toString() => $mapper($selection); + } + } + + private function getTabularDataColumnCount(TabularDataReader $tabularDataReader): int + { + $header = $tabularDataReader->getHeader(); + + return count(match ($header) { + [] => $tabularDataReader->first(), + default => $header, + }); } } diff --git a/src/Fragment/ExpressionTest.php b/src/Fragment/ExpressionTest.php new file mode 100644 index 00000000..9ab3b541 --- /dev/null +++ b/src/Fragment/ExpressionTest.php @@ -0,0 +1,198 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Fragment; + +use League\Csv\FragmentNotFound; +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\TestCase; + +final class ExpressionTest extends TestCase +{ + #[Test] + #[DataProvider('validExpressionProvider')] + public function it_can_generate_an_expression_from_a_string(string $input, string $expected): void + { + self::assertSame($expected, Expression::from($input)->toString()); + } + + public static function validExpressionProvider(): iterable + { + yield 'single row' => [ + 'input' => 'ROW=1', + 'expected' => 'row=1', + ]; + + yield 'row range' => [ + 'input' => 'row=1-5', + 'expected' => 'row=1-5', + ]; + + yield 'row infinite range' => [ + 'input' => 'row=12-*', + 'expected' => 'row=12-*', + ]; + + yield 'multiple row selections' => [ + 'input' => 'row=1-5;12-*', + 'expected' => 'row=1-5;12-*', + ]; + + yield 'single column' => [ + 'input' => 'CoL=1', + 'expected' => 'col=1', + ]; + + yield 'column range' => [ + 'input' => 'col=12-24', + 'expected' => 'col=12-24', + ]; + + yield 'column infinite range' => [ + 'input' => 'col=12-*', + 'expected' => 'col=12-*', + ]; + + yield 'multiple column selections' => [ + 'input' => 'col=1-5;12-*', + 'expected' => 'col=1-5;12-*', + ]; + + yield 'single cell' => [ + 'input' => 'CeLl=1,4', + 'expected' => 'cell=1,4', + ]; + + yield 'cell range' => [ + 'input' => 'CeLl=1,4-5,9', + 'expected' => 'cell=1,4-5,9', + ]; + + yield 'cell range with infinite' => [ + 'input' => 'CeLl=1,4-*', + 'expected' => 'cell=1,4-*', + ]; + + yield 'multiple cell selection' => [ + 'input' => 'CeLl=1,4-5,9;12,15-*', + 'expected' => 'cell=1,4-5,9;12,15-*', + ]; + } + + #[Test] + #[DataProvider('invalidExpressionProvider')] + public function it_will_throw_parsing_incorrect_expression(string $expression): void + { + $this->expectException(FragmentNotFound::class); + + Expression::from($expression)->toString(); + } + + public static 
function invalidExpressionProvider(): iterable + { + yield 'invalid row index' => ['expression' => 'row=-1']; + yield 'invalid row end' => ['expression' => 'row=1--1']; + yield 'invalid row number' => ['expression' => 'row=1-four']; + yield 'invalid row range infinite' => ['expression' => 'row=*-1']; + yield 'invalid multiple row range' => ['expression' => 'row=1-4,2-5']; + + yield 'invalid column index' => ['expression' => 'col=-1']; + yield 'invalid column end' => ['expression' => 'col=1--1']; + yield 'invalid column number' => ['expression' => 'col=1-four']; + yield 'invalid column range infinite' => ['expression' => 'col=*-1']; + yield 'invalid multiple column range' => ['expression' => 'col=1-4,2-5']; + + yield 'invalid cell' => ['expression' => 'cell=1,*']; + yield 'invalid cell index' => ['expression' => 'cell=1,-3']; + yield 'invalid cell number' => ['expression' => 'cell=1,three']; + } + + #[Test] + #[DataProvider('ignoreExpressionProvider')] + public function it_will_fail_parsing_incorrect_expression(string $expression): void + { + $this->expectException(FragmentNotFound::class); + + Expression::from($expression); + } + + public static function ignoreExpressionProvider(): iterable + { + yield 'invalid multiple cell selection' => ['expression' => 'cell=2,3-14,16;22-23']; + } + + #[Test] + public function it_can_add_remove_selections(): void + { + $expression = Expression::fromColumn(); + self::assertCount(0, $expression); + + $addExpression = $expression + ->push( '12-*') + ->unshift('1-5'); + + $removeExpression = $addExpression->remove('12-*'); + $replaceExpression = $addExpression->replace('12-*', '8-9'); + + self::assertCount(0, $expression); + self::assertCount(2, $addExpression); + self::assertCount(2, $replaceExpression); + self::assertCount(1, $removeExpression); + self::assertSame($addExpression->get(-1), $replaceExpression->get(-1)); + + self::assertSame($expression, $expression->push()); + self::assertSame($expression, $expression->unshift()); + 
self::assertSame($expression, $expression->remove()); + self::assertFalse($expression->has('12-*')); + + self::assertSame($addExpression, $addExpression->push()); + self::assertSame($addExpression, $addExpression->unshift()); + self::assertSame($addExpression, $addExpression->remove()); + self::assertSame($addExpression, $addExpression->replace($addExpression->get(0), $addExpression->get(0))); + + self::assertEquals('12-*', $addExpression->get(1)); + self::assertEquals('12-*', $addExpression->get(-1)); + self::assertTrue($addExpression->has('12-*')); + + self::assertFalse($removeExpression->hasKey(1)); + self::assertFalse($removeExpression->has('12-*')); + self::assertTrue($removeExpression->hasKey(0)); + self::assertEquals('1-5', $removeExpression->get(0)); + self::assertEquals('1-5', $addExpression->get(0)); + + $this->expectException(FragmentNotFound::class); + $removeExpression->get(42); + } + + #[Test] + public function it_can_ignore_all_selections(): void + { + self::assertSame('row=', Expression::fromRow('7-5')->toString()); + self::assertSame('row=', Expression::fromRow()->toString()); + self::assertSame('row=', Expression::from('row=')->toString()); + self::assertTrue(Expression::from('row=')->isEmpty()); + self::assertSame(Type::Row, Expression::from('row=')->type()); + + self::assertSame('cell=', Expression::fromCell('2,3-1,2')->toString()); + self::assertSame('cell=', Expression::fromCell()->toString()); + self::assertSame('cell=', Expression::from('cell=')->toString()); + self::assertSame(Type::Cell, Expression::from('cell=')->type()); + + self::assertSame('col=', Expression::fromColumn('7-5')->toString()); + self::assertSame('col=', Expression::fromColumn()->toString()); + self::assertSame('col=', Expression::from('col=')->toString()); + self::assertSame(Type::Column, Expression::from('col=')->type()); + + } +} diff --git a/src/Fragment/Selection.php b/src/Fragment/Selection.php index 7b58250d..4d9a4cda 100644 --- a/src/Fragment/Selection.php +++ 
b/src/Fragment/Selection.php @@ -16,6 +16,9 @@ use League\Csv\FragmentNotFound; use const FILTER_VALIDATE_INT; +/** + * @internal Internal representation of an Expression Selection. + */ final class Selection { private const REGEXP_ROWS_COLUMNS_SELECTION = '/^(?\d+)(-(?\d+|\*))?$/'; @@ -29,70 +32,94 @@ final class Selection )? $/x'; - public static function fromUnknown(): self - { - return new self(-1, null, -1, null); - } - private function __construct( public readonly int $rowStart, public readonly ?int $rowEnd, public readonly int $columnStart, public readonly ?int $columnEnd, - ) { - } - - public function rowCount(): ?int - { - return match (true) { - -1 === $this->rowStart => null, - null === $this->rowEnd => -1, - default => $this->rowEnd - $this->rowStart + 1, - }; - } - - public function columnRange(): ?array - { - return match (true) { - -1 === $this->columnStart => [], - null === $this->columnEnd => null, - default => range($this->columnStart, $this->columnEnd), - }; - } + ) {} - public static function fromRow(string $selection): self + public static function fromRow(string $selection): ?self { [$start, $end] = self::parseRowColumnSelection($selection); return match (true) { - -1 === $start => throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.'), + -1 === $start => null, null === $end => new self($start, $start, -1, null), '*' === $end => new self($start, null, -1, null), default => new self($start, $end,-1, null), }; } - public static function fromColumn(string $selection): self + public static function fromColumn(string $selection): ?self { [$start, $end] = self::parseRowColumnSelection($selection); return match (true) { - -1 === $start => throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.'), + -1 === $start => null, null === $end => new self(-1, null, $start, $start), '*' === $end => new self(-1, null, $start, -1), default => new self(-1, null, $start, $end), }; } + public static function 
fromCell(string $selection): ?self + { + if ('' === $selection) { + return new self(-1, -1, -1, -1); + } + + if (1 !== preg_match(self::REGEXP_CELLS_SELECTION, $selection, $found)) { + return throw new FragmentNotFound('The fragment selection "'.$selection.'" is invalid.'); + } + + $cellStartRow = filter_var($found['csr'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + $cellStartCol = filter_var($found['csc'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $cellStartRow || false === $cellStartCol) { + return null; + } + + --$cellStartRow; + --$cellStartCol; + + $cellEnd = $found['end'] ?? null; + if (null === $cellEnd) { + return new self($cellStartRow, $cellStartRow, $cellStartCol, $cellStartCol); + } + + if ('*' === $cellEnd) { + return new self($cellStartRow, null, $cellStartCol, -1); + } + + $cellEndRow = filter_var($found['cer'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + $cellEndCol = filter_var($found['cec'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $cellEndRow || false === $cellEndCol) { + return null; + } + + --$cellEndRow; + --$cellEndCol; + + if ($cellEndRow < $cellStartRow || $cellEndCol < $cellStartCol) { + return null; + } + + return new self($cellStartRow, $cellEndRow, $cellStartCol, $cellEndCol,); + } + /** * @return array{int<-1, max>, int|null|'*'} */ private static function parseRowColumnSelection(string $selection): array { - if (1 !== preg_match(self::REGEXP_ROWS_COLUMNS_SELECTION, $selection, $found)) { + if ('' === $selection) { return [-1, 0]; } + if (1 !== preg_match(self::REGEXP_ROWS_COLUMNS_SELECTION, $selection, $found)) { + throw new FragmentNotFound('The fragment selection "'.$selection.'" is invalid.'); + } + $start = $found['start']; $end = $found['end'] ?? 
null; $start = filter_var($start, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); @@ -118,43 +145,43 @@ private static function parseRowColumnSelection(string $selection): array return [$start, $end]; } - public static function fromCell(string $selection): self + public function toString(): string { - if (1 !== preg_match(self::REGEXP_CELLS_SELECTION, $selection, $found)) { - throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.'); + if (-1 === $this->columnStart && -1 === $this->rowStart) { + return ''; } - $cellStartRow = filter_var($found['csr'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); - $cellStartCol = filter_var($found['csc'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); - if (false === $cellStartRow || false === $cellStartCol) { - throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.'); + if (-1 === $this->columnStart) { + return (string) match ($this->rowEnd) { + null => ($this->rowStart + 1).'-*', + $this->rowStart => ($this->rowStart + 1), + default => ($this->rowStart + 1).'-'.($this->rowEnd + 1), + }; } - --$cellStartRow; - --$cellStartCol; - - $cellEnd = $found['end'] ?? 
null; - if (null === $cellEnd) { - return new self($cellStartRow, $cellStartRow, $cellStartCol, $cellStartCol); + if (-1 === $this->rowStart) { + return (string) match ($this->columnEnd) { + -1 => ($this->columnStart + 1).'-*', + null, $this->columnStart => ($this->columnStart + 1), + default => ($this->columnStart + 1).'-'.($this->columnEnd + 1), + }; } - if ('*' === $cellEnd) { - return new self($cellStartRow, null, $cellStartCol, -1); - } - - $cellEndRow = filter_var($found['cer'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); - $cellEndCol = filter_var($found['cec'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); - if (false === $cellEndRow || false === $cellEndCol) { - throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.'); - } - - --$cellEndRow; - --$cellEndCol; + $selection = ($this->rowStart + 1).','.($this->columnStart + 1); - if ($cellEndRow < $cellStartRow || $cellEndCol < $cellStartCol) { - throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.'); - } + return match (true) { + $this->columnEnd === -1 => $selection.'-*', + $this->rowStart === $this->rowEnd && $this->columnStart === $this->columnEnd => $selection, + default => $selection.'-'.(($this->rowEnd ?? 0) + 1).','.(($this->columnEnd ?? 
0) + 1), + }; + } - return new self($cellStartRow, $cellEndRow, $cellStartCol, $cellEndCol,); + public function columnRange(): ?array + { + return match (true) { + -1 === $this->columnStart => [], + null === $this->columnEnd => null, + default => range($this->columnStart, $this->columnEnd), + }; } } diff --git a/src/Fragment/Type.php b/src/Fragment/Type.php index 28cfe8bc..ce08adb4 100644 --- a/src/Fragment/Type.php +++ b/src/Fragment/Type.php @@ -15,8 +15,7 @@ enum Type: string { - case Row = 'row'; - case Column = 'col'; case Cell = 'cell'; - case Unknown = 'unknown'; + case Column = 'col'; + case Row = 'row'; } diff --git a/src/FragmentFinder.php b/src/FragmentFinder.php index 082f3701..d319eba6 100644 --- a/src/FragmentFinder.php +++ b/src/FragmentFinder.php @@ -15,12 +15,6 @@ use ReflectionException; use League\Csv\Fragment\Expression; -use League\Csv\Fragment\Selection; -use League\Csv\Fragment\Type; - -use function array_filter; -use function array_map; -use function array_reduce; class FragmentFinder { @@ -34,53 +28,15 @@ public static function create(): self * @throws InvalidArgument * @throws ReflectionException * @throws SyntaxError - * - * @return iterable */ - public function findAll(string $expression, TabularDataReader $tabularDataReader): iterable + public function findFirst(Expression|string $expression, TabularDataReader $tabularDataReader): ?TabularDataReader { - return $this->find(Expression::tryFrom($expression), $tabularDataReader); - } - - /** - * @throws Exception - * @throws InvalidArgument - * @throws ReflectionException - * @throws SyntaxError - */ - public function findFirst(string $expression, TabularDataReader $tabularDataReader): ?TabularDataReader - { - $fragment = $this->find(Expression::tryFrom($expression), $tabularDataReader)[0]; - - return match ([]) { - $fragment->first() => null, - default => $fragment, - }; - } - - /** - * @throws Exception - * @throws FragmentNotFound - * @throws InvalidArgument - * @throws 
ReflectionException - * @throws SyntaxError - */ - public function findFirstOrFail(string $expression, TabularDataReader $tabularDataReader): TabularDataReader - { - $parsedExpression = Expression::tryFrom($expression); - if ([] !== array_filter( - $parsedExpression->selections, - fn (Selection $selection): bool => -1 === $selection->rowStart && -1 === $selection->columnStart) - ) { - throw new FragmentNotFound('The expression `'.$expression.'` contains an invalid or an unsupported selection for the tabular data.'); + $tabularData = Expression::from($expression)->firstFragment($tabularDataReader); + if ($tabularData instanceof TabularDataReader && [] !== $tabularData->first()) { + return $tabularData; } - $fragment = $this->find($parsedExpression, $tabularDataReader)[0]; - - return match ([]) { - $fragment->first() => throw new FragmentNotFound('No fragment found in the tabular data with the expression `'.$expression.'`.'), - default => $fragment, - }; + return null; } /** @@ -88,41 +44,15 @@ public function findFirstOrFail(string $expression, TabularDataReader $tabularDa * @throws InvalidArgument * @throws ReflectionException * @throws SyntaxError - * - * @return array */ - private function find(Expression $expression, TabularDataReader $tabularDataReader): array + public function findFirstOrFail(Expression|string $expression, TabularDataReader $tabularDataReader): TabularDataReader { - return match ($expression->type) { - Type::Row => $this->findByRow($expression, $tabularDataReader), - Type::Column => $this->findByColumn($expression, $tabularDataReader), - Type::Cell => $this->findByCell($expression, $tabularDataReader), - Type::Unknown => [ResultSet::createFromRecords()], - }; - } - - /** - * @throws Exception - * @throws InvalidArgument - * @throws SyntaxError - * @throws ReflectionException - * - * @return array - */ - private function findByRow(Expression $expression, TabularDataReader $tabularDataReader): array - { - $selections = 
array_filter($expression->selections, fn (Selection $selection): bool => -1 < $selection->rowStart); - if ([] === $selections) { - return [ResultSet::createFromRecords()]; + $parseExpression = !$expression instanceof Expression ? Expression::from($expression) : $expression; + if ((string) $parseExpression !== strtolower((string) $expression)) { + throw new FragmentNotFound('The expression "' . $expression . '" contains invalid section(s).'); } - $rowFilter = fn(array $record, int $offset): bool => [] !== array_filter( - $selections, - fn(Selection $selection) => $offset >= $selection->rowStart && - (null === $selection->rowEnd || $offset <= $selection->rowEnd) - ); - - return [Statement::create()->where($rowFilter)->process($tabularDataReader)]; + return $this->findFirst($parseExpression, $tabularDataReader) ?? throw new FragmentNotFound('No fragment found in the tabular data with the expression `'.$expression.'`.'); } /** @@ -131,78 +61,22 @@ private function findByRow(Expression $expression, TabularDataReader $tabularDat * @throws ReflectionException * @throws SyntaxError * - * @return array - */ - private function findByColumn(Expression $expression, TabularDataReader $tabularDataReader): array - { - $header = $tabularDataReader->getHeader(); - if ([] === $header) { - $header = $tabularDataReader->first(); - } - - $nbColumns = count($header); - $selections = array_filter($expression->selections, fn(Selection $selection) => -1 < $selection->columnStart); - if ([] === $selections) { - return [ResultSet::createFromRecords()]; - } - - /** @var array $columns */ - $columns = array_reduce( - $selections, - fn (array $columns, Selection $selection): array => [ - ...$columns, - ...match (($columnRange = $selection->columnRange())) { - null => range($selection->columnStart, $nbColumns - 1), - default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? 
range($selection->columnStart, $nbColumns - 1) : $columnRange, - } - ], - [] - ); - - return [match ([]) { - $columns => ResultSet::createFromRecords(), - default => Statement::create()->select(...$columns)->process($tabularDataReader), - }]; - } - - /** - * @throws Exception - * @throws InvalidArgument - * @throws ReflectionException - * @throws SyntaxError + * @return iterable + * @see Expression::fragment() * - * @return array + * @codeCoverageIgnore + * @deprecated since version 9.17.0 */ - private function findByCell(Expression $expression, TabularDataReader $tabularDataReader): array + public function findAll(Expression|string $expression, TabularDataReader $tabularDataReader): iterable { - $header = $tabularDataReader->getHeader(); - if ([] === $header) { - $header = $tabularDataReader->first(); + $found = false; + foreach (Expression::from($expression)->fragment($tabularDataReader) as $tabularData) { + $found = true; + yield $tabularData; } - $nbColumns = count($header); - $selections = array_filter( - $expression->selections, - fn(Selection $selection) => -1 < $selection->rowStart && -1 < $selection->columnStart - ); - if ([] === $selections) { - return [ResultSet::createFromRecords()]; + if (false === $found) { + yield ResultSet::createFromRecords(); } - - return array_map( - fn (Selection $selection): TabularDataReader => Statement::create() - ->where( - fn (array $record, int $offset): bool => $offset >= $selection->rowStart && - (null === $selection->rowEnd || $offset <= $selection->rowEnd) - ) - ->select( - ...match (($columnRange = $selection->columnRange())) { - null => range($selection->columnStart, $nbColumns - 1), - default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? 
range($selection->columnStart, $nbColumns - 1) : $columnRange, - } - ) - ->process($tabularDataReader), - $selections - ); } } diff --git a/src/Reader.php b/src/Reader.php index 7ba53d68..83f08755 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -17,6 +17,7 @@ use Closure; use Iterator; use JsonSerializable; +use League\Csv\Fragment\Expression; use League\Csv\Serializer\Denormalizer; use League\Csv\Serializer\MappingFailed; use League\Csv\Serializer\TypeCastingFailed; @@ -413,12 +414,12 @@ public function sorted(Query\Sort|Closure $orderBy): TabularDataReader return Statement::create()->orderBy($orderBy)->process($this); } - public function matching(string $expression): iterable + public function matching(Expression|string $expression): iterable { - return FragmentFinder::create()->findAll($expression, $this); + return Expression::from($expression)->fragment($this); } - public function matchingFirst(string $expression): ?TabularDataReader + public function matchingFirst(Expression|string $expression): ?TabularDataReader { return FragmentFinder::create()->findFirst($expression, $this); } @@ -427,7 +428,7 @@ public function matchingFirst(string $expression): ?TabularDataReader * @throws SyntaxError * @throws FragmentNotFound */ - public function matchingFirstOrFail(string $expression): TabularDataReader + public function matchingFirstOrFail(Expression|string $expression): TabularDataReader { return FragmentFinder::create()->findFirstOrFail($expression, $this); } diff --git a/src/ResultSet.php b/src/ResultSet.php index 2c64db05..f973384a 100644 --- a/src/ResultSet.php +++ b/src/ResultSet.php @@ -20,6 +20,7 @@ use Iterator; use IteratorIterator; use JsonSerializable; +use League\Csv\Fragment\Expression; use League\Csv\Serializer\Denormalizer; use League\Csv\Serializer\MappingFailed; use League\Csv\Serializer\TypeCastingFailed; @@ -269,12 +270,12 @@ public function select(string|int ...$columns): TabularDataReader return new self(new MapIterator($this, $callback), 
$hasHeader ? $header : []); } - public function matching(string $expression): iterable + public function matching(Expression|string $expression): iterable { - return FragmentFinder::create()->findAll($expression, $this); + return Expression::from($expression)->fragment($this); } - public function matchingFirst(string $expression): ?TabularDataReader + public function matchingFirst(Expression|string $expression): ?TabularDataReader { return FragmentFinder::create()->findFirst($expression, $this); } @@ -283,7 +284,7 @@ public function matchingFirst(string $expression): ?TabularDataReader * @throws SyntaxError * @throws FragmentNotFound */ - public function matchingFirstOrFail(string $expression): TabularDataReader + public function matchingFirstOrFail(Expression|string $expression): TabularDataReader { return FragmentFinder::create()->findFirstOrFail($expression, $this); } diff --git a/src/TabularDataReaderTestCase.php b/src/TabularDataReaderTestCase.php index f2de6c9d..19951cf1 100644 --- a/src/TabularDataReaderTestCase.php +++ b/src/TabularDataReaderTestCase.php @@ -20,6 +20,7 @@ use PHPUnit\Framework\Attributes\Group; use PHPUnit\Framework\Attributes\Test; use PHPUnit\Framework\TestCase; +use Throwable; #[Group('tabulardata')] abstract class TabularDataReaderTestCase extends TestCase @@ -232,13 +233,31 @@ public static function provideValidExpressions(): iterable #[Test] #[DataProvider('provideInvalidExpressions')] + public function it_will_fail_to_parse_invalid_expression(string $expression): void + { + $this->expectException(Throwable::class); + + iterator_to_array($this->tabularData()->matching($expression)); + } + + public static function provideInvalidExpressions(): iterable + { + return [ + 'expression selection is invalid for cell 1' => ['expression' => 'cell=5'], + 'expression selection is invalid for row or column 1' => ['expression' => 'row=4,3'], + 'expression selection is invalid for row or column 2' => ['expression' => 'row=four-five'], + ]; + } + + 
#[Test] + #[DataProvider('provideExpressionWithIgnoredSelections')] public function it_will_return_null_on_invalid_expression(string $expression): void { self::assertNull($this->tabularData()->matchingFirst($expression)); } #[Test] - #[DataProvider('provideInvalidExpressions')] + #[DataProvider('provideExpressionWithIgnoredSelections')] public function it_will_fail_to_parse_the_expression(string $expression): void { $this->expectException(FragmentNotFound::class); @@ -246,22 +265,18 @@ public function it_will_fail_to_parse_the_expression(string $expression): void $this->tabularData()->matchingFirstOrFail($expression); } - public static function provideInvalidExpressions(): iterable + public static function provideExpressionWithIgnoredSelections(): iterable { return [ - 'missing expression type' => ['2-4'], 'missing expression selection row' => ['row='], 'missing expression selection cell' => ['cell='], 'missing expression selection coll' => ['col='], - 'expression selection is invalid for cell 1' => ['cell=5'], 'expression selection is invalid for cell 2' => ['cell=0,3'], 'expression selection is invalid for cell 3' => ['cell=3,0'], 'expression selection is invalid for cell 4' => ['cell=1,3-0,4'], 'expression selection is invalid for cell 5' => ['cell=1,3-4,0'], 'expression selection is invalid for cell 6' => ['cell=0,3-1,4'], 'expression selection is invalid for cell 7' => ['cell=1,0-2,3'], - 'expression selection is invalid for row or column 1' => ['row=4,3'], - 'expression selection is invalid for row or column 2' => ['row=four-five'], 'expression selection is invalid for row or column 3' => ['row=0-3'], 'expression selection is invalid for row or column 4' => ['row=3-0'], ]; @@ -270,13 +285,13 @@ public static function provideInvalidExpressions(): iterable #[Test] public function it_returns_multiple_selections_in_one_tabular_data_instance(): void { - self::assertCount(1, $this->tabularData()->matching('row=1-2;5-4;2-4')); + self::assertSame(1, 
iterator_count($this->tabularData()->matching('row=1-2;5-4;2-4'))); } #[Test] public function it_returns_no_selection(): void { - self::assertCount(1, $this->tabularData()->matching('row=5-4')); + self::assertSame(0, iterator_count($this->tabularData()->matching('row=5-4'))); } #[Test] From d56a33d88e8cf570de717eb85e4457d98c358ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Anne?= Date: Mon, 19 Aug 2024 17:28:03 +0200 Subject: [PATCH 2/2] Fix usage of non empty `$escape` in PHP 8.4 Only the default escape character changes (to an empty string); the default enclosure remains the double quote. --- src/AbstractCsv.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AbstractCsv.php b/src/AbstractCsv.php index e3dcdc66..8d174b5b 100644 --- a/src/AbstractCsv.php +++ b/src/AbstractCsv.php @@ -44,8 +44,8 @@ abstract class AbstractCsv implements ByteSequence protected ?Bom $input_bom = null; protected ?Bom $output_bom = null; protected string $delimiter = ','; protected string $enclosure = '"'; - protected string $escape = '\\'; + protected string $escape = ''; protected bool $is_input_bom_included = false; /**