diff --git a/CHANGELOG.md b/CHANGELOG.md index 5582f3ca..1cb50d34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,25 @@ All Notable changes to `Csv` will be documented in this file +## [Next] - TBD + +### Added + +- `League\Csv\Fragment\Expression` + +### Deprecated + +- None + +### Fixed + +- `FragmentFinder` now removes duplicate selections. +- `Cast*` methods accept more input. + +### Removed + +- None + ## [9.16.0](https://github.com/thephpleague/csv/compare/9.15.0...9.16.0) - 2024-05-24 ### Added diff --git a/docs/9.0/reader/statement.md b/docs/9.0/reader/statement.md index dfeb3fde..a4507299 100644 --- a/docs/9.0/reader/statement.md +++ b/docs/9.0/reader/statement.md @@ -468,3 +468,74 @@ is invalid; Both classes, `FragmentFinder` and `Statement` returns an instance that implements the `TabularDataReader` interface which returns the found data in a consistent way. + +### Fragment Expression builder + +

This mechanism is introduced with version 9.17.0.

+ +The `Expression` class provides an immutable, fluent interface to create valid expressions. +We can rewrite the previous example as follows: + +```php +use League\Csv\Reader; +use League\Csv\Fragment\Expression; +use League\Csv\FragmentFinder; + +$reader = Reader::createFromPath('/path/to/file.csv'); +$finder = FragmentFinder::create(); +$expression = Expression::fromRow('7-5', '8-9'); + +$finder->findAll($expression, $reader); // return an Iterator +$finder->findFirst($expression, $reader); // return a TabularDataReader +$finder->findFirstOrFail($expression, $reader); // will throw +``` + +The `Expression` validates that your selections are valid according to the selection scheme chosen. +The class exposes methods to create fragment expressions for: + +- cell selection using `Expression::fromCell`; +- row selection using `Expression::fromRow`; +- column selection using `Expression::fromColumn`; + +```php +use League\Csv\Fragment\Expression; + +$expression = Expression::fromRow('7-5', '8-9'); +echo $expression; +// returns 'row=8-9' and removes `7-5` because it is an invalid selection +``` + +You can even gradually create your expression using a fluent and immutable API +using the `push`, `unshift`, `replace` and `remove` methods. There are also convenient methods to +inspect the class to know how many selections are present and to select them according +to their indices using the `get` and `hasKey` methods. You are also able to tell if a specific +selection is present via the `has` method, or retrieve its offset via the `contains` method. + +```php +use League\Csv\Fragment\Expression; + +$expression = Expression::fromRow() + ->push('5-8') + ->unshift('12-15') + ->replace('5-8', '12-*') + ->remove('12-15'); + +echo $expression->toString(); +// or +echo $expression; +// returns 'row=12-*' +``` + +You can use the `Expression` to directly query a `TabularDataReader` using the `query` method. +The result will be an iterable structure containing `Statement` instances. 
You will still need +to call `Statement::process` on them in order to get access to the resulting `TabularDataReader` +instances if any exist that fulfill the statement constraints. + +```php +$csv = Reader::createFromPath('/path/to/file.csv'); +$results = array_map( + fn (Statement $stmt): TabularDataReader => $stmt->process($csv), + iterator_to_array(Expression::fromRow('7-5', '8-9')->query($csv)) +); +// $results is an array of TabularDataReader instances indexed by their expression +``` diff --git a/docs/9.0/reader/tabular-data-reader.md b/docs/9.0/reader/tabular-data-reader.md index b83c01d2..0ec3ceda 100644 --- a/docs/9.0/reader/tabular-data-reader.md +++ b/docs/9.0/reader/tabular-data-reader.md @@ -506,6 +506,7 @@ $reader->matchingFirstOrFail('row=3-1;4-6'); // will throw

Wraps the functionality of FragmentFinder class.

Added in version 9.12.0 for Reader and ResultSet.

+

In addition to using a string expression, starting with version 9.17.0 you can alternatively use an `Expression` object.

### chunkBy diff --git a/src/Fragment/Expression.php b/src/Fragment/Expression.php index fc6ab51f..52add38c 100644 --- a/src/Fragment/Expression.php +++ b/src/Fragment/Expression.php @@ -13,63 +13,375 @@ namespace League\Csv\Fragment; +use Countable; +use IteratorAggregate; +use League\Csv\Exception; use League\Csv\FragmentNotFound; -use function preg_match; -use function explode; +use League\Csv\InvalidArgument; +use League\Csv\Statement; +use League\Csv\SyntaxError; +use League\Csv\TabularDataReader; +use ReflectionException; +use Stringable; +use Traversable; + use function array_map; +use function explode; +use function implode; +use function preg_match; -final class Expression +/** + * @implements IteratorAggregate + */ +final class Expression implements Stringable, Countable, IteratorAggregate { - private const REGEXP_URI_FRAGMENT = ',^(?row|cell|col)=(?.*)$,i'; + private const REGEXP_URI_FRAGMENT = ',^(?row|cell|col)=(?.*)$,i'; + private readonly Type $type; /** @param array $selections */ - private function __construct( - public readonly Type $type, - public readonly array $selections - ) {} + private readonly array $selections; - public static function tryFrom(string $expression): self + /** + * @param array $selections + */ + private function __construct(Type $type, array $selections) { - try { - return self::from($expression); - } catch (FragmentNotFound $fragmentNotFound) { - return self::fromUnknown(); + $this->type = $type; + $this->selections = self::removeDuplicates($selections); + } + + /** + * @param array $selections + * + * @return array + */ + private static function removeDuplicates(array $selections): array + { + $sorted = []; + foreach ($selections as $selection) { + $key = $selection->toString(); + if (array_key_exists($key, $sorted)) { + continue; + } + $sorted[$key] = $selection; } + ksort($sorted); + + return array_values($sorted); } - public static function from(string $expression): self + public static function from(Stringable|string 
$expression): self { - if (1 !== preg_match(self::REGEXP_URI_FRAGMENT, $expression, $matches)) { - throw new FragmentNotFound('The submitted expression `'.$expression.'` is invalid.'); + if ($expression instanceof self) { + return $expression; + } + + if (1 !== preg_match(self::REGEXP_URI_FRAGMENT, (string) $expression, $matches)) { + throw new FragmentNotFound('The expression "' . $expression . '" does not match the CSV fragment Identifier specification.'); } $selections = explode(';', $matches['selections']); - return match (Type::from(strtolower($matches['type']))) { + return match (Type::from(strtolower($matches['scheme']))) { Type::Row => self::fromRow(...$selections), Type::Column => self::fromColumn(...$selections), Type::Cell => self::fromCell(...$selections), - default => throw new FragmentNotFound('The submitted expression `'.$expression.'` is invalid.'), }; } - public static function fromUnknown(): self - { - return new self(Type::Unknown, [Selection::fromUnknown()]); - } - public static function fromCell(string ...$selections): self { - return new self(Type::Cell, array_map(Selection::fromCell(...), $selections)); + return new self(Type::Cell, array_filter(array_map(Selection::tryFromCell(...), $selections))); } public static function fromColumn(string ...$selections): self { - return new self(Type::Column, array_map(Selection::fromColumn(...), $selections)); + return new self(Type::Column, array_filter(array_map(Selection::tryFromColumn(...), $selections))); } public static function fromRow(string ...$selections): self { - return new self(Type::Row, array_map(Selection::fromRow(...), $selections)); + return new self(Type::Row, array_filter(array_map(Selection::tryFromRow(...), $selections))); + } + + public function isEmpty(): bool + { + return [] === $this->selections; + } + + public function isNotEmpty(): bool + { + return ! 
$this->isEmpty(); + } + + public function __toString(): string + { + return $this->toString(); + } + + public function toString(): string + { + return $this->type->value .'='.implode( + ';', + array_map(fn (Selection $selection): string => $selection->toString(), $this->selections) + ); + } + + public function count(): int + { + return count($this->selections); + } + + public function getIterator(): Traversable + { + foreach ($this->selections as $selection) { + yield $selection->toString(); + } + } + + public function get(int $key): string + { + return $this->selections[ + $this->filterIndex($key) ?? throw new FragmentNotFound('No selection found for the given key `'.$key.'`.') + ]->toString(); + } + + public function hasKey(int ...$keys): bool + { + $max = count($this->selections); + foreach ($keys as $offset) { + if (null === $this->filterIndex($offset, $max)) { + return false; + } + } + + return [] !== $keys; + } + + public function has(string $selection): bool + { + return null !== $this->contains($selection); + } + + public function contains(string $selection): ?int + { + if ([] === $this->selections) { + return null; + } + + try { + $selection = (match ($this->type) { + Type::Row => Selection::fromRow($selection), + Type::Column => Selection::fromColumn($selection), + Type::Cell => Selection::fromCell($selection), + })->toString(); + } catch (FragmentNotFound) { + return null; + } + + foreach ($this->selections as $offset => $innerSelection) { + if ($selection === $innerSelection->toString()) { + return $offset; + } + } + + return null; + } + + private function filterIndex(int $index, ?int $max = null): ?int + { + $max ??= count($this->selections); + + return match (true) { + [] === $this->selections, 0 > $max + $index, 0 > $max - $index - 1 => null, + 0 > $index => $max + $index, + default => $index, + }; + } + + public function push(string ...$selections): self + { + if ([] === $selections) { + return $this; + } + + $selections = array_filter(match 
($this->type) { + Type::Row => array_map(Selection::tryFromRow(...), $selections), + Type::Column => array_map(Selection::tryFromColumn(...), $selections), + Type::Cell => array_map(Selection::tryFromCell(...), $selections), + }); + + $selections = self::removeDuplicates($selections); + if ([] === $selections || $this->selections == $selections) { + return $this; + } + + return new self($this->type, [...$this->selections, ...$selections]); + } + + public function unshift(string ...$selections): self + { + if ([] === $selections) { + return $this; + } + + $selections = array_filter(match ($this->type) { + Type::Row => array_map(Selection::tryFromRow(...), $selections), + Type::Column => array_map(Selection::tryFromColumn(...), $selections), + Type::Cell => array_map(Selection::tryFromCell(...), $selections), + }); + + $selections = self::removeDuplicates($selections); + if ([] === $selections || $this->selections == $selections) { + return $this; + } + + return new self($this->type, [...$selections, ...$this->selections]); + } + + public function replace(string $oldSelection, string $newSelection): self + { + $offset = $this->contains($oldSelection); + if (null === $offset) { + throw new FragmentNotFound('The selection `'.$oldSelection.'` used for replace is not valid'); + } + + $newSelection = match ($this->type) { + Type::Row => Selection::fromRow($newSelection), + Type::Column => Selection::fromColumn($newSelection), + Type::Cell => Selection::fromCell($newSelection), + }; + + if (null === $this->contains($newSelection->toString())) { + return $this; + } + + return match ($newSelection->toString()) { + $oldSelection => $this, + default => new self($this->type, array_replace($this->selections, [$offset => $newSelection])), + }; + } + + public function remove(string ...$selections): self + { + if (in_array([], [$this->selections, $selections], true)) { + return $this; + } + + $keys = array_filter(array_map($this->contains(...), $selections), fn (int|null $key): 
bool => null !== $key); + + return match (true) { + [] === $keys => $this, + count($keys) === count($this->selections) => new self($this->type, []), + default => new self($this->type, array_values( + array_filter( + $this->selections, + fn (int $key): bool => !in_array($key, $keys, true), + ARRAY_FILTER_USE_KEY + ) + )), + }; + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws ReflectionException + * @throws SyntaxError + * + * @return iterable + */ + public function query(TabularDataReader $tabularDataReader): iterable + { + return [] === $this->selections ? [] : match ($this->type) { + Type::Row => $this->queryByRows(), + Type::Column => $this->queryByColumns($tabularDataReader), + Type::Cell => $this->queryByCells($tabularDataReader), + }; + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws SyntaxError + * @throws ReflectionException + * + * @return iterable + */ + private function queryByRows(): iterable + { + $predicate = fn (array $record, int $offset): bool => [] !== array_filter( + $this->selections, + fn (Selection $selection): bool => $offset >= $selection->rowStart && + (null === $selection->rowEnd || $offset <= $selection->rowEnd) + ); + + yield $this->toString() => Statement::create()->where($predicate); + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws ReflectionException + * @throws SyntaxError + * + * @return iterable + */ + private function queryByColumns(TabularDataReader $tabularDataReader): iterable + { + $nbColumns = $this->getTabularDataColumnCount($tabularDataReader); + $columns = array_reduce( + $this->selections, + fn (array $columns, Selection $selection): array => [ + ...$columns, + ...match (($columnRange = $selection->columnRange())) { + null => range($selection->columnStart, $nbColumns - 1), + default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? 
range($selection->columnStart, $nbColumns - 1) : $columnRange, + } + ], + [] + ); + + if ([] !== $columns) { + yield $this->toString() => Statement::create()->select(...$columns); + } + } + + /** + * @throws Exception + * @throws InvalidArgument + * @throws ReflectionException + * @throws SyntaxError + * + * @return iterable + */ + private function queryByCells(TabularDataReader $tabularDataReader): iterable + { + $nbColumns = $this->getTabularDataColumnCount($tabularDataReader); + $mapper = fn (Selection $selection): Statement => Statement::create() + ->where( + fn (array $record, int $offset): bool => $offset >= $selection->rowStart && + (null === $selection->rowEnd || $offset <= $selection->rowEnd) + ) + ->select( + ...match (($columnRange = $selection->columnRange())) { + null => range($selection->columnStart, $nbColumns - 1), + default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? range($selection->columnStart, $nbColumns - 1) : $columnRange, + } + ); + + foreach ($this->selections as $selection) { + yield Type::Cell->value.'='.$selection->toString() => $mapper($selection); + } + } + + private function getTabularDataColumnCount(TabularDataReader $tabularDataReader): int + { + $header = $tabularDataReader->getHeader(); + + return count(match ($header) { + [] => $tabularDataReader->first(), + default => $header, + }); } } diff --git a/src/Fragment/ExpressionTest.php b/src/Fragment/ExpressionTest.php new file mode 100644 index 00000000..83985737 --- /dev/null +++ b/src/Fragment/ExpressionTest.php @@ -0,0 +1,163 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +declare(strict_types=1); + +namespace League\Csv\Fragment; + +use League\Csv\FragmentNotFound; +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\TestCase; + +final class ExpressionTest extends TestCase +{ + #[Test] + #[DataProvider('validExpressionProvider')] + public function it_can_generate_an_expression_from_a_string(string $input, string $expected): void + { + self::assertSame($expected, Expression::from($input)->toString()); + } + + public static function validExpressionProvider(): iterable + { + yield 'single row' => [ + 'input' => 'ROW=1', + 'expected' => 'row=1', + ]; + + yield 'row range' => [ + 'input' => 'row=1-5', + 'expected' => 'row=1-5', + ]; + + yield 'row infinite range' => [ + 'input' => 'row=12-*', + 'expected' => 'row=12-*', + ]; + + yield 'multiple row selections' => [ + 'input' => 'row=1-5;12-*', + 'expected' => 'row=1-5;12-*', + ]; + + yield 'single column' => [ + 'input' => 'CoL=1', + 'expected' => 'col=1', + ]; + + yield 'column range' => [ + 'input' => 'col=12-24', + 'expected' => 'col=12-24', + ]; + + yield 'column infinite range' => [ + 'input' => 'col=12-*', + 'expected' => 'col=12-*', + ]; + + yield 'multiple column selections' => [ + 'input' => 'col=1-5;12-*', + 'expected' => 'col=1-5;12-*', + ]; + + yield 'single cell' => [ + 'input' => 'CeLl=1,4', + 'expected' => 'cell=1,4', + ]; + + yield 'cell range' => [ + 'input' => 'CeLl=1,4-5,9', + 'expected' => 'cell=1,4-5,9', + ]; + + yield 'cell range with infinite' => [ + 'input' => 'CeLl=1,4-*', + 'expected' => 'cell=1,4-*', + ]; + + yield 'multiple cell selection' => [ + 'input' => 'CeLl=1,4-5,9;12,15-*', + 'expected' => 'cell=1,4-5,9;12,15-*', + ]; + } + + #[Test] + #[DataProvider('invalidExpressionProvider')] + public function it_will_fail_parsing_incorrect_expression(string $expression, string $expected): void + { + self::assertSame($expected, Expression::from($expression)->toString()); + } + + public static function 
invalidExpressionProvider(): iterable + { + yield 'invalid row index' => ['expression' => 'row=-1', 'expected' => 'row=']; + yield 'invalid row end' => ['expression' => 'row=1--1', 'expected' => 'row=']; + yield 'invalid row number' => ['expression' => 'row=1-four', 'expected' => 'row=']; + yield 'invalid row range infinite' => ['expression' => 'row=*-1', 'expected' => 'row=']; + yield 'invalid multiple row range' => ['expression' => 'row=1-4,2-5', 'expected' => 'row=']; + + yield 'invalid column index' => ['expression' => 'col=-1', 'expected' => 'col=']; + yield 'invalid column end' => ['expression' => 'col=1--1', 'expected' => 'col=']; + yield 'invalid column number' => ['expression' => 'col=1-four', 'expected' => 'col=']; + yield 'invalid column range infinite' => ['expression' => 'col=*-1', 'expected' => 'col=']; + yield 'invalid multiple column range' => ['expression' => 'col=1-4,2-5', 'expected' => 'col=']; + + yield 'invalid cell' => ['expression' => 'cell=1,*', 'expected' => 'cell=']; + yield 'invalid cell index' => ['expression' => 'cell=1,-3', 'expected' => 'cell=']; + yield 'invalid cell number' => ['expression' => 'cell=1,three', 'expected' => 'cell=']; + yield 'invalid cell location' => ['expression' => 'cell=2,3-1,4', 'expected' => 'cell=']; + yield 'invalid multiple cell selection' => ['expression' => 'cell=2,3-14,16;22-23', 'expected' => 'cell=2,3-14,16']; + } + + #[Test] + public function it_can_add_remove_selections(): void + { + $expression = Expression::fromColumn(); + self::assertCount(0, $expression); + + $addExpression = $expression + ->push( '12-*') + ->unshift('1-5'); + + $removeExpression = $addExpression->remove('12-*'); + $replaceExpression = $addExpression->replace('12-*', '8-9'); + + self::assertCount(0, $expression); + self::assertCount(2, $addExpression); + self::assertCount(2, $replaceExpression); + self::assertCount(1, $removeExpression); + self::assertSame($addExpression->get(-1), $replaceExpression->get(-1)); + + 
self::assertSame($expression, $expression->push()); + self::assertSame($expression, $expression->unshift()); + self::assertSame($expression, $expression->remove()); + self::assertFalse($expression->has('12-*')); + + self::assertSame($addExpression, $addExpression->push()); + self::assertSame($addExpression, $addExpression->unshift()); + self::assertSame($addExpression, $addExpression->remove()); + self::assertSame($addExpression, $addExpression->replace($addExpression->get(0), $addExpression->get(0))); + + self::assertEquals('12-*', $addExpression->get(1)); + self::assertEquals('12-*', $addExpression->get(-1)); + self::assertTrue($addExpression->has('12-*')); + + self::assertFalse($removeExpression->hasKey(1)); + self::assertFalse($removeExpression->has('12-*')); + self::assertTrue($removeExpression->hasKey(0)); + self::assertEquals('1-5', $removeExpression->get(0)); + self::assertEquals('1-5', $addExpression->get(0)); + + $this->expectException(FragmentNotFound::class); + $removeExpression->get(42); + } +} diff --git a/src/Fragment/Selection.php b/src/Fragment/Selection.php index 7b58250d..401eb14b 100644 --- a/src/Fragment/Selection.php +++ b/src/Fragment/Selection.php @@ -16,6 +16,9 @@ use League\Csv\FragmentNotFound; use const FILTER_VALIDATE_INT; +/** + * @internal Internal representation of an Expression Selection. + */ final class Selection { private const REGEXP_ROWS_COLUMNS_SELECTION = '/^(?\d+)(-(?\d+|\*))?$/'; @@ -29,25 +32,37 @@ final class Selection )? 
$/x'; - public static function fromUnknown(): self - { - return new self(-1, null, -1, null); - } - private function __construct( public readonly int $rowStart, public readonly ?int $rowEnd, public readonly int $columnStart, public readonly ?int $columnEnd, - ) { - } + ) {} - public function rowCount(): ?int + public function toString(): string { + if (-1 === $this->columnStart) { + return match ($this->rowEnd) { + null => ($this->rowStart + 1).'-*', + $this->rowStart => (string) ($this->rowStart + 1), + default => ($this->rowStart + 1).'-'.($this->rowEnd + 1), + }; + } + + if (-1 === $this->rowStart) { + return match ($this->columnEnd) { + -1 => ($this->columnStart + 1).'-*', + null, $this->columnStart => (string) ($this->columnStart + 1), + default => ($this->columnStart + 1).'-'.($this->columnEnd + 1), + }; + } + + $selection = ($this->rowStart + 1).','.($this->columnStart + 1); + return match (true) { - -1 === $this->rowStart => null, - null === $this->rowEnd => -1, - default => $this->rowEnd - $this->rowStart + 1, + $this->columnEnd === -1 => $selection.'-*', + $this->rowStart === $this->rowEnd && $this->columnStart === $this->columnEnd => $selection, + default => $selection.'-'.(($this->rowEnd ?? 0) + 1).','.(($this->columnEnd ?? 
0) + 1), }; } @@ -60,6 +75,15 @@ public function columnRange(): ?array }; } + public static function tryFromRow(string $selection): ?self + { + try { + return self::fromRow($selection); + } catch (FragmentNotFound) { + return null; + } + } + public static function fromRow(string $selection): self { [$start, $end] = self::parseRowColumnSelection($selection); @@ -72,6 +96,15 @@ public static function fromRow(string $selection): self }; } + public static function tryFromColumn(string $selection): ?self + { + try { + return self::fromColumn($selection); + } catch (FragmentNotFound) { + return null; + } + } + public static function fromColumn(string $selection): self { [$start, $end] = self::parseRowColumnSelection($selection); @@ -84,38 +117,13 @@ public static function fromColumn(string $selection): self }; } - /** - * @return array{int<-1, max>, int|null|'*'} - */ - private static function parseRowColumnSelection(string $selection): array + public static function tryFromCell(string $selection): ?self { - if (1 !== preg_match(self::REGEXP_ROWS_COLUMNS_SELECTION, $selection, $found)) { - return [-1, 0]; - } - - $start = $found['start']; - $end = $found['end'] ?? 
null; - $start = filter_var($start, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); - if (false === $start) { - return [-1, 0]; - } - --$start; - - if (null === $end || '*' === $end) { - return [$start, $end]; - } - - $end = filter_var($end, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); - if (false === $end) { - return [-1, 0]; + try { + return self::fromCell($selection); + } catch (FragmentNotFound) { + return null; } - --$end; - - if ($end <= $start) { - return [-1, 0]; - } - - return [$start, $end]; } public static function fromCell(string $selection): self @@ -157,4 +165,38 @@ public static function fromCell(string $selection): self return new self($cellStartRow, $cellEndRow, $cellStartCol, $cellEndCol,); } + + /** + * @return array{int<-1, max>, int|null|'*'} + */ + private static function parseRowColumnSelection(string $selection): array + { + if (1 !== preg_match(self::REGEXP_ROWS_COLUMNS_SELECTION, $selection, $found)) { + return [-1, 0]; + } + + $start = $found['start']; + $end = $found['end'] ?? 
null; + $start = filter_var($start, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $start) { + return [-1, 0]; + } + --$start; + + if (null === $end || '*' === $end) { + return [$start, $end]; + } + + $end = filter_var($end, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (false === $end) { + return [-1, 0]; + } + --$end; + + if ($end <= $start) { + return [-1, 0]; + } + + return [$start, $end]; + } } diff --git a/src/Fragment/Type.php b/src/Fragment/Type.php index 28cfe8bc..ce08adb4 100644 --- a/src/Fragment/Type.php +++ b/src/Fragment/Type.php @@ -15,8 +15,7 @@ enum Type: string { - case Row = 'row'; - case Column = 'col'; case Cell = 'cell'; - case Unknown = 'unknown'; + case Column = 'col'; + case Row = 'row'; } diff --git a/src/FragmentFinder.php b/src/FragmentFinder.php index 082f3701..00bf9adc 100644 --- a/src/FragmentFinder.php +++ b/src/FragmentFinder.php @@ -15,12 +15,6 @@ use ReflectionException; use League\Csv\Fragment\Expression; -use League\Csv\Fragment\Selection; -use League\Csv\Fragment\Type; - -use function array_filter; -use function array_map; -use function array_reduce; class FragmentFinder { @@ -35,52 +29,19 @@ public static function create(): self * @throws ReflectionException * @throws SyntaxError * - * @return iterable - */ - public function findAll(string $expression, TabularDataReader $tabularDataReader): iterable - { - return $this->find(Expression::tryFrom($expression), $tabularDataReader); - } - - /** - * @throws Exception - * @throws InvalidArgument - * @throws ReflectionException - * @throws SyntaxError - */ - public function findFirst(string $expression, TabularDataReader $tabularDataReader): ?TabularDataReader - { - $fragment = $this->find(Expression::tryFrom($expression), $tabularDataReader)[0]; - - return match ([]) { - $fragment->first() => null, - default => $fragment, - }; - } - - /** - * @throws Exception - * @throws FragmentNotFound - * @throws InvalidArgument - * @throws 
ReflectionException - * @throws SyntaxError + * @return iterable */ - public function findFirstOrFail(string $expression, TabularDataReader $tabularDataReader): TabularDataReader + public function findAll(Expression|string $expression, TabularDataReader $tabularDataReader): iterable { - $parsedExpression = Expression::tryFrom($expression); - if ([] !== array_filter( - $parsedExpression->selections, - fn (Selection $selection): bool => -1 === $selection->rowStart && -1 === $selection->columnStart) - ) { - throw new FragmentNotFound('The expression `'.$expression.'` contains an invalid or an unsupported selection for the tabular data.'); + $found = false; + foreach ($this->find($expression, $tabularDataReader) as $result) { + $found = true; + yield $result; } - $fragment = $this->find($parsedExpression, $tabularDataReader)[0]; - - return match ([]) { - $fragment->first() => throw new FragmentNotFound('No fragment found in the tabular data with the expression `'.$expression.'`.'), - default => $fragment, - }; + if (false === $found) { + yield ResultSet::createFromRecords(); + } } /** @@ -88,81 +49,30 @@ public function findFirstOrFail(string $expression, TabularDataReader $tabularDa * @throws InvalidArgument * @throws ReflectionException * @throws SyntaxError - * - * @return array */ - private function find(Expression $expression, TabularDataReader $tabularDataReader): array + public function findFirst(Expression|string $expression, TabularDataReader $tabularDataReader): ?TabularDataReader { - return match ($expression->type) { - Type::Row => $this->findByRow($expression, $tabularDataReader), - Type::Column => $this->findByColumn($expression, $tabularDataReader), - Type::Cell => $this->findByCell($expression, $tabularDataReader), - Type::Unknown => [ResultSet::createFromRecords()], - }; - } + foreach ($this->find($expression, $tabularDataReader) as $fragment) { + if ($fragment->first() === []) { + return null; + } - /** - * @throws Exception - * @throws 
InvalidArgument - * @throws SyntaxError - * @throws ReflectionException - * - * @return array - */ - private function findByRow(Expression $expression, TabularDataReader $tabularDataReader): array - { - $selections = array_filter($expression->selections, fn (Selection $selection): bool => -1 < $selection->rowStart); - if ([] === $selections) { - return [ResultSet::createFromRecords()]; + return $fragment; } - $rowFilter = fn(array $record, int $offset): bool => [] !== array_filter( - $selections, - fn(Selection $selection) => $offset >= $selection->rowStart && - (null === $selection->rowEnd || $offset <= $selection->rowEnd) - ); - - return [Statement::create()->where($rowFilter)->process($tabularDataReader)]; + return null; } /** * @throws Exception + * @throws FragmentNotFound * @throws InvalidArgument * @throws ReflectionException * @throws SyntaxError - * - * @return array */ - private function findByColumn(Expression $expression, TabularDataReader $tabularDataReader): array + public function findFirstOrFail(Expression|string $expression, TabularDataReader $tabularDataReader): TabularDataReader { - $header = $tabularDataReader->getHeader(); - if ([] === $header) { - $header = $tabularDataReader->first(); - } - - $nbColumns = count($header); - $selections = array_filter($expression->selections, fn(Selection $selection) => -1 < $selection->columnStart); - if ([] === $selections) { - return [ResultSet::createFromRecords()]; - } - - /** @var array $columns */ - $columns = array_reduce( - $selections, - fn (array $columns, Selection $selection): array => [ - ...$columns, - ...match (($columnRange = $selection->columnRange())) { - null => range($selection->columnStart, $nbColumns - 1), - default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? 
range($selection->columnStart, $nbColumns - 1) : $columnRange, - } - ], - [] - ); - - return [match ([]) { - $columns => ResultSet::createFromRecords(), - default => Statement::create()->select(...$columns)->process($tabularDataReader), - }]; + return $this->findFirst($expression, $tabularDataReader) ?? throw new FragmentNotFound('No fragment found in the tabular data with the expression `'.$expression.'`.'); } /** @@ -171,38 +81,20 @@ private function findByColumn(Expression $expression, TabularDataReader $tabular * @throws ReflectionException * @throws SyntaxError * - * @return array + * @return array */ - private function findByCell(Expression $expression, TabularDataReader $tabularDataReader): array + private function find(Expression|string $expression, TabularDataReader $tabularDataReader): iterable { - $header = $tabularDataReader->getHeader(); - if ([] === $header) { - $header = $tabularDataReader->first(); + if (!$expression instanceof Expression) { + try { + $expression = Expression::from($expression); + } catch (FragmentNotFound) { + return; + } } - $nbColumns = count($header); - $selections = array_filter( - $expression->selections, - fn(Selection $selection) => -1 < $selection->rowStart && -1 < $selection->columnStart - ); - if ([] === $selections) { - return [ResultSet::createFromRecords()]; + foreach ($expression->query($tabularDataReader) as $selection => $statement) { + yield $selection => $statement->process($tabularDataReader); } - - return array_map( - fn (Selection $selection): TabularDataReader => Statement::create() - ->where( - fn (array $record, int $offset): bool => $offset >= $selection->rowStart && - (null === $selection->rowEnd || $offset <= $selection->rowEnd) - ) - ->select( - ...match (($columnRange = $selection->columnRange())) { - null => range($selection->columnStart, $nbColumns - 1), - default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? 
range($selection->columnStart, $nbColumns - 1) : $columnRange, - } - ) - ->process($tabularDataReader), - $selections - ); } } diff --git a/src/Reader.php b/src/Reader.php index 7ba53d68..10d15cc8 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -17,6 +17,7 @@ use Closure; use Iterator; use JsonSerializable; +use League\Csv\Fragment\Expression; use League\Csv\Serializer\Denormalizer; use League\Csv\Serializer\MappingFailed; use League\Csv\Serializer\TypeCastingFailed; @@ -413,12 +414,12 @@ public function sorted(Query\Sort|Closure $orderBy): TabularDataReader return Statement::create()->orderBy($orderBy)->process($this); } - public function matching(string $expression): iterable + public function matching(Expression|string $expression): iterable { return FragmentFinder::create()->findAll($expression, $this); } - public function matchingFirst(string $expression): ?TabularDataReader + public function matchingFirst(Expression|string $expression): ?TabularDataReader { return FragmentFinder::create()->findFirst($expression, $this); } @@ -427,7 +428,7 @@ public function matchingFirst(string $expression): ?TabularDataReader * @throws SyntaxError * @throws FragmentNotFound */ - public function matchingFirstOrFail(string $expression): TabularDataReader + public function matchingFirstOrFail(Expression|string $expression): TabularDataReader { return FragmentFinder::create()->findFirstOrFail($expression, $this); } diff --git a/src/ResultSet.php b/src/ResultSet.php index 2c64db05..53a37660 100644 --- a/src/ResultSet.php +++ b/src/ResultSet.php @@ -20,6 +20,7 @@ use Iterator; use IteratorIterator; use JsonSerializable; +use League\Csv\Fragment\Expression; use League\Csv\Serializer\Denormalizer; use League\Csv\Serializer\MappingFailed; use League\Csv\Serializer\TypeCastingFailed; @@ -269,12 +270,12 @@ public function select(string|int ...$columns): TabularDataReader return new self(new MapIterator($this, $callback), $hasHeader ? 
$header : []); } - public function matching(string $expression): iterable + public function matching(Expression|string $expression): iterable { return FragmentFinder::create()->findAll($expression, $this); } - public function matchingFirst(string $expression): ?TabularDataReader + public function matchingFirst(Expression|string $expression): ?TabularDataReader { return FragmentFinder::create()->findFirst($expression, $this); } @@ -283,7 +284,7 @@ public function matchingFirst(string $expression): ?TabularDataReader * @throws SyntaxError * @throws FragmentNotFound */ - public function matchingFirstOrFail(string $expression): TabularDataReader + public function matchingFirstOrFail(Expression|string $expression): TabularDataReader { return FragmentFinder::create()->findFirstOrFail($expression, $this); } diff --git a/src/TabularDataReaderTestCase.php b/src/TabularDataReaderTestCase.php index f2de6c9d..6f820d7a 100644 --- a/src/TabularDataReaderTestCase.php +++ b/src/TabularDataReaderTestCase.php @@ -270,13 +270,17 @@ public static function provideInvalidExpressions(): iterable #[Test] public function it_returns_multiple_selections_in_one_tabular_data_instance(): void { - self::assertCount(1, $this->tabularData()->matching('row=1-2;5-4;2-4')); + $count = iterator_count($this->tabularData()->matching('row=1-2;5-4;2-4')); + + self::assertSame(1, $count); } #[Test] public function it_returns_no_selection(): void { - self::assertCount(1, $this->tabularData()->matching('row=5-4')); + $count = iterator_count($this->tabularData()->matching('row=5-4')); + + self::assertSame(1, $count); } #[Test]