diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5582f3ca..f25eda7f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,27 @@
All Notable changes to `Csv` will be documented in this file
+## [Next] - TBD
+
+### Added
+
+- `League\Csv\Fragment\Expression`
+- `League\Csv\Fragment\Selection` (internal class)
+
+### Deprecated
+
+- None
+
+### Fixed
+
+- `Cast*` methods accept more input.
+- `FragmentFinder` now removes duplicate selections.
+- `TabularDataReader::matching` will return an empty `Iterable` instance when no selection is valid; previously an empty `TabularDataReader` instance was returned as the unique item of the returned iterable.
+
+### Removed
+
+- None
+
## [9.16.0](https://github.com/thephpleague/csv/compare/9.15.0...9.16.0) - 2024-05-24
### Added
diff --git a/docs/9.0/reader/statement.md b/docs/9.0/reader/statement.md
index dfeb3fde..498b8fab 100644
--- a/docs/9.0/reader/statement.md
+++ b/docs/9.0/reader/statement.md
@@ -434,10 +434,15 @@ Here are some selection example:
- `cell=5,2-8,9` : will select the cells located between row `4` and column `1` and row `7` and column `8`;
Of note, the RFC allows for multiple selections, separated by a `;`. which are translated
-as `OR` expressions. To strictly cover The RFC the class exposes the `findAll` method
+as `OR` expressions. To strictly cover the RFC the class exposes the `find` method
which returns an iterable containing the results of all found fragments as distinct `TabulatDataReader`
instances.
+
+<p class="message-notice">The <code>find</code> method is introduced with version <code>9.17.0</code>.</p>
+
+<p class="message-info">The <code>findAll</code> method is still present and usable, but it is
+preferable to use the <code>find</code> method instead.</p>
+
If some selections are invalid no error is returned; the invalid
selection is skipped from the returned value.
@@ -456,15 +461,86 @@ use League\Csv\FragmentFinder;
$reader = Reader::createFromPath('/path/to/file.csv');
$finder = FragmentFinder::create();
-$finder->findAll('row=7-5;8-9', $reader); // return an Iterator
+$finder->find('row=7-5;8-9', $reader); // return an Iterator
$finder->findFirst('row=7-5;8-9', $reader); // return an TabularDataReader
$finder->findFirstOrFail('row=7-5;8-9', $reader); // will throw
```
-- `FragmentFinder::findAll` returns an Iterator containing a single `TabularDataReader` because the first selection
+- `FragmentFinder::find` returns an Iterator containing a single `TabularDataReader` because the first selection
is invalid;
- `FragmentFinder::findFirst` returns the single valid `TabularDataReader`
- `FragmentFinder::findFirstOrFail` throws a `SyntaxError`.
Both classes, `FragmentFinder` and `Statement` returns an instance that implements the `TabularDataReader` interface
which returns the found data in a consistent way.
+
+### Fragment Expression builder
+
+<p class="message-notice">This mechanism is introduced with version <code>9.17.0</code>.</p>
+
+The `Expression` class provides an immutable, fluent interface to create valid expressions.
+We can rewrite the previous example as follows:
+
+```php
+use League\Csv\Reader;
+use League\Csv\Fragment\Expression;
+use League\Csv\FragmentFinder;
+
+$reader = Reader::createFromPath('/path/to/file.csv');
+$finder = FragmentFinder::create();
+$expression = Expression::fromRow('7-5', '8-9');
+
+$finder->find($expression, $reader); // return an Iterator
+$finder->findFirst($expression, $reader); // return an TabularDataReader
+$finder->findFirstOrFail($expression, $reader); // will throw
+```
+
+The `Expression` class validates your selections according to the selection scheme chosen.
+The class exposes methods to create fragment expressions for:
+
+- cell selection using `Expression::fromCell`;
+- row selection using `Expression::fromRow`;
+- column selection using `Expression::fromColumn`;
+
+```php
+use League\Csv\Fragment\Expression;
+
+$expression = Expression::fromRow('7-5', '8-9');
+echo $expression;
+// returns 'row=8-9' and removes `7-5` because it is an invalid selection
+```
+
+You can even gradually create your expression using a fluent and immutable API
+with the `push`, `unshift`, `replace` and `remove` methods. There are also convenient methods to
+inspect the class: you can count how many selections are present, access them by
+their indices using the `get` and `hasKey` methods, and tell whether a specific
+selection is present via the `has` and `contains` methods.
+
+```php
+use League\Csv\Fragment\Expression;
+
+$expression = Expression::fromRow()
+ ->push('5-8')
+ ->unshift('12-15')
+ ->replace('5-8', '12-*')
+ ->remove('12-15');
+
+echo $expression->toString();
+// or
+echo $expression;
+// returns 'row=12-*'
+```
+
+You can use the `Expression` to directly query a `TabularDataReader` using the `query` method.
+The result will be an iterable structure containing `Statement` instances. You will still need
+to call `Statement::process` on each of them in order to get access to the resulting `TabularDataReader`
+instances, if any exist that fulfill the statement constraints.
+
+```php
+$csv = Reader::createFromPath('/path/to/file.csv');
+// `query` returns an iterable (a Generator when selections exist), which `array_map`
+// does not accept; the statements are therefore processed with a plain loop.
+$results = [];
+foreach (Expression::fromRow('7-5', '8-9')->query($csv) as $selection => $stmt) {
+    $results[$selection] = $stmt->process($csv);
+}
+//$results is an array of TabularDataReader instances indexed by their fragment expression
+```
diff --git a/docs/9.0/reader/tabular-data-reader.md b/docs/9.0/reader/tabular-data-reader.md
index b83c01d2..0ec3ceda 100644
--- a/docs/9.0/reader/tabular-data-reader.md
+++ b/docs/9.0/reader/tabular-data-reader.md
@@ -506,6 +506,7 @@ $reader->matchingFirstOrFail('row=3-1;4-6'); // will throw
Wraps the functionality of FragmentFinder
class.
Added in version 9.12.0
for Reader
and ResultSet
.
+<p class="message-notice">In addition to using a string expression, starting with version <code>9.17.0</code> you can alternatively use an <code>Expression</code> object.</p>
### chunkBy
diff --git a/src/Fragment/Expression.php b/src/Fragment/Expression.php
index fc6ab51f..bd8ed860 100644
--- a/src/Fragment/Expression.php
+++ b/src/Fragment/Expression.php
@@ -13,34 +13,86 @@
namespace League\Csv\Fragment;
+use Countable;
+use IteratorAggregate;
+use League\Csv\Exception;
use League\Csv\FragmentNotFound;
-use function preg_match;
-use function explode;
+use League\Csv\InvalidArgument;
+use League\Csv\Statement;
+use League\Csv\SyntaxError;
+use League\Csv\TabularDataReader;
+use ReflectionException;
+use Stringable;
+use Traversable;
+
use function array_map;
+use function explode;
+use function implode;
+use function preg_match;
-final class Expression
+/**
+ * @implements IteratorAggregate
+ */
+final class Expression implements Stringable, Countable, IteratorAggregate
{
private const REGEXP_URI_FRAGMENT = ',^(?row|cell|col)=(?.*)$,i';
+ private readonly Type $type;
/** @param array $selections */
- private function __construct(
- public readonly Type $type,
- public readonly array $selections
- ) {}
+ private readonly array $selections;
+
+ /**
+  * Stores the expression type and its selections once null entries
+  * and duplicates have been stripped by removeDuplicates().
+  *
+  * @param array<Selection|null> $selections
+  */
+ private function __construct(Type $type, array $selections)
+ {
+     $this->selections = self::removeDuplicates($selections);
+     $this->type = $type;
+ }
- public static function tryFrom(string $expression): self
+ /**
+ * @param array $selections
+ *
+ * @return array
+ */
+ private static function removeDuplicates(array $selections): array
{
- try {
- return self::from($expression);
- } catch (FragmentNotFound $fragmentNotFound) {
- return self::fromUnknown();
+ $sorted = [];
+ foreach ($selections as $selection) {
+ if (null !== $selection) {
+ $sorted[$selection->toString()] = $selection;
+ }
}
+
+ return array_values($sorted);
+ }
+
+ /**
+  * Tells whether both lists hold the same selections, regardless of their order.
+  *
+  * Selections are compared through their sorted string representations.
+  *
+  * @param array<Selection> $selections1
+  * @param array<Selection> $selections2
+  */
+ private static function isEqualSelection(array $selections1, array $selections2): bool
+ {
+     $normalize = function (array $selections): array {
+         $strings = array_map(fn (Selection $selection): string => $selection->toString(), $selections);
+         sort($strings);
+
+         return $strings;
+     };
+
+     $selectionsA = $normalize($selections1);
+     $selectionsB = $normalize($selections2);
+
+     return $selectionsB === $selectionsA;
}
- public static function from(string $expression): self
+ public static function from(Stringable|string $expression): self
{
- if (1 !== preg_match(self::REGEXP_URI_FRAGMENT, $expression, $matches)) {
- throw new FragmentNotFound('The submitted expression `'.$expression.'` is invalid.');
+ if ($expression instanceof self) {
+ return $expression;
+ }
+
+ if (1 !== preg_match(self::REGEXP_URI_FRAGMENT, (string) $expression, $matches)) {
+ throw new FragmentNotFound('The expression "' . $expression . '" does not match the CSV fragment Identifier specification.');
}
$selections = explode(';', $matches['selections']);
@@ -49,27 +101,306 @@ public static function from(string $expression): self
Type::Row => self::fromRow(...$selections),
Type::Column => self::fromColumn(...$selections),
Type::Cell => self::fromCell(...$selections),
- default => throw new FragmentNotFound('The submitted expression `'.$expression.'` is invalid.'),
};
}
- public static function fromUnknown(): self
- {
- return new self(Type::Unknown, [Selection::fromUnknown()]);
- }
-
public static function fromCell(string ...$selections): self
{
- return new self(Type::Cell, array_map(Selection::fromCell(...), $selections));
+ return new self(Type::Cell, array_filter(array_map(Selection::fromCell(...), $selections)));
}
public static function fromColumn(string ...$selections): self
{
- return new self(Type::Column, array_map(Selection::fromColumn(...), $selections));
+ return new self(Type::Column, array_filter(array_map(Selection::fromColumn(...), $selections)));
}
public static function fromRow(string ...$selections): self
{
- return new self(Type::Row, array_map(Selection::fromRow(...), $selections));
+ return new self(Type::Row, array_filter(array_map(Selection::fromRow(...), $selections)));
+ }
+
+ /**
+  * Tells whether the expression holds no selection.
+  */
+ public function isEmpty(): bool
+ {
+     return 0 === count($this->selections);
+ }
+
+ /**
+  * Tells whether the expression holds at least one selection.
+  */
+ public function isNotEmpty(): bool
+ {
+     return [] !== $this->selections;
+ }
+
+ /**
+  * Stringable implementation; delegates to toString().
+  */
+ public function __toString(): string
+ {
+     $expression = $this->toString();
+
+     return $expression;
+ }
+
+ /**
+  * Returns the expression as `<type>=<selection>;<selection>...`.
+  *
+  * An expression without selection yields the type followed by `=` only.
+  */
+ public function toString(): string
+ {
+     $parts = [];
+     foreach ($this->selections as $selection) {
+         $parts[] = $selection->toString();
+     }
+
+     return $this->type->value .'='.implode(';', $parts);
+ }
+
+ /**
+  * Countable implementation: the number of selections held by the expression.
+  */
+ public function count(): int
+ {
+     $total = count($this->selections);
+
+     return $total;
+ }
+
+ /**
+  * IteratorAggregate implementation: iterates over the string form of each selection.
+  *
+  * @return Traversable<int, string>
+  */
+ public function getIterator(): Traversable
+ {
+     yield from array_map(
+         fn (Selection $selection): string => $selection->toString(),
+         $this->selections
+     );
+ }
+
+ /**
+  * Returns the string form of the selection stored at the given offset.
+  *
+  * Negative offsets are resolved from the end of the list by filterIndex().
+  *
+  * @throws FragmentNotFound when the offset does not resolve to a selection
+  */
+ public function get(int $key): string
+ {
+     $offset = $this->filterIndex($key);
+     if (null === $offset) {
+         throw new FragmentNotFound('No selection found for the given key `'.$key.'`.');
+     }
+
+     return $this->selections[$offset]->toString();
+ }
+
+ /**
+  * Tells whether every given offset resolves to a selection.
+  *
+  * Returns false when no offset is submitted.
+  */
+ public function hasKey(int ...$keys): bool
+ {
+     if ([] === $keys) {
+         return false;
+     }
+
+     $total = count($this->selections);
+     foreach ($keys as $key) {
+         if (null === $this->filterIndex($key, $total)) {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /**
+  * Tells whether the given selection is present, i.e. contains() finds an offset for it.
+  */
+ public function has(string $selection): bool
+ {
+     $offset = $this->contains($selection);
+
+     return null !== $offset;
+ }
+
+ /**
+  * Returns the offset of the given selection, or null when it is absent.
+  *
+  * The submitted string is parsed according to the expression type and compared
+  * through its normalized string form; an unparsable selection yields null.
+  */
+ public function contains(string $selection): ?int
+ {
+     if ([] === $this->selections) {
+         return null;
+     }
+
+     $parsed = match ($this->type) {
+         Type::Row => Selection::fromRow($selection),
+         Type::Column => Selection::fromColumn($selection),
+         Type::Cell => Selection::fromCell($selection),
+     };
+
+     if (null === $parsed) {
+         return null;
+     }
+
+     $needle = $parsed->toString();
+     foreach ($this->selections as $offset => $candidate) {
+         if ($candidate->toString() === $needle) {
+             return $offset;
+         }
+     }
+
+     return null;
+ }
+
+ /**
+  * Resolves an offset against the selection list.
+  *
+  * Negative offsets count from the end; null is returned when the list is
+  * empty or when the offset falls outside of it.
+  *
+  * @param ?int $max number of selections; counted when not provided
+  */
+ private function filterIndex(int $index, ?int $max = null): ?int
+ {
+     $max ??= count($this->selections);
+
+     if ([] === $this->selections || $index < -$max || $index >= $max) {
+         return null;
+     }
+
+     return $index < 0 ? $max + $index : $index;
+ }
+
+ /**
+  * Returns an instance with the given selections appended.
+  *
+  * Invalid selections are dropped. The current instance is returned when nothing
+  * valid is submitted or when the submitted set equals the current one.
+  */
+ public function push(string ...$selections): self
+ {
+     if ([] === $selections) {
+         return $this;
+     }
+
+     $factory = match ($this->type) {
+         Type::Row => Selection::fromRow(...),
+         Type::Column => Selection::fromColumn(...),
+         Type::Cell => Selection::fromCell(...),
+     };
+
+     $additions = self::removeDuplicates(array_filter(array_map($factory, $selections)));
+
+     return match (true) {
+         [] === $additions,
+         self::isEqualSelection($this->selections, $additions) => $this,
+         default => new self($this->type, [...$this->selections, ...$additions]),
+     };
+ }
+
+ /**
+  * Returns an instance with the given selections prepended.
+  *
+  * Invalid selections are dropped. The current instance is returned when nothing
+  * valid is submitted or when the submitted set equals the current one.
+  */
+ public function unshift(string ...$selections): self
+ {
+     if ([] === $selections) {
+         return $this;
+     }
+
+     $factory = match ($this->type) {
+         Type::Row => Selection::fromRow(...),
+         Type::Column => Selection::fromColumn(...),
+         Type::Cell => Selection::fromCell(...),
+     };
+
+     $additions = self::removeDuplicates(array_filter(array_map($factory, $selections)));
+
+     return match (true) {
+         [] === $additions,
+         self::isEqualSelection($this->selections, $additions) => $this,
+         default => new self($this->type, [...$additions, ...$this->selections]),
+     };
+ }
+
+ /**
+  * Replaces a selection already held by the expression with another one.
+  *
+  * @throws FragmentNotFound when the old selection is not present or when the
+  *                          new selection cannot be parsed for the expression type
+  */
+ public function replace(string $oldSelection, string $newSelection): self
+ {
+ $offset = $this->contains($oldSelection);
+ if (null === $offset) {
+ throw new FragmentNotFound('The selection `'.$oldSelection.'` used for replace is not valid');
+ }
+
+ $newSelectionObject = match ($this->type) {
+ Type::Row => Selection::fromRow($newSelection),
+ Type::Column => Selection::fromColumn($newSelection),
+ Type::Cell => Selection::fromCell($newSelection),
+ };
+
+ if (null === $newSelectionObject) {
+ throw new FragmentNotFound('The selection `'.$newSelection.'` used for replace is not valid');
+ }
+
+ // NOTE(review): this guard returns the current instance whenever the new selection
+ // is NOT already present, so replacing with a brand-new selection is a no-op.
+ // The condition looks inverted (`null !==`?) — confirm the intended behavior.
+ if (null === $this->contains($newSelectionObject->toString())) {
+ return $this;
+ }
+
+ // The normalized new selection is compared with the raw `$oldSelection` string.
+ return match ($newSelectionObject->toString()) {
+ $oldSelection => $this,
+ default => new self($this->type, array_replace($this->selections, [$offset => $newSelectionObject])),
+ };
+ }
+
+ /**
+  * Returns an instance without the given selections.
+  *
+  * Selections that cannot be parsed or that are not present are ignored;
+  * the current instance is returned when nothing has to be removed.
+  */
+ public function remove(string ...$selections): self
+ {
+     if (in_array([], [$this->selections, $selections], true)) {
+         return $this;
+     }
+
+     // Several inputs may resolve to the same offset (e.g. the same selection submitted
+     // twice); offsets are de-duplicated so that the count comparison below only
+     // empties the expression when every distinct selection is targeted.
+     $keys = array_unique(array_filter(
+         array_map($this->contains(...), $selections),
+         fn (int|null $key): bool => null !== $key
+     ));
+
+     return match (true) {
+         [] === $keys => $this,
+         count($keys) === count($this->selections) => new self($this->type, []),
+         default => new self($this->type, array_values(
+             array_filter(
+                 $this->selections,
+                 fn (int $key): bool => !in_array($key, $keys, true),
+                 ARRAY_FILTER_USE_KEY
+             )
+         )),
+     };
+ }
+
+ /**
+  * Converts the expression into statements, dispatching on the expression type.
+  *
+  * An expression without selection yields an empty array.
+  *
+  * @throws Exception
+  * @throws InvalidArgument
+  * @throws ReflectionException
+  * @throws SyntaxError
+  *
+  * @return iterable<string, Statement>
+  */
+ public function query(TabularDataReader $tabularDataReader): iterable
+ {
+     if ([] === $this->selections) {
+         return [];
+     }
+
+     return match ($this->type) {
+         Type::Row => $this->queryByRows(),
+         Type::Column => $this->queryByColumns($tabularDataReader),
+         Type::Cell => $this->queryByCells($tabularDataReader),
+     };
+ }
+
+ /**
+  * Yields a single statement, keyed by the expression string, which keeps the
+  * records whose offset falls inside at least one row selection.
+  *
+  * @throws Exception
+  * @throws InvalidArgument
+  * @throws SyntaxError
+  * @throws ReflectionException
+  *
+  * @return iterable<string, Statement>
+  */
+ private function queryByRows(): iterable
+ {
+     $isSelected = function (array $record, int $offset): bool {
+         foreach ($this->selections as $selection) {
+             // a null rowEnd stands for an open-ended range
+             if ($offset >= $selection->rowStart && (null === $selection->rowEnd || $offset <= $selection->rowEnd)) {
+                 return true;
+             }
+         }
+
+         return false;
+     };
+
+     yield $this->toString() => Statement::create()->where($isSelected);
+ }
+
+ /**
+  * Yields a single statement, keyed by the expression string, selecting every
+  * column offset covered by the column selections.
+  *
+  * Column offsets are 0-based; a range that is open-ended (`*`) or that ends past
+  * the last available column is clamped to the last column offset.
+  *
+  * @throws Exception
+  * @throws InvalidArgument
+  * @throws ReflectionException
+  * @throws SyntaxError
+  *
+  * @return iterable<string, Statement>
+  */
+ private function queryByColumns(TabularDataReader $tabularDataReader): iterable
+ {
+     $lastColumn = $this->getTabularDataColumnCount($tabularDataReader) - 1;
+     $columns = [];
+     foreach ($this->selections as $selection) {
+         $columnRange = $selection->columnRange();
+         // columnEnd is a 0-based offset, so anything above $lastColumn is out of bounds
+         // (the previous `> $nbColumns` test let `columnEnd === $nbColumns` through).
+         $clamp = null === $columnRange
+             || -1 === $selection->columnEnd
+             || $selection->columnEnd > $lastColumn;
+
+         // NOTE(review): a columnStart above $lastColumn still produces a descending
+         // range here, as before — confirm how callers expect this to be handled.
+         $columns = [
+             ...$columns,
+             ...($clamp ? range($selection->columnStart, $lastColumn) : $columnRange),
+         ];
+     }
+
+     if ([] !== $columns) {
+         yield $this->toString() => Statement::create()->select(...$columns);
+     }
+ }
+
+ /**
+  * Yields one statement per cell selection, keyed by `cell=<selection>`, filtering
+  * the rows and selecting the columns covered by that selection.
+  *
+  * Offsets are 0-based; a range that is open-ended (`*`) or that ends past the
+  * last available column is clamped to the last column offset.
+  *
+  * @throws Exception
+  * @throws InvalidArgument
+  * @throws ReflectionException
+  * @throws SyntaxError
+  *
+  * @return iterable<string, Statement>
+  */
+ private function queryByCells(TabularDataReader $tabularDataReader): iterable
+ {
+     $lastColumn = $this->getTabularDataColumnCount($tabularDataReader) - 1;
+
+     foreach ($this->selections as $selection) {
+         $columnRange = $selection->columnRange();
+         // columnEnd is a 0-based offset, so anything above $lastColumn is out of bounds
+         // (the previous `> $nbColumns` test let `columnEnd === $nbColumns` through).
+         $clamp = null === $columnRange
+             || -1 === $selection->columnEnd
+             || $selection->columnEnd > $lastColumn;
+
+         $statement = Statement::create()
+             ->where(
+                 // a null rowEnd stands for an open-ended range
+                 fn (array $record, int $offset): bool => $offset >= $selection->rowStart &&
+                     (null === $selection->rowEnd || $offset <= $selection->rowEnd)
+             )
+             ->select(...($clamp ? range($selection->columnStart, $lastColumn) : $columnRange));
+
+         yield Type::Cell->value.'='.$selection->toString() => $statement;
+     }
+ }
+
+ /**
+  * Counts the columns of the tabular data: the header size when a header is
+  * available, otherwise the size of the first record.
+  */
+ private function getTabularDataColumnCount(TabularDataReader $tabularDataReader): int
+ {
+     $header = $tabularDataReader->getHeader();
+     if ([] === $header) {
+         return count($tabularDataReader->first());
+     }
+
+     return count($header);
}
}
diff --git a/src/Fragment/ExpressionTest.php b/src/Fragment/ExpressionTest.php
new file mode 100644
index 00000000..83985737
--- /dev/null
+++ b/src/Fragment/ExpressionTest.php
@@ -0,0 +1,163 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+declare(strict_types=1);
+
+namespace League\Csv\Fragment;
+
+use League\Csv\FragmentNotFound;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Covers string parsing, normalization and the immutable modifiers of Expression.
+ */
+final class ExpressionTest extends TestCase
+{
+ /**
+  * A valid expression is parsed and rendered back with a lower-cased type.
+  */
+ #[Test]
+ #[DataProvider('validExpressionProvider')]
+ public function it_can_generate_an_expression_from_a_string(string $input, string $expected): void
+ {
+ self::assertSame($expected, Expression::from($input)->toString());
+ }
+
+ /**
+  * Provides valid expressions with their expected normalized string form.
+  */
+ public static function validExpressionProvider(): iterable
+ {
+ yield 'single row' => [
+ 'input' => 'ROW=1',
+ 'expected' => 'row=1',
+ ];
+
+ yield 'row range' => [
+ 'input' => 'row=1-5',
+ 'expected' => 'row=1-5',
+ ];
+
+ yield 'row infinite range' => [
+ 'input' => 'row=12-*',
+ 'expected' => 'row=12-*',
+ ];
+
+ yield 'multiple row selections' => [
+ 'input' => 'row=1-5;12-*',
+ 'expected' => 'row=1-5;12-*',
+ ];
+
+ yield 'single column' => [
+ 'input' => 'CoL=1',
+ 'expected' => 'col=1',
+ ];
+
+ yield 'column range' => [
+ 'input' => 'col=12-24',
+ 'expected' => 'col=12-24',
+ ];
+
+ yield 'column infinite range' => [
+ 'input' => 'col=12-*',
+ 'expected' => 'col=12-*',
+ ];
+
+ yield 'multiple column selections' => [
+ 'input' => 'col=1-5;12-*',
+ 'expected' => 'col=1-5;12-*',
+ ];
+
+ yield 'single cell' => [
+ 'input' => 'CeLl=1,4',
+ 'expected' => 'cell=1,4',
+ ];
+
+ yield 'cell range' => [
+ 'input' => 'CeLl=1,4-5,9',
+ 'expected' => 'cell=1,4-5,9',
+ ];
+
+ yield 'cell range with infinite' => [
+ 'input' => 'CeLl=1,4-*',
+ 'expected' => 'cell=1,4-*',
+ ];
+
+ yield 'multiple cell selection' => [
+ 'input' => 'CeLl=1,4-5,9;12,15-*',
+ 'expected' => 'cell=1,4-5,9;12,15-*',
+ ];
+ }
+
+ /**
+  * Despite its name, this test expects no failure: invalid selections are
+  * dropped and the remaining expression is rendered.
+  */
+ #[Test]
+ #[DataProvider('invalidExpressionProvider')]
+ public function it_will_fail_parsing_incorrect_expression(string $expression, string $expected): void
+ {
+ self::assertSame($expected, Expression::from($expression)->toString());
+ }
+
+ /**
+  * Provides expressions holding invalid selections with the string expected
+  * once those selections have been dropped.
+  */
+ public static function invalidExpressionProvider(): iterable
+ {
+ yield 'invalid row index' => ['expression' => 'row=-1', 'expected' => 'row='];
+ yield 'invalid row end' => ['expression' => 'row=1--1', 'expected' => 'row='];
+ yield 'invalid row number' => ['expression' => 'row=1-four', 'expected' => 'row='];
+ yield 'invalid row range infinite' => ['expression' => 'row=*-1', 'expected' => 'row='];
+ yield 'invalid multiple row range' => ['expression' => 'row=1-4,2-5', 'expected' => 'row='];
+
+ yield 'invalid column index' => ['expression' => 'col=-1', 'expected' => 'col='];
+ yield 'invalid column end' => ['expression' => 'col=1--1', 'expected' => 'col='];
+ yield 'invalid column number' => ['expression' => 'col=1-four', 'expected' => 'col='];
+ yield 'invalid column range infinite' => ['expression' => 'col=*-1', 'expected' => 'col='];
+ yield 'invalid multiple column range' => ['expression' => 'col=1-4,2-5', 'expected' => 'col='];
+
+ yield 'invalid cell' => ['expression' => 'cell=1,*', 'expected' => 'cell='];
+ yield 'invalid cell index' => ['expression' => 'cell=1,-3', 'expected' => 'cell='];
+ yield 'invalid cell number' => ['expression' => 'cell=1,three', 'expected' => 'cell='];
+ yield 'invalid cell location' => ['expression' => 'cell=2,3-1,4', 'expected' => 'cell='];
+ yield 'invalid multiple cell selection' => ['expression' => 'cell=2,3-14,16;22-23', 'expected' => 'cell=2,3-14,16'];
+ }
+
+ /**
+  * Exercises push, unshift, remove and replace, checking that the original
+  * instances are left untouched and that no-op calls return the same instance.
+  */
+ #[Test]
+ public function it_can_add_remove_selections(): void
+ {
+ $expression = Expression::fromColumn();
+ self::assertCount(0, $expression);
+
+ $addExpression = $expression
+ ->push( '12-*')
+ ->unshift('1-5');
+
+ $removeExpression = $addExpression->remove('12-*');
+ $replaceExpression = $addExpression->replace('12-*', '8-9');
+
+ self::assertCount(0, $expression);
+ self::assertCount(2, $addExpression);
+ self::assertCount(2, $replaceExpression);
+ self::assertCount(1, $removeExpression);
+ // NOTE(review): this asserts that the last selection is unchanged after
+ // replace('12-*', '8-9'), i.e. that the replacement did not happen — confirm intent.
+ self::assertSame($addExpression->get(-1), $replaceExpression->get(-1));
+
+ self::assertSame($expression, $expression->push());
+ self::assertSame($expression, $expression->unshift());
+ self::assertSame($expression, $expression->remove());
+ self::assertFalse($expression->has('12-*'));
+
+ self::assertSame($addExpression, $addExpression->push());
+ self::assertSame($addExpression, $addExpression->unshift());
+ self::assertSame($addExpression, $addExpression->remove());
+ self::assertSame($addExpression, $addExpression->replace($addExpression->get(0), $addExpression->get(0)));
+
+ self::assertEquals('12-*', $addExpression->get(1));
+ self::assertEquals('12-*', $addExpression->get(-1));
+ self::assertTrue($addExpression->has('12-*'));
+
+ self::assertFalse($removeExpression->hasKey(1));
+ self::assertFalse($removeExpression->has('12-*'));
+ self::assertTrue($removeExpression->hasKey(0));
+ self::assertEquals('1-5', $removeExpression->get(0));
+ self::assertEquals('1-5', $addExpression->get(0));
+
+ // The expectation must be registered before the throwing call below.
+ $this->expectException(FragmentNotFound::class);
+ $removeExpression->get(42);
+ }
+}
diff --git a/src/Fragment/Selection.php b/src/Fragment/Selection.php
index 7b58250d..20f623fe 100644
--- a/src/Fragment/Selection.php
+++ b/src/Fragment/Selection.php
@@ -13,9 +13,11 @@
namespace League\Csv\Fragment;
-use League\Csv\FragmentNotFound;
use const FILTER_VALIDATE_INT;
+/**
+ * @internal Internal representation of an Expression Selection.
+ */
final class Selection
{
private const REGEXP_ROWS_COLUMNS_SELECTION = '/^(?\d+)(-(?\d+|\*))?$/';
@@ -29,61 +31,77 @@ final class Selection
)?
$/x';
- public static function fromUnknown(): self
- {
- return new self(-1, null, -1, null);
- }
-
private function __construct(
public readonly int $rowStart,
public readonly ?int $rowEnd,
public readonly int $columnStart,
public readonly ?int $columnEnd,
- ) {
- }
+ ) {}
- public function rowCount(): ?int
- {
- return match (true) {
- -1 === $this->rowStart => null,
- null === $this->rowEnd => -1,
- default => $this->rowEnd - $this->rowStart + 1,
- };
- }
-
- public function columnRange(): ?array
- {
- return match (true) {
- -1 === $this->columnStart => [],
- null === $this->columnEnd => null,
- default => range($this->columnStart, $this->columnEnd),
- };
- }
-
- public static function fromRow(string $selection): self
+ public static function fromRow(string $selection): ?self
{
[$start, $end] = self::parseRowColumnSelection($selection);
return match (true) {
- -1 === $start => throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.'),
+ -1 === $start => null,
null === $end => new self($start, $start, -1, null),
'*' === $end => new self($start, null, -1, null),
default => new self($start, $end,-1, null),
};
}
- public static function fromColumn(string $selection): self
+ public static function fromColumn(string $selection): ?self
{
[$start, $end] = self::parseRowColumnSelection($selection);
return match (true) {
- -1 === $start => throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.'),
+ -1 === $start => null,
null === $end => new self(-1, null, $start, $start),
'*' === $end => new self(-1, null, $start, -1),
default => new self(-1, null, $start, $end),
};
}
+ /**
+  * Creates a cell selection from its string form, or returns null when the
+  * string is not a valid cell selection.
+  *
+  * Submitted row and column numbers must be integers starting at 1; they are
+  * stored as 0-based offsets. A `*` end yields a null row end and a -1 column end.
+  */
+ public static function fromCell(string $selection): ?self
+ {
+     if (1 !== preg_match(self::REGEXP_CELLS_SELECTION, $selection, $matches)) {
+         return null;
+     }
+
+     $startRow = filter_var($matches['csr'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
+     $startColumn = filter_var($matches['csc'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
+     if (false === $startRow || false === $startColumn) {
+         return null;
+     }
+
+     $startRow -= 1;
+     $startColumn -= 1;
+
+     $end = $matches['end'] ?? null;
+     if (null === $end) {
+         // single cell: start and end are the same
+         return new self($startRow, $startRow, $startColumn, $startColumn);
+     }
+
+     if ('*' === $end) {
+         return new self($startRow, null, $startColumn, -1);
+     }
+
+     $endRow = filter_var($matches['cer'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
+     $endColumn = filter_var($matches['cec'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
+     if (false === $endRow || false === $endColumn) {
+         return null;
+     }
+
+     $endRow -= 1;
+     $endColumn -= 1;
+
+     // the end cell may not be located before the start cell
+     return ($endRow >= $startRow && $endColumn >= $startColumn)
+         ? new self($startRow, $endRow, $startColumn, $endColumn)
+         : null;
+ }
+
/**
* @return array{int<-1, max>, int|null|'*'}
*/
@@ -118,43 +136,39 @@ private static function parseRowColumnSelection(string $selection): array
return [$start, $end];
}
- public static function fromCell(string $selection): self
+ public function toString(): string
{
- if (1 !== preg_match(self::REGEXP_CELLS_SELECTION, $selection, $found)) {
- throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.');
+ if (-1 === $this->columnStart) {
+ return match ($this->rowEnd) {
+ null => ($this->rowStart + 1).'-*',
+ $this->rowStart => (string) ($this->rowStart + 1),
+ default => ($this->rowStart + 1).'-'.($this->rowEnd + 1),
+ };
}
- $cellStartRow = filter_var($found['csr'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
- $cellStartCol = filter_var($found['csc'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
- if (false === $cellStartRow || false === $cellStartCol) {
- throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.');
+ if (-1 === $this->rowStart) {
+ return match ($this->columnEnd) {
+ -1 => ($this->columnStart + 1).'-*',
+ null, $this->columnStart => (string) ($this->columnStart + 1),
+ default => ($this->columnStart + 1).'-'.($this->columnEnd + 1),
+ };
}
- --$cellStartRow;
- --$cellStartCol;
+ $selection = ($this->rowStart + 1).','.($this->columnStart + 1);
- $cellEnd = $found['end'] ?? null;
- if (null === $cellEnd) {
- return new self($cellStartRow, $cellStartRow, $cellStartCol, $cellStartCol);
- }
-
- if ('*' === $cellEnd) {
- return new self($cellStartRow, null, $cellStartCol, -1);
- }
-
- $cellEndRow = filter_var($found['cer'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
- $cellEndCol = filter_var($found['cec'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]);
- if (false === $cellEndRow || false === $cellEndCol) {
- throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.');
- }
-
- --$cellEndRow;
- --$cellEndCol;
-
- if ($cellEndRow < $cellStartRow || $cellEndCol < $cellStartCol) {
- throw new FragmentNotFound('The submitted selection `'.$selection.'` is invalid.');
- }
+ return match (true) {
+ $this->columnEnd === -1 => $selection.'-*',
+ $this->rowStart === $this->rowEnd && $this->columnStart === $this->columnEnd => $selection,
+ default => $selection.'-'.(($this->rowEnd ?? 0) + 1).','.(($this->columnEnd ?? 0) + 1),
+ };
+ }
- return new self($cellStartRow, $cellEndRow, $cellStartCol, $cellEndCol,);
+ /**
+  * Returns the column offsets covered by the selection.
+  *
+  * Yields an empty array when columnStart is -1, null when columnEnd is null,
+  * and the range between both offsets otherwise.
+  */
+ public function columnRange(): ?array
+ {
+     if (-1 === $this->columnStart) {
+         return [];
+     }
+
+     if (null === $this->columnEnd) {
+         return null;
+     }
+
+     return range($this->columnStart, $this->columnEnd);
}
}
diff --git a/src/Fragment/Type.php b/src/Fragment/Type.php
index 28cfe8bc..ce08adb4 100644
--- a/src/Fragment/Type.php
+++ b/src/Fragment/Type.php
@@ -15,8 +15,7 @@
enum Type: string
{
- case Row = 'row';
- case Column = 'col';
case Cell = 'cell';
- case Unknown = 'unknown';
+ case Column = 'col';
+ case Row = 'row';
}
diff --git a/src/FragmentFinder.php b/src/FragmentFinder.php
index 082f3701..fa837f4f 100644
--- a/src/FragmentFinder.php
+++ b/src/FragmentFinder.php
@@ -15,12 +15,6 @@
use ReflectionException;
use League\Csv\Fragment\Expression;
-use League\Csv\Fragment\Selection;
-use League\Csv\Fragment\Type;
-
-use function array_filter;
-use function array_map;
-use function array_reduce;
class FragmentFinder
{
@@ -35,52 +29,19 @@ public static function create(): self
* @throws ReflectionException
* @throws SyntaxError
*
- * @return iterable
- */
- public function findAll(string $expression, TabularDataReader $tabularDataReader): iterable
- {
- return $this->find(Expression::tryFrom($expression), $tabularDataReader);
- }
-
- /**
- * @throws Exception
- * @throws InvalidArgument
- * @throws ReflectionException
- * @throws SyntaxError
- */
- public function findFirst(string $expression, TabularDataReader $tabularDataReader): ?TabularDataReader
- {
- $fragment = $this->find(Expression::tryFrom($expression), $tabularDataReader)[0];
-
- return match ([]) {
- $fragment->first() => null,
- default => $fragment,
- };
- }
-
- /**
- * @throws Exception
- * @throws FragmentNotFound
- * @throws InvalidArgument
- * @throws ReflectionException
- * @throws SyntaxError
+ * @return iterable
*/
- public function findFirstOrFail(string $expression, TabularDataReader $tabularDataReader): TabularDataReader
+ public function findAll(Expression|string $expression, TabularDataReader $tabularDataReader): iterable
{
- $parsedExpression = Expression::tryFrom($expression);
- if ([] !== array_filter(
- $parsedExpression->selections,
- fn (Selection $selection): bool => -1 === $selection->rowStart && -1 === $selection->columnStart)
- ) {
- throw new FragmentNotFound('The expression `'.$expression.'` contains an invalid or an unsupported selection for the tabular data.');
+ $found = false;
+ foreach ($this->find($expression, $tabularDataReader) as $result) {
+ $found = true;
+ yield $result;
}
- $fragment = $this->find($parsedExpression, $tabularDataReader)[0];
-
- return match ([]) {
- $fragment->first() => throw new FragmentNotFound('No fragment found in the tabular data with the expression `'.$expression.'`.'),
- default => $fragment,
- };
+ if (false === $found) {
+ yield ResultSet::createFromRecords();
+ }
}
/**
@@ -88,81 +49,30 @@ public function findFirstOrFail(string $expression, TabularDataReader $tabularDa
* @throws InvalidArgument
* @throws ReflectionException
* @throws SyntaxError
- *
- * @return array
*/
- private function find(Expression $expression, TabularDataReader $tabularDataReader): array
+ public function findFirst(Expression|string $expression, TabularDataReader $tabularDataReader): ?TabularDataReader
{
- return match ($expression->type) {
- Type::Row => $this->findByRow($expression, $tabularDataReader),
- Type::Column => $this->findByColumn($expression, $tabularDataReader),
- Type::Cell => $this->findByCell($expression, $tabularDataReader),
- Type::Unknown => [ResultSet::createFromRecords()],
- };
- }
+ foreach ($this->find($expression, $tabularDataReader) as $fragment) {
+ if ($fragment->first() === []) {
+ return null;
+ }
- /**
- * @throws Exception
- * @throws InvalidArgument
- * @throws SyntaxError
- * @throws ReflectionException
- *
- * @return array
- */
- private function findByRow(Expression $expression, TabularDataReader $tabularDataReader): array
- {
- $selections = array_filter($expression->selections, fn (Selection $selection): bool => -1 < $selection->rowStart);
- if ([] === $selections) {
- return [ResultSet::createFromRecords()];
+ return $fragment;
}
- $rowFilter = fn(array $record, int $offset): bool => [] !== array_filter(
- $selections,
- fn(Selection $selection) => $offset >= $selection->rowStart &&
- (null === $selection->rowEnd || $offset <= $selection->rowEnd)
- );
-
- return [Statement::create()->where($rowFilter)->process($tabularDataReader)];
+ return null;
}
/**
* @throws Exception
+ * @throws FragmentNotFound
* @throws InvalidArgument
* @throws ReflectionException
* @throws SyntaxError
- *
- * @return array
*/
- private function findByColumn(Expression $expression, TabularDataReader $tabularDataReader): array
+ public function findFirstOrFail(Expression|string $expression, TabularDataReader $tabularDataReader): TabularDataReader
{
- $header = $tabularDataReader->getHeader();
- if ([] === $header) {
- $header = $tabularDataReader->first();
- }
-
- $nbColumns = count($header);
- $selections = array_filter($expression->selections, fn(Selection $selection) => -1 < $selection->columnStart);
- if ([] === $selections) {
- return [ResultSet::createFromRecords()];
- }
-
- /** @var array $columns */
- $columns = array_reduce(
- $selections,
- fn (array $columns, Selection $selection): array => [
- ...$columns,
- ...match (($columnRange = $selection->columnRange())) {
- null => range($selection->columnStart, $nbColumns - 1),
- default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? range($selection->columnStart, $nbColumns - 1) : $columnRange,
- }
- ],
- []
- );
-
- return [match ([]) {
- $columns => ResultSet::createFromRecords(),
- default => Statement::create()->select(...$columns)->process($tabularDataReader),
- }];
+ return $this->findFirst($expression, $tabularDataReader) ?? throw new FragmentNotFound('No fragment found in the tabular data with the expression `'.$expression.'`.');
}
/**
@@ -171,38 +81,20 @@ private function findByColumn(Expression $expression, TabularDataReader $tabular
* @throws ReflectionException
* @throws SyntaxError
*
- * @return array
+ * @return iterable<TabularDataReader>
*/
- private function findByCell(Expression $expression, TabularDataReader $tabularDataReader): array
+ public function find(Expression|string $expression, TabularDataReader $tabularDataReader): iterable
{
- $header = $tabularDataReader->getHeader();
- if ([] === $header) {
- $header = $tabularDataReader->first();
+ if (!$expression instanceof Expression) {
+ try {
+ $expression = Expression::from($expression);
+ } catch (FragmentNotFound) {
+ return;
+ }
}
- $nbColumns = count($header);
- $selections = array_filter(
- $expression->selections,
- fn(Selection $selection) => -1 < $selection->rowStart && -1 < $selection->columnStart
- );
- if ([] === $selections) {
- return [ResultSet::createFromRecords()];
+ foreach ($expression->query($tabularDataReader) as $selection => $statement) {
+ yield $selection => $statement->process($tabularDataReader);
}
-
- return array_map(
- fn (Selection $selection): TabularDataReader => Statement::create()
- ->where(
- fn (array $record, int $offset): bool => $offset >= $selection->rowStart &&
- (null === $selection->rowEnd || $offset <= $selection->rowEnd)
- )
- ->select(
- ...match (($columnRange = $selection->columnRange())) {
- null => range($selection->columnStart, $nbColumns - 1),
- default => $selection->columnEnd > $nbColumns || $selection->columnEnd === -1 ? range($selection->columnStart, $nbColumns - 1) : $columnRange,
- }
- )
- ->process($tabularDataReader),
- $selections
- );
}
}
diff --git a/src/Reader.php b/src/Reader.php
index 7ba53d68..b461b351 100644
--- a/src/Reader.php
+++ b/src/Reader.php
@@ -17,6 +17,7 @@
use Closure;
use Iterator;
use JsonSerializable;
+use League\Csv\Fragment\Expression;
use League\Csv\Serializer\Denormalizer;
use League\Csv\Serializer\MappingFailed;
use League\Csv\Serializer\TypeCastingFailed;
@@ -413,12 +414,12 @@ public function sorted(Query\Sort|Closure $orderBy): TabularDataReader
return Statement::create()->orderBy($orderBy)->process($this);
}
- public function matching(string $expression): iterable
+ public function matching(Expression|string $expression): iterable
{
- return FragmentFinder::create()->findAll($expression, $this);
+ return FragmentFinder::create()->find($expression, $this);
}
- public function matchingFirst(string $expression): ?TabularDataReader
+ public function matchingFirst(Expression|string $expression): ?TabularDataReader
{
return FragmentFinder::create()->findFirst($expression, $this);
}
@@ -427,7 +428,7 @@ public function matchingFirst(string $expression): ?TabularDataReader
* @throws SyntaxError
* @throws FragmentNotFound
*/
- public function matchingFirstOrFail(string $expression): TabularDataReader
+ public function matchingFirstOrFail(Expression|string $expression): TabularDataReader
{
return FragmentFinder::create()->findFirstOrFail($expression, $this);
}
diff --git a/src/ResultSet.php b/src/ResultSet.php
index 2c64db05..985c8237 100644
--- a/src/ResultSet.php
+++ b/src/ResultSet.php
@@ -20,6 +20,7 @@
use Iterator;
use IteratorIterator;
use JsonSerializable;
+use League\Csv\Fragment\Expression;
use League\Csv\Serializer\Denormalizer;
use League\Csv\Serializer\MappingFailed;
use League\Csv\Serializer\TypeCastingFailed;
@@ -269,12 +270,12 @@ public function select(string|int ...$columns): TabularDataReader
return new self(new MapIterator($this, $callback), $hasHeader ? $header : []);
}
- public function matching(string $expression): iterable
+ public function matching(Expression|string $expression): iterable
{
- return FragmentFinder::create()->findAll($expression, $this);
+ return FragmentFinder::create()->find($expression, $this);
}
- public function matchingFirst(string $expression): ?TabularDataReader
+ public function matchingFirst(Expression|string $expression): ?TabularDataReader
{
return FragmentFinder::create()->findFirst($expression, $this);
}
@@ -283,7 +284,7 @@ public function matchingFirst(string $expression): ?TabularDataReader
* @throws SyntaxError
* @throws FragmentNotFound
*/
- public function matchingFirstOrFail(string $expression): TabularDataReader
+ public function matchingFirstOrFail(Expression|string $expression): TabularDataReader
{
return FragmentFinder::create()->findFirstOrFail($expression, $this);
}
diff --git a/src/TabularDataReaderTestCase.php b/src/TabularDataReaderTestCase.php
index f2de6c9d..f6b63d69 100644
--- a/src/TabularDataReaderTestCase.php
+++ b/src/TabularDataReaderTestCase.php
@@ -270,13 +270,17 @@ public static function provideInvalidExpressions(): iterable
#[Test]
public function it_returns_multiple_selections_in_one_tabular_data_instance(): void
{
- self::assertCount(1, $this->tabularData()->matching('row=1-2;5-4;2-4'));
+ $count = iterator_count($this->tabularData()->matching('row=1-2;5-4;2-4'));
+
+ self::assertSame(1, $count);
}
#[Test]
public function it_returns_no_selection(): void
{
- self::assertCount(1, $this->tabularData()->matching('row=5-4'));
+ $count = iterator_count($this->tabularData()->matching('row=5-4'));
+
+ self::assertSame(0, $count);
}
#[Test]