diff --git a/CHANGELOG.md b/CHANGELOG.md index 497b278d..f45a34fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ All Notable changes to `Csv` will be documented in this file ### Fixed -- None +- BOM string and empty records stripping fix [#394](https://github.com/thephpleague/csv/issues/394) ### Removed diff --git a/src/HTMLConverter.php b/src/HTMLConverter.php index e3458249..8a7c6bdd 100644 --- a/src/HTMLConverter.php +++ b/src/HTMLConverter.php @@ -96,11 +96,13 @@ protected function appendHeaderSection(string $node_name, array $record, DOMElem return; } + /** @var DOMDocument $ownerDocument */ + $ownerDocument = $table->ownerDocument; $node = $this->xml_converter ->rootElement($node_name) ->recordElement('tr') ->fieldElement('th') - ->import([$record], $table->ownerDocument) + ->import([$record], $ownerDocument) ; /** @var DOMElement $element */ diff --git a/src/Reader.php b/src/Reader.php index 770474db..e29351db 100644 --- a/src/Reader.php +++ b/src/Reader.php @@ -132,8 +132,13 @@ protected function setHeader(int $offset): array throw new SyntaxError(sprintf('The header record does not exist or is empty at offset: `%s`', $offset)); } - if (0 === $offset) { - return $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure); + if (0 !== $offset) { + return $header; + } + + $header = $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure); + if ([''] === $header) { + throw new SyntaxError(sprintf('The header record does not exist or is empty at offset: `%s`', $offset)); } return $header; @@ -347,10 +352,19 @@ protected function stripBOM(Iterator $iterator, string $bom): Iterator return $record; } - return $this->removeBOM($record, $bom_length, $this->enclosure); + $record = $this->removeBOM($record, $bom_length, $this->enclosure); + if ([''] === $record) { + return [null]; + } + + return $record; }; - return new MapIterator($iterator, $mapper); + $filter = function (array $record): bool { + return $this->is_empty_records_included || $record != [null]; + }; + + return new CallbackFilterIterator(new MapIterator($iterator, $mapper), $filter); } /** diff --git a/tests/ReaderTest.php b/tests/ReaderTest.php index 27f18ddf..26c53aa1 100644 --- a/tests/ReaderTest.php +++ b/tests/ReaderTest.php @@ -556,4 +556,75 @@ public function sourceProvider(): array ], ]; } + + public function testRemovingEmptyRecordsWhenBOMStringIsPresent(): void + { + $bom = Reader::BOM_UTF8; + $text = <<setHeaderOffset(1); + + self::assertCount(1, $csv); + self::assertSame([ + 'column 1' => 'cell11', + 'column 2' => 'cell12', + 'column 3' => 'cell13', + ], $csv->fetchOne(0)); + + $csv->includeEmptyRecords(); + + self::assertCount(2, $csv); + self::assertSame([ + 'column 1' => null, + 'column 2' => null, + 'column 3' => null, + ], $csv->fetchOne(0)); + } + + public function testRemovingEmptyRecordsWithoutBOMString(): void + { + $text = <<setHeaderOffset(1); + + self::assertCount(1, $csv); + self::assertSame([ + 'column 1' => 'cell11', + 'column 2' => 'cell12', + 'column 3' => 'cell13', + ], $csv->fetchOne(0)); + + $csv->includeEmptyRecords(); + + self::assertCount(2, $csv); + self::assertSame([ + 'column 1' => null, + 'column 2' => null, + 'column 3' => null, + ], $csv->fetchOne(0)); + } + + + public function testGetHeaderThrowsIfTheFirstRecordOnlyContainsBOMString(): void + { + $bom = Reader::BOM_UTF8; + $text = <<setHeaderOffset(0); + + self::expectException(Exception::class); + $csv->getHeader(); + } }