diff --git a/src/Spout/Common/Helper/GlobalFunctionsHelper.php b/src/Spout/Common/Helper/GlobalFunctionsHelper.php
index 3dcddaf0..1d660c10 100644
--- a/src/Spout/Common/Helper/GlobalFunctionsHelper.php
+++ b/src/Spout/Common/Helper/GlobalFunctionsHelper.php
@@ -291,35 +291,6 @@ public function stream_get_wrappers()
         return stream_get_wrappers();
     }
 
-    /**
-     * Wrapper around global function stream_get_line()
-     * @see stream_get_line()
-     *
-     * @param resource $handle
-     * @param int $length
-     * @param string|void $ending
-     * @return string|bool
-     */
-    public function stream_get_line($handle, $length, $ending = null)
-    {
-        return stream_get_line($handle, $length, $ending);
-    }
-
-    /**
-     * Wrapper around global function str_getcsv()
-     * @see str_getcsv()
-     *
-     * @param string $input
-     * @param string|void $delimiter
-     * @param string|void $enclosure
-     * @param string|void $escape
-     * @return array
-     */
-    public function str_getcsv($input, $delimiter = null, $enclosure = null, $escape = null)
-    {
-        return str_getcsv($input, $delimiter, $enclosure, $escape);
-    }
-
     /**
      * Wrapper around global function function_exists()
      * @see function_exists()
diff --git a/src/Spout/Reader/CSV/Reader.php b/src/Spout/Reader/CSV/Reader.php
index 056d0a77..ab887ef7 100644
--- a/src/Spout/Reader/CSV/Reader.php
+++ b/src/Spout/Reader/CSV/Reader.php
@@ -32,6 +32,9 @@ class Reader extends AbstractReader
     /** @var string Defines the End of line */
     protected $endOfLineCharacter = "\n";
 
+    /** @var string|false|null Original "auto_detect_line_endings" INI value, null when there is nothing to restore */
+    protected $autoDetectLineEndings;
+
     /**
      * Sets the field delimiter for the CSV.
      * Needs to be called before opening the reader.
@@ -104,6 +107,11 @@ protected function doesSupportStreamWrapper()
      */
     protected function openReader($filePath)
     {
+        // Remember the current INI value so it can be put back once the reader is closed or fails to open
+        $this->autoDetectLineEndings = ini_get('auto_detect_line_endings');
+        ini_set('auto_detect_line_endings', '1');
+
         $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r');
         if (!$this->filePointer) {
+            $this->restoreAutoDetectLineEndings();
             throw new IOException("Could not open file $filePath for reading.");
@@ -140,5 +148,22 @@ protected function closeReader()
         if ($this->filePointer) {
             $this->globalFunctionsHelper->fclose($this->filePointer);
         }
+
+        $this->restoreAutoDetectLineEndings();
+    }
+
+    /**
+     * Puts the "auto_detect_line_endings" INI setting back to the value captured by openReader().
+     * Does nothing when no value is pending, so calling it more than once is harmless.
+     *
+     * @return void
+     */
+    private function restoreAutoDetectLineEndings()
+    {
+        if (is_string($this->autoDetectLineEndings)) {
+            ini_set('auto_detect_line_endings', $this->autoDetectLineEndings);
+        }
+
+        $this->autoDetectLineEndings = null;
     }
 }
diff --git a/src/Spout/Reader/CSV/RowIterator.php b/src/Spout/Reader/CSV/RowIterator.php
index 42bdba41..95b2596e 100644
--- a/src/Spout/Reader/CSV/RowIterator.php
+++ b/src/Spout/Reader/CSV/RowIterator.php
@@ -14,7 +14,7 @@ class RowIterator implements IteratorInterface
 {
     /**
-     * If no value is given to stream_get_line(), it defaults to 8192 (which may be too low).
+     * If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
      * Alignement with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
      */
     const MAX_READ_BYTES_PER_LINE = 32768;
 
@@ -128,16 +128,12 @@ public function next()
         }
 
         do {
-            $lineData = false;
-            $utf8EncodedLineData = $this->getNextUTF8EncodedLine();
-            if ($utf8EncodedLineData !== false) {
-                $lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
-            }
+            $rowData = $this->getNextUTF8EncodedRow();
             $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
-        } while (($lineData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($lineData));
+        } while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData));
 
-        if ($lineData !== false) {
-            $this->rowDataBuffer = $lineData;
+        if ($rowData !== false) {
+            $this->rowDataBuffer = $rowData;
             $this->numReadRows++;
         } else {
             // If we reach this point, it means end of file was reached.
@@ -147,24 +143,40 @@ public function next()
     }
 
     /**
-     * Returns the next line, converted if necessary to UTF-8.
-     * Neither fgets nor fgetcsv don't work with non UTF-8 data... so we need to do some things manually.
+     * Returns the next row, with every cell converted to UTF-8 if necessary.
+     * As fgetcsv() does not handle non UTF-8 data correctly, the extra whitespace it leaves
+     * is removed manually with ltrim() or rtrim() (depending on the byte order) before the conversion.
      *
-     * @return string|false The next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
+     * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
      * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
      */
-    protected function getNextUTF8EncodedLine()
+    protected function getNextUTF8EncodedRow()
     {
-        // Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
-        $encodedEOLDelimiter = $this->getEncodedEOLDelimiter();
-        $encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $encodedEOLDelimiter);
+        // NOTE(review): called directly, unlike feof() which goes through $this->globalFunctionsHelper - confirm intended
+        $encodedRowData = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
+        if ($encodedRowData === false) {
+            return false;
+        }
 
-        // If the line could have been read, it can be converted to UTF-8
-        $utf8EncodedLineData = ($encodedLineData !== false) ?
-            $this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding) :
-            false;
+        foreach ($encodedRowData as $cellIndex => $cellValue) {
+            switch ($this->encoding) {
+                case EncodingHelper::ENCODING_UTF16_LE:
+                case EncodingHelper::ENCODING_UTF32_LE:
+                    // remove whitespace from the beginning of the string, as fgetcsv() adds extra whitespace when it tries to split non UTF-8 data
+                    $cellValue = ltrim($cellValue);
+                    break;
+
+                case EncodingHelper::ENCODING_UTF16_BE:
+                case EncodingHelper::ENCODING_UTF32_BE:
+                    // remove whitespace from the end of the string, as fgetcsv() adds extra whitespace when it tries to split non UTF-8 data
+                    $cellValue = rtrim($cellValue);
+                    break;
+            }
+
+            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
+        }
 
-        return $utf8EncodedLineData;
+        return $encodedRowData;
     }
 
     /**
diff --git a/tests/Spout/Reader/CSV/ReaderTest.php b/tests/Spout/Reader/CSV/ReaderTest.php
index 78d67215..1bf0e88d 100644
--- a/tests/Spout/Reader/CSV/ReaderTest.php
+++ b/tests/Spout/Reader/CSV/ReaderTest.php
@@ -204,6 +204,15 @@ public function testReadShouldSupportCustomFieldEnclosure()
         $this->assertEquals('This is, a comma', $allRows[0][0]);
     }
 
+    /**
+     * @return void
+     */
+    public function testReadShouldNotTruncateLineBreak()
+    {
+        $allRows = $this->getAllRowsForFile('csv_with_line_breaks.csv', ',');
+        $this->assertEquals("This is,\na comma", $allRows[0][0]);
+    }
+
     /**
      * @return array
      */
diff --git a/tests/resources/csv/csv_with_line_breaks.csv b/tests/resources/csv/csv_with_line_breaks.csv
new file mode 100644
index 00000000..840949af
--- /dev/null
+++ b/tests/resources/csv/csv_with_line_breaks.csv
@@ -0,0 +1,2 @@
+"This is,
+a comma",csv--12