Skip to content
This repository has been archived by the owner on May 26, 2022. It is now read-only.

Commit

Permalink
Merge pull request #187 from skeleton/issue-183
Browse files Browse the repository at this point in the history
Fix line breaks on CSV reader
  • Loading branch information
adrilo committed Mar 24, 2016
2 parents e321f30 + d6e8fe4 commit b69e280
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 50 deletions.
29 changes: 0 additions & 29 deletions src/Spout/Common/Helper/GlobalFunctionsHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -291,35 +291,6 @@ public function stream_get_wrappers()
return stream_get_wrappers();
}

/**
 * Wrapper around global function stream_get_line()
 * @see stream_get_line()
 *
 * @param resource $handle Stream to read from
 * @param int $length Maximum number of bytes to read
 * @param string|null $ending Optional delimiter to stop reading at; NULL means "no delimiter"
 * @return string|bool The data read from the stream, or FALSE on failure
 */
public function stream_get_line($handle, $length, $ending = null)
{
    // The native function expects a string delimiter: forwarding NULL is deprecated since PHP 8.1.
    // An empty string is the native default, which is also what NULL used to be coerced to.
    $delimiter = ($ending === null) ? '' : $ending;

    return stream_get_line($handle, $length, $delimiter);
}

/**
 * Wrapper around global function str_getcsv()
 * @see str_getcsv()
 *
 * @param string $input The CSV string to parse
 * @param string|null $delimiter Field delimiter; NULL falls back to the native default (",")
 * @param string|null $enclosure Field enclosure; NULL falls back to the native default ('"')
 * @param string|null $escape Escape character; NULL falls back to the native default ("\")
 * @return array The fields read from the input string
 */
public function str_getcsv($input, $delimiter = null, $enclosure = null, $escape = null)
{
    // Forwarding NULL to the native string parameters is deprecated since PHP 8.1.
    // Substituting the documented defaults keeps the historical result (NULL used to be
    // coerced to an empty string, which the native function treated as "use the default").
    $fieldDelimiter = ($delimiter === null) ? ',' : $delimiter;
    $fieldEnclosure = ($enclosure === null) ? '"' : $enclosure;
    $escapeCharacter = ($escape === null) ? '\\' : $escape;

    return str_getcsv($input, $fieldDelimiter, $fieldEnclosure, $escapeCharacter);
}

/**
* Wrapper around global function function_exists()
* @see function_exists()
Expand Down
8 changes: 8 additions & 0 deletions src/Spout/Reader/CSV/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class Reader extends AbstractReader
/** @var string Defines the End of line */
protected $endOfLineCharacter = "\n";

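/** @var string|false Value of the 'auto_detect_line_endings' ini setting, saved when the reader is opened and restored when it is closed */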
protected $autoDetectLineEndings;

/**
* Sets the field delimiter for the CSV.
* Needs to be called before opening the reader.
Expand Down Expand Up @@ -104,6 +107,9 @@ protected function doesSupportStreamWrapper()
*/
protected function openReader($filePath)
{
$this->autoDetectLineEndings = ini_get('auto_detect_line_endings');
ini_set('auto_detect_line_endings', '1');

$this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r');
if (!$this->filePointer) {
throw new IOException("Could not open file $filePath for reading.");
Expand Down Expand Up @@ -140,5 +146,7 @@ protected function closeReader()
if ($this->filePointer) {
$this->globalFunctionsHelper->fclose($this->filePointer);
}

ini_set('auto_detect_line_endings', $this->autoDetectLineEndings);
}
}
53 changes: 32 additions & 21 deletions src/Spout/Reader/CSV/RowIterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
class RowIterator implements IteratorInterface
{
/**
* If no value is given to stream_get_line(), it defaults to 8192 (which may be too low).
* If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
* Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
*/
const MAX_READ_BYTES_PER_LINE = 32768;
Expand Down Expand Up @@ -128,16 +128,12 @@ public function next()
}

do {
$lineData = false;
$utf8EncodedLineData = $this->getNextUTF8EncodedLine();
if ($utf8EncodedLineData !== false) {
$lineData = $this->globalFunctionsHelper->str_getcsv($utf8EncodedLineData, $this->fieldDelimiter, $this->fieldEnclosure);
}
$rowData = $this->getNextUTF8EncodedRow();
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
} while (($lineData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($lineData));
} while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData));

if ($lineData !== false) {
$this->rowDataBuffer = $lineData;
if ($rowData !== false) {
$this->rowDataBuffer = $rowData;
$this->numReadRows++;
} else {
// If we reach this point, it means end of file was reached.
Expand All @@ -147,24 +143,39 @@ public function next()
}

/**
* Returns the next line, converted if necessary to UTF-8.
* Neither fgets() nor fgetcsv() works with non UTF-8 data, so some things need to be done manually.
* Returns the next row, converted if necessary to UTF-8.
* Because fgetcsv() does not correctly handle non UTF-8 encodings,
* stray whitespace is trimmed manually with ltrim() or rtrim() (depending on the byte order).
*
* @return string|false The next line for the current file pointer, encoded in UTF-8, or FALSE if there is nothing to read
* @return array|false The row for the current file pointer, encoded in UTF-8, or FALSE if there is nothing to read
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
*/
protected function getNextUTF8EncodedLine()
protected function getNextUTF8EncodedRow()
{
// Read until the EOL delimiter or EOF is reached. The delimiter's encoding needs to match the CSV's encoding.
$encodedEOLDelimiter = $this->getEncodedEOLDelimiter();
$encodedLineData = $this->globalFunctionsHelper->stream_get_line($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $encodedEOLDelimiter);
$encodedRowData = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
if (false === $encodedRowData) {
return false;
}

// If the line could be read, it can be converted to UTF-8
$utf8EncodedLineData = ($encodedLineData !== false) ?
$this->encodingHelper->attemptConversionToUTF8($encodedLineData, $this->encoding) :
false;
foreach ($encodedRowData as $cellIndex => $cellValue) {
switch($this->encoding) {
case EncodingHelper::ENCODING_UTF16_LE:
case EncodingHelper::ENCODING_UTF32_LE:
// Remove whitespace from the beginning of the string, as fgetcsv() adds extra whitespace when it tries to split non UTF-8 data
// NOTE(review): ltrim()'s default character list also includes NUL bytes - presumably those are the stray bytes being removed; confirm.
$cellValue = ltrim($cellValue);
break;

case EncodingHelper::ENCODING_UTF16_BE:
case EncodingHelper::ENCODING_UTF32_BE:
// Remove whitespace from the end of the string, as fgetcsv() adds extra whitespace when it tries to split non UTF-8 data
$cellValue = rtrim($cellValue);
break;
}

// Each cell is converted to UTF-8 individually, after the optional trimming above
$encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
}

return $utf8EncodedLineData;
return $encodedRowData;
}

/**
Expand Down
9 changes: 9 additions & 0 deletions tests/Spout/Reader/CSV/ReaderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ public function testReadShouldSupportCustomFieldEnclosure()
$this->assertEquals('This is, a comma', $allRows[0][0]);
}

/**
 * Reads csv_with_line_breaks.csv with "," as the field delimiter and checks that
 * the first cell of the first row still contains its embedded line break.
 *
 * @return void
 */
public function testReadShouldNotTruncateLineBreak()
{
    $rows = $this->getAllRowsForFile('csv_with_line_breaks.csv', ',');
    $firstCell = $rows[0][0];

    $this->assertEquals("This is,\na comma", $firstCell);
}

/**
* @return array
*/
Expand Down
2 changes: 2 additions & 0 deletions tests/resources/csv/csv_with_line_breaks.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"This is,
a comma",csv--12

0 comments on commit b69e280

Please sign in to comment.