Skip to content

Commit

Permalink
Improve Performance of Csv Writer
Browse files Browse the repository at this point in the history
Fix PHPOffice#3904. PR PHPOffice#3839 provided a huge performance boost for sparsely populated spreadsheets. Unfortunately, it degraded performance for more densely populated spreadsheets when writing Csvs. The reason is that Csv Writer calls toArray for each row, meaning that a lot of the intermediate data used to speed things up needs to be recalculated for every row. It would be better off calling toArray just once for the entire spreadsheet; however this gives back some of the memory improvements of PR PHPOffice#3834. However, the memory effects can be substantially alleviated by supplying a Generator function to do the work. This PR does that; the result is that Csv Writer is now quite a bit faster, and with only a small memory uptick, vs. its performance in PhpSpreadsheet 1.29.
  • Loading branch information
oleibman committed Feb 17, 2024
1 parent d620497 commit 18a5396
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 20 deletions.
60 changes: 45 additions & 15 deletions src/PhpSpreadsheet/Worksheet/Worksheet.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace PhpOffice\PhpSpreadsheet\Worksheet;

use ArrayObject;
use Generator;
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
use PhpOffice\PhpSpreadsheet\Calculation\Functions;
use PhpOffice\PhpSpreadsheet\Cell\AddressRange;
Expand Down Expand Up @@ -2800,9 +2801,6 @@ protected function cellToArray(Cell $cell, bool $calculateFormulas, bool $format
return $returnValue;
}

/** @var array<string, bool> */
private array $hiddenColumns;

/**
* Create array from a range of cells.
*
Expand All @@ -2822,9 +2820,40 @@ public function rangeToArray(
bool $returnCellRef = false,
bool $ignoreHidden = false
): array {
$returnValue = [];

// Loop through rows
foreach ($this->rangeToArrayYieldRows($range, $nullValue, $calculateFormulas, $formatData, $returnCellRef, $ignoreHidden) as $rowRef => $rowArray) {
$returnValue[$rowRef] = $rowArray;
}

// Return
return $returnValue;
}

/**
* Create array from a range of cells, yielding each row in turn.
*
* @param mixed $nullValue Value returned in the array entry if a cell doesn't exist
* @param bool $calculateFormulas Should formulas be calculated?
* @param bool $formatData Should formatting be applied to cell values?
* @param bool $returnCellRef False - Return a simple array of rows and columns indexed by number counting from zero
* True - Return rows and columns indexed by their actual row and column IDs
* @param bool $ignoreHidden False - Return values for rows/columns even if they are defined as hidden.
* True - Don't return values for rows/columns that are defined as hidden.
*
* @return Generator
*/
public function rangeToArrayYieldRows(
string $range,
mixed $nullValue = null,
bool $calculateFormulas = true,
bool $formatData = true,
bool $returnCellRef = false,
bool $ignoreHidden = false
) {
$range = Validations::validateCellOrCellRange($range);

$returnValue = [];
// Identify the range that we need to extract from the worksheet
[$rangeStart, $rangeEnd] = Coordinate::rangeBoundaries($range);
$minCol = Coordinate::stringFromColumnIndex($rangeStart[0]);
Expand All @@ -2835,8 +2864,10 @@ public function rangeToArray(
$maxColInt = $rangeEnd[0];

++$maxCol;
$nullRow = $this->buildNullRow($nullValue, $minCol, $maxCol, $returnCellRef, $ignoreHidden);
$hideColumns = !empty($this->hiddenColumns);
/** @var array<string, bool> */
$hiddenColumns = [];
$nullRow = $this->buildNullRow($nullValue, $minCol, $maxCol, $returnCellRef, $ignoreHidden, $hiddenColumns);
$hideColumns = !empty($hiddenColumns);

$keys = $this->cellCollection->getSortedCoordinatesInt();
$keyIndex = 0;
Expand All @@ -2847,7 +2878,7 @@ public function rangeToArray(
continue;
}
$rowRef = $returnCellRef ? $row : ($row - $minRow);
$returnValue[$rowRef] = $nullRow;
$returnValue = $nullRow;

$index = ($row - 1) * AddressRange::MAX_COLUMN_INT + 1;
$indexPlus = $index + AddressRange::MAX_COLUMN_INT - 1;
Expand All @@ -2860,24 +2891,22 @@ public function rangeToArray(
$thisCol = ($key % AddressRange::MAX_COLUMN_INT) ?: AddressRange::MAX_COLUMN_INT;
if ($thisCol >= $minColInt && $thisCol <= $maxColInt) {
$col = Coordinate::stringFromColumnIndex($thisCol);
if ($hideColumns === false || !isset($this->hiddenColumns[$col])) {
if ($hideColumns === false || !isset($hiddenColumns[$col])) {
$columnRef = $returnCellRef ? $col : ($thisCol - $minColInt);
$cell = $this->cellCollection->get("{$col}{$thisRow}");
if ($cell !== null) {
$value = $this->cellToArray($cell, $calculateFormulas, $formatData, $nullValue);
if ($value !== $nullValue) {
$returnValue[$rowRef][$columnRef] = $value;
$returnValue[$columnRef] = $value;
}
}
}
}
++$keyIndex;
}
}
unset($this->hiddenColumns);

// Return
return $returnValue;
yield $rowRef => $returnValue;
}
}

/**
Expand All @@ -2890,20 +2919,21 @@ public function rangeToArray(
* True - Return rows and columns indexed by their actual row and column IDs
* @param bool $ignoreHidden False - Return values for rows/columns even if they are defined as hidden.
* True - Don't return values for rows/columns that are defined as hidden.
* @param array<string, bool> $hiddenColumns
*/
private function buildNullRow(
mixed $nullValue,
string $minCol,
string $maxCol,
bool $returnCellRef,
bool $ignoreHidden,
array &$hiddenColumns
): array {
$this->hiddenColumns = [];
$nullRow = [];
$c = -1;
for ($col = $minCol; $col !== $maxCol; ++$col) {
if ($ignoreHidden === true && $this->columnDimensionExists($col) && $this->getColumnDimension($col)->getVisible() === false) {
$this->hiddenColumns[$col] = true;
$hiddenColumns[$col] = true;
} else {
$columnRef = $returnCellRef ? $col : ++$c;
$nullRow[$columnRef] = $nullValue;
Expand Down
7 changes: 2 additions & 5 deletions src/PhpSpreadsheet/Writer/Csv.php
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,8 @@ public function save($filename, int $flags = 0): void
$maxRow = $sheet->getHighestDataRow();

// Write rows to file
for ($row = 1; $row <= $maxRow; ++$row) {
// Convert the row to an array...
$cellsArray = $sheet->rangeToArray('A' . $row . ':' . $maxCol . $row, '', $this->preCalculateFormulas);
// ... and write to the file
$this->writeLine($this->fileHandle, $cellsArray[0]);
foreach ($sheet->rangeToArrayYieldRows("A1:$maxCol$maxRow", '', $this->preCalculateFormulas) as $cellsArray) {
$this->writeLine($this->fileHandle, $cellsArray);
}

$this->maybeCloseFileHandle();
Expand Down

0 comments on commit 18a5396

Please sign in to comment.