From 18a5396eb41dd8e06f389aa39f891711e3ee9d29 Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Sat, 17 Feb 2024 08:48:31 -0800 Subject: [PATCH] Improve Performance of Csv Writer Fix #3904. PR #3839 provided a huge performance boost for sparsely populated spreadsheets. Unfortunately, it degraded performance for more densely populated spreadsheets when writing Csvs. The reason is that Csv Writer calls toArray for each row, meaning that a lot of the intermediate data used to speed things up needs to be recalculated for every row. It would be better off calling toArray just once for the entire spreadsheet; however this gives back some of the memory improvements of PR #3834. However, the memory effects can be substantially alleviated by supplying a Generator function to do the work. This PR does that; the result is that Csv Writer is now quite a bit faster, and with only a small memory uptick, vs. its performance in PhpSpreadsheet 1.29. --- src/PhpSpreadsheet/Worksheet/Worksheet.php | 60 ++++++++++++++++------ src/PhpSpreadsheet/Writer/Csv.php | 7 +-- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/src/PhpSpreadsheet/Worksheet/Worksheet.php b/src/PhpSpreadsheet/Worksheet/Worksheet.php index 2ba3f08187..0e1f4dc321 100644 --- a/src/PhpSpreadsheet/Worksheet/Worksheet.php +++ b/src/PhpSpreadsheet/Worksheet/Worksheet.php @@ -3,6 +3,7 @@ namespace PhpOffice\PhpSpreadsheet\Worksheet; use ArrayObject; +use Generator; use PhpOffice\PhpSpreadsheet\Calculation\Calculation; use PhpOffice\PhpSpreadsheet\Calculation\Functions; use PhpOffice\PhpSpreadsheet\Cell\AddressRange; @@ -2800,9 +2801,6 @@ protected function cellToArray(Cell $cell, bool $calculateFormulas, bool $format return $returnValue; } - /** @var array */ - private array $hiddenColumns; - /** * Create array from a range of cells. * @@ -2822,9 +2820,40 @@ public function rangeToArray( bool $returnCellRef = false, bool $ignoreHidden = false ): array { + $returnValue = []; + + // Loop through rows + foreach ($this->rangeToArrayYieldRows($range, $nullValue, $calculateFormulas, $formatData, $returnCellRef, $ignoreHidden) as $rowRef => $rowArray) { + $returnValue[$rowRef] = $rowArray; + } + + // Return + return $returnValue; + } + + /** + * Create array from a range of cells, yielding each row in turn. + * + * @param mixed $nullValue Value returned in the array entry if a cell doesn't exist + * @param bool $calculateFormulas Should formulas be calculated? + * @param bool $formatData Should formatting be applied to cell values? + * @param bool $returnCellRef False - Return a simple array of rows and columns indexed by number counting from zero + * True - Return rows and columns indexed by their actual row and column IDs + * @param bool $ignoreHidden False - Return values for rows/columns even if they are defined as hidden. + * True - Don't return values for rows/columns that are defined as hidden. + * + * @return Generator + */ + public function rangeToArrayYieldRows( + string $range, + mixed $nullValue = null, + bool $calculateFormulas = true, + bool $formatData = true, + bool $returnCellRef = false, + bool $ignoreHidden = false + ) { $range = Validations::validateCellOrCellRange($range); - $returnValue = []; // Identify the range that we need to extract from the worksheet [$rangeStart, $rangeEnd] = Coordinate::rangeBoundaries($range); $minCol = Coordinate::stringFromColumnIndex($rangeStart[0]); @@ -2835,8 +2864,10 @@ public function rangeToArray( $maxColInt = $rangeEnd[0]; ++$maxCol; - $nullRow = $this->buildNullRow($nullValue, $minCol, $maxCol, $returnCellRef, $ignoreHidden); - $hideColumns = !empty($this->hiddenColumns); + /** @var array */ + $hiddenColumns = []; + $nullRow = $this->buildNullRow($nullValue, $minCol, $maxCol, $returnCellRef, $ignoreHidden, $hiddenColumns); + $hideColumns = !empty($hiddenColumns); $keys = $this->cellCollection->getSortedCoordinatesInt(); $keyIndex = 0; @@ -2847,7 +2878,7 @@ public function rangeToArray( continue; } $rowRef = $returnCellRef ? $row : ($row - $minRow); - $returnValue[$rowRef] = $nullRow; + $returnValue = $nullRow; $index = ($row - 1) * AddressRange::MAX_COLUMN_INT + 1; $indexPlus = $index + AddressRange::MAX_COLUMN_INT - 1; @@ -2860,24 +2891,22 @@ public function rangeToArray( $thisCol = ($key % AddressRange::MAX_COLUMN_INT) ?: AddressRange::MAX_COLUMN_INT; if ($thisCol >= $minColInt && $thisCol <= $maxColInt) { $col = Coordinate::stringFromColumnIndex($thisCol); - if ($hideColumns === false || !isset($this->hiddenColumns[$col])) { + if ($hideColumns === false || !isset($hiddenColumns[$col])) { $columnRef = $returnCellRef ? $col : ($thisCol - $minColInt); $cell = $this->cellCollection->get("{$col}{$thisRow}"); if ($cell !== null) { $value = $this->cellToArray($cell, $calculateFormulas, $formatData, $nullValue); if ($value !== $nullValue) { - $returnValue[$rowRef][$columnRef] = $value; + $returnValue[$columnRef] = $value; } } } } ++$keyIndex; } - } - unset($this->hiddenColumns); - // Return - return $returnValue; + yield $rowRef => $returnValue; + } } /** @@ -2890,6 +2919,7 @@ public function rangeToArray( * True - Return rows and columns indexed by their actual row and column IDs * @param bool $ignoreHidden False - Return values for rows/columns even if they are defined as hidden. * True - Don't return values for rows/columns that are defined as hidden. + * @param array $hiddenColumns */ private function buildNullRow( mixed $nullValue, @@ -2897,13 +2927,13 @@ private function buildNullRow( string $maxCol, bool $returnCellRef, bool $ignoreHidden, + array &$hiddenColumns ): array { - $this->hiddenColumns = []; $nullRow = []; $c = -1; for ($col = $minCol; $col !== $maxCol; ++$col) { if ($ignoreHidden === true && $this->columnDimensionExists($col) && $this->getColumnDimension($col)->getVisible() === false) { - $this->hiddenColumns[$col] = true; + $hiddenColumns[$col] = true; } else { $columnRef = $returnCellRef ? $col : ++$c; $nullRow[$columnRef] = $nullValue; diff --git a/src/PhpSpreadsheet/Writer/Csv.php b/src/PhpSpreadsheet/Writer/Csv.php index 73165d67c2..eadfc27bdb 100644 --- a/src/PhpSpreadsheet/Writer/Csv.php +++ b/src/PhpSpreadsheet/Writer/Csv.php @@ -104,11 +104,8 @@ public function save($filename, int $flags = 0): void $maxRow = $sheet->getHighestDataRow(); // Write rows to file - for ($row = 1; $row <= $maxRow; ++$row) { - // Convert the row to an array... - $cellsArray = $sheet->rangeToArray('A' . $row . ':' . $maxCol . $row, '', $this->preCalculateFormulas); - // ... and write to the file - $this->writeLine($this->fileHandle, $cellsArray[0]); + foreach ($sheet->rangeToArrayYieldRows("A1:$maxCol$maxRow", '', $this->preCalculateFormulas) as $cellsArray) { + $this->writeLine($this->fileHandle, $cellsArray); } $this->maybeCloseFileHandle();