diff --git a/docs/9.0/reader/tabular-data-reader.md b/docs/9.0/reader/tabular-data-reader.md
index e1378d67..177396ac 100644
--- a/docs/9.0/reader/tabular-data-reader.md
+++ b/docs/9.0/reader/tabular-data-reader.md
@@ -513,24 +513,15 @@ $reader->matchingFirstOrFail('row=3-1;4-6'); // will throw
 
 If you are dealing with a large CSV and you want it to be split in smaller sizes for better handling you can use
 the `chunkBy` method which breaks the `TabularDataReader` into multiple, smaller instances with a given size. The
-last instance may contain fewer records because of the chunk size you have chosen. The method passes each
-small instances to a closure.
+last instance may contain fewer records because of the chunk size you have chosen.
 
 ```php
 use League\Csv\Reader;
 use League\Csv\TabularDataReader;
 use League\Csv\Writer;
 
-Reader::createFromPath('path/to/a/huge/file.csv')
-    ->chunkBy(
-        1000,
-        function (TabularDataReader $data, int $offset): bool {
-            $writer = Writer::createFromPath('path/to/a/huge/file/split-'.($offset + 1).'.csv', 'w');
-            $writer->setEscape('');
-            $writer->insertOne($data->getHeader());
-            $writer->insertAll($data);
-
-            return true;
-        }
-    );
+$chunks = Reader::createFromPath('path/to/a/huge/file.csv')->chunkBy(1000);
+foreach ($chunks as $chunk) {
+    // $chunk is a small CSV of 1000 records or less
+}
 ```
diff --git a/src/Reader.php b/src/Reader.php
index 7669ebbc..36b841a9 100644
--- a/src/Reader.php
+++ b/src/Reader.php
@@ -320,14 +320,17 @@ public function reduce(Closure $closure, mixed $initial = null): mixed
     }
 
     /**
-     * @param positive-int $length
-     * @param Closure(TabularDataReader, int=): bool $closure
+     * Splits the CSV records into consecutive chunks by delegating to ResultSet::chunkBy().
+     *
+     * @param positive-int $recordsCount
      *
      * @throws InvalidArgument
+     *
+     * @return iterable<TabularDataReader>
      */
-    public function chunkBy(int $length, Closure $closure): bool
+    public function chunkBy(int $recordsCount): iterable
     {
-        return ResultSet::createFromTabularDataReader($this)->chunkBy($length, $closure);
+        return ResultSet::createFromTabularDataReader($this)->chunkBy($recordsCount);
     }
 
     /**
@@ -405,9 +408,10 @@ public function select(string|int ...$columns): TabularDataReader
      */
     public function getRecords(array $header = []): Iterator
     {
-        $header = $this->prepareHeader($header);
-
-        return $this->combineHeader($this->prepareRecords(), $header);
+        return $this->combineHeader(
+            $this->prepareRecords(),
+            $this->prepareHeader($header)
+        );
     }
 
     /**
diff --git a/src/ResultSet.php b/src/ResultSet.php
index e9a21515..d7feaf48 100644
--- a/src/ResultSet.php
+++ b/src/ResultSet.php
@@ -161,40 +161,40 @@ public function reduce(Closure $closure, mixed $initial = null): mixed
     }
 
     /**
-     * @param positive-int $length
-     * @param Closure(TabularDataReader, int=): bool $closure
+     * Splits the records into consecutive chunks holding at most $recordsCount records each.
+     *
+     * The body is a generator, so the size check and the record iteration only run
+     * once the returned iterable is first traversed.
+     *
+     * @param positive-int $recordsCount
      *
      * @throws InvalidArgument
+     *
+     * @return iterable<TabularDataReader>
      */
-    public function chunkBy(int $length, Closure $closure): bool
+    public function chunkBy(int $recordsCount): iterable
     {
-        if ($length < 1) {
-            throw InvalidArgument::dueToInvalidChunkSize($length, __METHOD__);
+        if ($recordsCount < 1) {
+            throw InvalidArgument::dueToInvalidChunkSize($recordsCount, __METHOD__);
         }
 
         $header = $this->getHeader();
         $records = [];
         $nbRecords = 0;
-        $chunkOffset = 0;
         foreach ($this->getRecords() as $record) {
             $records[] = $record;
             ++$nbRecords;
-            if ($nbRecords === $length) {
-                $result = $closure(new self($records, $header), $chunkOffset);
-                if (false === $result) {
-                    return false;
-                }
+            if ($nbRecords === $recordsCount) {
+                yield new self($records, $header);
+                // Reset the buffer so the next chunk does not re-include records already yielded.
                 $nbRecords = 0;
                 $records = [];
-                ++$chunkOffset;
             }
         }
 
         if ([] !== $records) {
-            return $closure(new self($records, $header), $chunkOffset);
+            yield new self($records, $header);
         }
-
-        return true;
     }
 
     /**
diff --git a/src/TabularDataReader.php b/src/TabularDataReader.php
index 9f2fe80e..a06ad864 100644
--- a/src/TabularDataReader.php
+++ b/src/TabularDataReader.php
@@ -40,7 +40,7 @@
  * @method TabularDataReader matchingFirstOrFail(string $expression) extract the first found fragment identifier of the tabular data or fail
  * @method TabularDataReader|null matchingFirst(string $expression) extract the first found fragment identifier of the tabular data or return null if none is found
  * @method iterable matching(string $expression) extract all found fragment identifiers for the tabular data
- * @method bool chunkBy(int $length, Closure $closure) Chunk the TabulaDataReader into smaller TabularDataReader instances of the given size or less and apply a callable to them.
+ * @method iterable<TabularDataReader> chunkBy(int $recordsCount) Chunk the TabularDataReader into smaller TabularDataReader instances of the given size or less.
  * @method TabularDataReader mapHeader(array $headers) Returns a new TabulaDataReader with a new set of headers.
  */
 interface TabularDataReader extends Countable, IteratorAggregate
diff --git a/src/TabularDataReaderTestCase.php b/src/TabularDataReaderTestCase.php
index f8b17f27..f2de6c9d 100644
--- a/src/TabularDataReaderTestCase.php
+++ b/src/TabularDataReaderTestCase.php
@@ -20,7 +20,6 @@
 use PHPUnit\Framework\Attributes\Group;
 use PHPUnit\Framework\Attributes\Test;
 use PHPUnit\Framework\TestCase;
-use ValueError;
 
 #[Group('tabulardata')]
 abstract class TabularDataReaderTestCase extends TestCase
@@ -454,16 +453,9 @@ public function __construct(
     public function testChunkingTabularDataUsingTheRangeMethod(): void
     {
-        $this->tabularDataWithHeader()
-            ->chunkBy(
-                4,
-                function (TabularDataReader $tabularData, int $offset): bool {
-                    match ($offset) {
-                        0 => self::assertCount(4, $tabularData),
-                        1 => self::assertCount(2, $tabularData),
-                        default => throw new ValueError('This should not happen.'),
-                    };
-
-                    return true;
-                }
-            );
+        $chunks = [...$this->tabularDataWithHeader()->chunkBy(4)];
+
+        // Six records split by four must give a full chunk followed by the two leftover records.
+        self::assertCount(2, $chunks);
+        self::assertCount(4, $chunks[0]);
+        self::assertCount(2, $chunks[1]);
     }