Skip to content

Commit

Permalink
Speed up JsonConverter again
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Oct 16, 2024
1 parent 474b613 commit 1799928
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ All Notable changes to `Csv` will be documented in this file

- `League\Csv\JsonConverter::download` the filename is now nullable
- `League\Csv\XMLConverter::download` the filename is now nullable
- `League\Csv\JsonConverter::save` throws a `TypeError` exception if the `$destination` type is not supported.

### Remove

Expand Down
38 changes: 21 additions & 17 deletions src/JsonConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
use RuntimeException;
use SplFileInfo;
use SplFileObject;
use TypeError;

use function array_filter;
use function array_reduce;
Expand Down Expand Up @@ -104,8 +105,8 @@ final class JsonConverter
public readonly int $depth;
/** @var int<1, max> */
public readonly int $indentSize;
/** @var Closure(T, array-key): mixed */
public readonly Closure $formatter;
/** @var ?Closure(T, array-key): mixed */
public readonly ?Closure $formatter;
/** @var int<1, max> */
public readonly int $chunkSize;
/** @var non-empty-string */
Expand All @@ -127,20 +128,20 @@ public static function create(): self
flags: 0,
depth: 512,
indentSize: 4,
formatter: fn (mixed $value, int|string $offset) => $value,
formatter: null,
chunkSize: 500
);
}

/**
* @param int<1, max> $depth
* @param int<1, max> $indentSize
* @param Closure(T, array-key): mixed $formatter
* @param ?Closure(T, array-key): mixed $formatter
* @param int<1, max> $chunkSize
*
* @throws InvalidArgumentException
*/
private function __construct(int $flags, int $depth, int $indentSize, Closure $formatter, int $chunkSize)
private function __construct(int $flags, int $depth, int $indentSize, ?Closure $formatter, int $chunkSize)
{
json_encode([], $flags & ~JSON_THROW_ON_ERROR, $depth);

Expand Down Expand Up @@ -347,8 +348,6 @@ public function chunkSize(int $chunkSize): self
*/
public function formatter(?Closure $formatter): self
{
$formatter ??= fn (mixed $value, int|string $offset) => $value;

return new self($this->flags, $this->depth, $this->indentSize, $formatter, $this->chunkSize);
}

Expand Down Expand Up @@ -401,10 +400,10 @@ public function encode(iterable $records): string
* @param SplFileInfo|SplFileObject|Stream|resource|string $destination
* @param resource|null $context
*
* @throws UnavailableStream
* @throws InvalidArgumentException
* @throws JsonException
* @throws RuntimeException
* @throws TypeError
* @throws UnavailableStream
*/
public function save(iterable $records, mixed $destination, $context = null): int
{
Expand All @@ -414,7 +413,7 @@ public function save(iterable $records, mixed $destination, $context = null): in
$destination instanceof SplFileInfo => $destination->openFile(mode:'w', context: $context),
is_resource($destination) => Stream::createFromResource($destination),
is_string($destination) => Stream::createFromPath($destination, 'w', $context),
default => throw new InvalidArgumentException('The destination path must be a filename, a stream or a SplFileInfo object.'),
default => throw new TypeError('The destination path must be a filename, a stream or a SplFileInfo object.'),
};
$bytes = 0;
$writtenBytes = 0;
Expand All @@ -427,7 +426,7 @@ public function save(iterable $records, mixed $destination, $context = null): in
}
restore_error_handler();

false !== $writtenBytes || throw new RuntimeException('Unable to write '.(isset($line) ? '`'.$line.'`' : '').' to the destination path.');
false !== $writtenBytes || throw new RuntimeException('Unable to write '.(isset($line) ? '`'.$line.'`' : '').' to the destination path `'.$stream->getPathname().'`.');

return $bytes;
}
Expand All @@ -444,32 +443,37 @@ public function save(iterable $records, mixed $destination, $context = null): in
*/
public function convert(iterable $records): Iterator
{
$iterator = MapIterator::fromIterable($records, $this->formatter);
$iterator = match ($this->formatter) {
null => MapIterator::toIterator($records),
default => MapIterator::fromIterable($records, $this->formatter)
};

$iterator->rewind();
if (!$iterator->valid()) {
yield $this->emptyIterable;

return;
}

$incr = 0;
$chunk = [];
$chunkOffset = 0;
$offset = 0;
$current = $iterator->current();
$iterator->next();

yield $this->start;

while ($iterator->valid()) {
if ($incr === $this->chunkSize) {
if ($chunkOffset === $this->chunkSize) {
yield ($this->jsonEncodeChunk)($chunk).$this->separator;

$incr = 0;
$chunkOffset = 0;
$chunk = [];
}

++$incr;
$chunk[++$offset] = $current;
$chunk[$offset] = $current;
++$chunkOffset;
++$offset;
$current = $iterator->current();
$iterator->next();
}
Expand Down
108 changes: 108 additions & 0 deletions src/JsonConverterBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
<?php

/**
* League.Csv (https://csv.thephpleague.com)
*
* (c) Ignace Nyamagana Butera <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

declare(strict_types=1);

namespace League\Csv;

use PhpBench\Attributes as Bench;

use function assert;
use function fseek;
use function ftell;
use function fwrite;
use function json_encode;
use function tmpfile;

use const SEEK_END;

/**
 * Benchmarks comparing a plain json_encode() call with several
 * JsonConverter configurations when writing the `prenoms.csv`
 * fixture to a temporary file.
 *
 * Every benchmark carries two assertions, one on `variant.mem.peak`
 * and one on `variant.time.avg`, and verifies that the reported
 * number of written bytes matches the size of the temporary file.
 */
final class JsonConverterBench
{
    /**
     * Baseline: encodes the whole document with a single json_encode()
     * call and writes the resulting string to a temporary file.
     */
    #[Bench\OutputTimeUnit('seconds')]
    #[Bench\Assert('mode(variant.mem.peak) <= 8000000')]
    #[Bench\Assert('mode(variant.time.avg) < 10000000')]
    public function benchUsingJsonEncode(): void
    {
        $records = $this->getDocument();
        $handle = tmpfile();

        /** @var int $written */
        $written = fwrite($handle, json_encode($records)); /* @phpstan-ignore-line */

        $this->assertSameSize($written, $handle);
    }

    /**
     * Saves the document with an unmodified JsonConverter::create() instance.
     */
    #[Bench\OutputTimeUnit('seconds')]
    #[Bench\Assert('mode(variant.mem.peak) < 4000000')]
    #[Bench\Assert('mode(variant.time.avg) < 10000000')]
    public function benchUsingDefaultJsonConverter(): void
    {
        $this->saveAndVerify(JsonConverter::create());
    }

    /**
     * Saves the document with a converter configured through withForceObject().
     */
    #[Bench\OutputTimeUnit('seconds')]
    #[Bench\Assert('mode(variant.mem.peak) < 4000000')]
    #[Bench\Assert('mode(variant.time.avg) < 10000000')]
    public function benchUsingJsonConverterWithForceObject(): void
    {
        $this->saveAndVerify(JsonConverter::create()->withForceObject());
    }

    /**
     * Saves the document with a converter configured through withPrettyPrint().
     */
    #[Bench\OutputTimeUnit('seconds')]
    #[Bench\Assert('mode(variant.mem.peak) < 4000000')]
    #[Bench\Assert('mode(variant.time.avg) < 10000000')]
    public function benchUsingJsonConverterWithPrettyPrint(): void
    {
        $this->saveAndVerify(JsonConverter::create()->withPrettyPrint());
    }

    /**
     * Saves the document with a converter whose chunk size is set to 1.
     */
    #[Bench\OutputTimeUnit('seconds')]
    #[Bench\Assert('mode(variant.mem.peak) < 4000000')]
    #[Bench\Assert('mode(variant.time.avg) < 10000000')]
    public function benchUsingJsonConverterWithSmallChunkSize(): void
    {
        $this->saveAndVerify(JsonConverter::create()->chunkSize(1));
    }

    /**
     * Writes the fixture document to a fresh temporary file with the given
     * converter and checks the byte count returned by save().
     */
    private function saveAndVerify(JsonConverter $converter): void
    {
        $records = $this->getDocument();
        $handle = tmpfile();

        $written = $converter->save($records, $handle);

        $this->assertSameSize($written, $handle);
    }

    /**
     * Builds the Reader used by every benchmark: the `prenoms.csv` fixture
     * with the first record as header, `;` as delimiter, and a
     * CharsetConverter added with the 'iso-8859-15' and 'utf-8' arguments.
     */
    private function getDocument(): Reader
    {
        $fixturePath = dirname(__DIR__).'/test_files/prenoms.csv';

        $reader = Reader::createFromPath($fixturePath);
        $reader->setHeaderOffset(0);
        $reader->setDelimiter(';');
        CharsetConverter::addTo($reader, 'iso-8859-15', 'utf-8');

        return $reader;
    }

    /**
     * Moves the stream pointer to the end of the stream and asserts that
     * its position equals the expected number of bytes.
     *
     * @param resource $stream
     */
    private function assertSameSize(int $bytes, $stream): void
    {
        fseek($stream, 0, SEEK_END);

        assert($bytes === ftell($stream));
    }
}
25 changes: 24 additions & 1 deletion src/JsonConverterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@

namespace League\Csv;

use DateTimeImmutable;
use InvalidArgumentException;
use PHPUnit\Framework\Attributes\Group;
use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\TestCase;
use RuntimeException;
use TypeError;

use const JSON_FORCE_OBJECT;
use const JSON_HEX_QUOT;
Expand Down Expand Up @@ -140,7 +143,8 @@ public function it_can_use_syntactic_sugar_methods_to_set_json_flags(): void
self::assertEquals($usingJsonFlags, $usingMethodFlags);
}

public function testDownload(): void
#[Test]
public function it_can_make_the_generated_json_downloadable_ont_the_fly(): void
{
if (!function_exists('xdebug_get_headers')) {
self::markTestSkipped(__METHOD__.' needs the xdebug extension to run');
Expand All @@ -159,4 +163,23 @@ public function testDownload(): void
self::assertStringContainsString('content-disposition: attachment; filename="foobar.json"', $headers[3]);
self::assertSame('[{"foo":"bar"}]', $output);
}

/**
 * Passing a destination that is neither a filename, a stream nor a
 * SplFileInfo object (here a DateTimeImmutable) must raise a TypeError.
 */
#[Test]
public function it_fails_if_the_destination_path_type_is_invalid(): void
{
    $converter = JsonConverter::create();
    $unsupportedDestination = new DateTimeImmutable();

    $this->expectException(TypeError::class);

    $converter->save([['foo' => 'bar']], $unsupportedDestination); /* @phpstan-ignore-line */
}

/**
 * Saving to a stream opened in read-only mode must fail with a
 * RuntimeException whose message names both the chunk that could not be
 * written and the destination path.
 */
#[Test]
public function it_fails_to_write_to_the_destination_path_if_it_is_open_in_read_mode_only(): void
{
    $this->expectExceptionObject(new RuntimeException('Unable to write `[` to the destination path `'.__FILE__.'`.'));

    /** @var resource $stream */
    $stream = fopen(__FILE__, 'r');

    try {
        JsonConverter::create()->save([['foo' => 'bar']], $stream);
    } finally {
        // The handle was opened by this test, so release it here even though
        // save() is expected to throw; the exception still propagates.
        if (is_resource($stream)) {
            fclose($stream);
        }
    }
}
}

0 comments on commit 1799928

Please sign in to comment.