From 7a0fc84b1354d46c920fdf133f4f748596fc8f46 Mon Sep 17 00:00:00 2001 From: Kurt Thiemann Date: Thu, 25 Jan 2024 14:53:53 +0100 Subject: [PATCH] add support for uncompressed and LZ4 compressed chunks --- .gitignore | 1 + README.md | 2 + composer.json | 3 + src/Chunk/AnvilChunk.php | 86 +++++++++++----- src/Reader/BufferedReader.php | 175 +++++++++++++++++++++++++++++++++ src/Reader/LZ4/BlockHeader.php | 121 +++++++++++++++++++++++ src/Reader/LZ4BlockReader.php | 53 ++++++++++ src/Reader/RawReader.php | 18 ++++ src/Reader/ZlibReader.php | 139 ++++---------------------- 9 files changed, 453 insertions(+), 145 deletions(-) create mode 100644 src/Reader/BufferedReader.php create mode 100644 src/Reader/LZ4/BlockHeader.php create mode 100644 src/Reader/LZ4BlockReader.php create mode 100644 src/Reader/RawReader.php diff --git a/.gitignore b/.gitignore index 4c36e38..f8d2025 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .idea/ vendor/ +test.php diff --git a/README.md b/README.md index 86bca37..7352364 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,8 @@ Currently, only the Minecraft Anvil world format (Minecraft Java Edition) is sup composer require aternos/thanos ``` +To work with LZ4 compressed chunks (Minecraft 1.20.5+), you should also install the [PHP LZ4 extension](https://github.com/kjdev/php-ext-lz4). + ## Usage ### CLI tool diff --git a/composer.json b/composer.json index 5822303..7f1b607 100644 --- a/composer.json +++ b/composer.json @@ -15,6 +15,9 @@ "aternos/nbt": "^v1.9.0", "aternos/taskmaster": "^1.0" }, + "suggest": { + "ext-lz4": "Support for LZ4 compressed chunks (Minecraft 1.20.5+)" + }, "autoload": { "psr-4": { "Aternos\\Thanos\\": "src/" diff --git a/src/Chunk/AnvilChunk.php b/src/Chunk/AnvilChunk.php index 8f8293c..b612127 100644 --- a/src/Chunk/AnvilChunk.php +++ b/src/Chunk/AnvilChunk.php @@ -2,6 +2,9 @@ namespace Aternos\Thanos\Chunk; +use Aternos\Thanos\Reader\LZ4BlockReader; +use Aternos\Thanos\Reader\RawReader; +use Aternos\Thanos\Reader\ReaderInterface; use Aternos\Thanos\Reader\ZlibReader; use Exception; @@ -55,9 +58,9 @@ class AnvilChunk implements ChunkInterface protected ?int $lastUpdate = null; /** - * @var ZlibReader + * @var ReaderInterface */ - protected ZlibReader $zlibReader; + protected ReaderInterface $reader; /** * @var bool @@ -106,12 +109,42 @@ public function __construct($file, int $offset, array $regionPosition, int $regi $this->yPos = intdiv($this->regionFileIndex, 32); $this->readHeader(); - $this->zlibReader = new ZlibReader( - $this->file, - $this->compression === 1 ? ZLIB_ENCODING_GZIP : ZLIB_ENCODING_DEFLATE, - $this->dataOffset, - $this->length - 5 - ); + + $dataLength = $this->length - 5; + switch ($this->compression) { + case 1: + $this->reader = new ZlibReader( + $this->file, + ZLIB_ENCODING_GZIP, + $this->dataOffset, + $dataLength + ); + break; + case 2: + $this->reader = new ZlibReader( + $this->file, + ZLIB_ENCODING_DEFLATE, + $this->dataOffset, + $dataLength + ); + break; + case 3: + $this->reader = new RawReader( + $this->file, + $this->dataOffset, + $dataLength + ); + break; + case 4: + $this->reader = new LZ4BlockReader( + $this->file, + $this->dataOffset, + $dataLength + ); + break; + default: + throw new Exception("Unknown chunk compression type."); + } } /** @@ -122,13 +155,13 @@ public function __construct($file, int $offset, array $regionPosition, int $regi protected function readHeader(): void { $rawValue = unpack('N', fread($this->file, 4)); - if($rawValue === false) { + if ($rawValue === false) { throw new Exception("Failed to read chunk length."); } $this->length = $rawValue['1'] + 4; $rawValue = unpack('C', fread($this->file, 1)); - if($rawValue === false) { + if ($rawValue === false) { throw new Exception("Failed to read chunk compression."); } $this->compression = $rawValue['1']; @@ -164,10 +197,10 @@ public function getLength(): int public function getInhabitedTime(): int { if ($this->inhabitedTime === null) { - $this->zlibReader->rewind(); + $this->reader->rewind(); $data = $this->readAfter(hex2bin('04000D') . 'InhabitedTime', 8); $rawData = $data !== null ? unpack('J', $data) : false; - if($rawData === false) { + if ($rawData === false) { return -1; } $this->inhabitedTime = $rawData['1']; @@ -187,18 +220,19 @@ public function getInhabitedTime(): int */ protected function readAfter( string $str, - int $length, - int $limit = 1024 * 1024 * 10 - ): ?string { - $startPointer = $this->zlibReader->tell(); + int $length, + int $limit = 1024 * 1024 * 10 + ): ?string + { + $startPointer = $this->reader->tell(); $strPointer = 0; $valuePos = -1; while ( - !$this->zlibReader->eof() - && $this->zlibReader->tell() < $startPointer + $limit + !$this->reader->eof() + && $this->reader->tell() < $startPointer + $limit ) { - $data = $this->zlibReader->read(2048); - $dataStart = $this->zlibReader->tell() - strlen($data); + $data = $this->reader->read(2048); + $dataStart = $this->reader->tell() - strlen($data); $pos = strpos($data, $str); if ($pos !== false) { $valuePos = $dataStart + $pos + strlen($str); @@ -219,8 +253,8 @@ protected function readAfter( if ($valuePos === -1) { return null; } - $this->zlibReader->seek($valuePos); - return $this->zlibReader->read($length); + $this->reader->seek($valuePos); + return $this->reader->read($length); } /** @@ -285,10 +319,10 @@ public function isSaved(): bool public function getLastUpdate(): int { if ($this->lastUpdate === null) { - $this->zlibReader->rewind(); + $this->reader->rewind(); $data = $this->readAfter(hex2bin('04000A') . 'LastUpdate', 8); $rawData = $data !== null ? unpack('J', $data) : false; - if($rawData === false) { + if ($rawData === false) { return -1; } $this->lastUpdate = $rawData['1']; @@ -338,7 +372,7 @@ public function getGlobalXPos(): int */ public function getGlobalYPos(): int { - return $this->regionPosition[1] * 32 + $this->yPos; + return $this->regionPosition[1] * 32 + $this->yPos; } /** @@ -346,6 +380,6 @@ public function getGlobalYPos(): int */ public function close(): void { - $this->zlibReader->reset(); + $this->reader->reset(); } } diff --git a/src/Reader/BufferedReader.php b/src/Reader/BufferedReader.php new file mode 100644 index 0000000..40bd7c8 --- /dev/null +++ b/src/Reader/BufferedReader.php @@ -0,0 +1,175 @@ +offset = $offset; + $this->resourcePointer = $offset; + $this->length = $length; + $this->resource = $resource; + } + + /** + * Read $length bytes of data + * + * @param int $length + * @return string + * @throws Exception + */ + public function read(int $length): string + { + $readLength = max( + $length - (strlen($this->data) - $this->pointer), + 0 + ); + + if ($readLength > 0) { + $chunk = ""; + while (strlen($chunk) < $readLength && $this->getRemainingRawLength() > 0) { + $chunk .= $this->getRawChunk($readLength - strlen($chunk)); + } + $this->data .= $chunk; + } + + $data = substr($this->data, $this->pointer, $length); + $this->pointer += strlen($data); + + return $data; + } + + /** + * @param int $length + * @return string + * @throws Exception + */ + protected function readRaw(int $length): string + { + if ($length <= 0) { + return ''; + } + fseek($this->resource, $this->resourcePointer); + $rawData = fread( + $this->resource, + min( + $length, + $this->getRemainingRawLength() + ) + ); + if($rawData === false) { + throw new Exception("Failed to read compressed input data."); + } + + $this->resourcePointer = ftell($this->resource) ?: $this->resourcePointer + strlen($rawData); + return $rawData; + } + + /** + * @return int + */ + protected function getRemainingRawLength(): int + { + return $this->offset + $this->length - $this->resourcePointer; + } + + /** + * Read and uncompress a chunk of data + * $length is just a suggestion, the actual length of the returned data may be longer or shorter + * + * @param int $length + * @return string + */ + protected abstract function getRawChunk(int $length): string; + + /** + * Set pointer position to $offset + * + * @param int $offset + */ + public function seek(int $offset): void + { + $this->pointer = max($offset, 0); + } + + /** + * Set pointer position to 0 + * + */ + public function rewind(): void + { + $this->pointer = 0; + } + + /** + * @inheritDoc + */ + public function reset(): void + { + $this->data = ''; + $this->pointer = 0; + $this->resourcePointer = $this->offset; + } + + public function eof(): bool + { + return ($this->resourcePointer >= $this->offset + $this->length || feof($this->resource)) + && $this->pointer >= strlen($this->data); + } + + /** + * Get current pointer position + * + * @return int + */ + public function tell(): int + { + return $this->pointer; + } +} diff --git a/src/Reader/LZ4/BlockHeader.php b/src/Reader/LZ4/BlockHeader.php new file mode 100644 index 0000000..a97c945 --- /dev/null +++ b/src/Reader/LZ4/BlockHeader.php @@ -0,0 +1,121 @@ +token = $token; + $this->compressedLength = $compressedLength; + $this->decompressedLength = $decompressedLength; + $this->checksum = $checksum; + + $this->compressionMethod = $this->token & 0xf0; + $this->compressionLevel = static::COMPRESSION_LEVEL_BASE + ($this->token & 0x0f); + + if (!in_array($this->compressionMethod, [static::COMPRESSION_METHOD_LZ4, static::COMPRESSION_METHOD_RAW])) { + throw new Exception("Invalid LZ4 block compression method"); + } + } + + /** + * @return int + */ + public function getToken(): int + { + return $this->token; + } + + /** + * @return int + */ + public function getCompressedLength(): int + { + return $this->compressedLength; + } + + /** + * @return int + */ + public function getDecompressedLength(): int + { + return $this->decompressedLength; + } + + /** + * @return int + */ + public function getChecksum(): int + { + return $this->checksum; + } + + /** + * @return int + */ + public function getCompressionMethod(): int + { + return $this->compressionMethod; + } + + /** + * @return int + */ + public function getCompressionLevel(): int + { + return $this->compressionLevel; + } +} diff --git a/src/Reader/LZ4BlockReader.php b/src/Reader/LZ4BlockReader.php new file mode 100644 index 0000000..6eadc9c --- /dev/null +++ b/src/Reader/LZ4BlockReader.php @@ -0,0 +1,53 @@ +readRaw(BlockHeader::HEADER_LENGTH)); + + $compressedLength = $header->getCompressedLength(); + $chunk = $this->readRaw($compressedLength); + if (strlen($chunk) !== $compressedLength) { + throw new \Exception("Could not read compressed chunk data."); + } + + if ($header->getCompressionMethod() === BlockHeader::COMPRESSION_METHOD_RAW) { + return $chunk; + } + + $result = lz4_uncompress(pack("V", $header->getDecompressedLength()) . $chunk); + if ($result === false) { + throw new \Exception("Could not uncompress chunk data."); + } + + // https://github.com/lz4/lz4-java/blob/master/src/java/net/jpountz/lz4/LZ4BlockOutputStream.java#L125 + $hash = hash("xxh32", $result, true, ["seed" => BlockHeader::XXHASH_SEED]); + + // https://github.com/lz4/lz4-java/blob/master/src/java/net/jpountz/xxhash/StreamingXXHash32.java#L101 + $checksum = unpack("V", strrev($hash))[1] & 0xFFFFFFF; + + if ($checksum !== $header->getChecksum()) { + throw new \Exception("Checksum mismatch."); + } + + return $result; + } +} diff --git a/src/Reader/RawReader.php b/src/Reader/RawReader.php new file mode 100644 index 0000000..a53a82f --- /dev/null +++ b/src/Reader/RawReader.php @@ -0,0 +1,18 @@ +readRaw($length); + } +} diff --git a/src/Reader/ZlibReader.php b/src/Reader/ZlibReader.php index cd2f433..6b9b57f 100644 --- a/src/Reader/ZlibReader.php +++ b/src/Reader/ZlibReader.php @@ -10,38 +10,8 @@ * * @package Aternos\Thanos\Reader */ -class ZlibReader implements ReaderInterface +class ZlibReader extends BufferedReader { - /** - * @var int - */ - protected int $offset; - - /** - * @var int - */ - protected int $length; - - /** - * @var int - */ - protected int $resourcePointer; - - /** - * @var int - */ - protected int $pointer = 0; - - /** - * @var string - */ - protected string $data = ''; - - /** - * @var resource - */ - protected $resource; - /** * @var int */ @@ -62,109 +32,40 @@ class ZlibReader implements ReaderInterface */ public function __construct( $resource, - int $compression = ZLIB_ENCODING_RAW, - int $offset = 0, - int $length = -1 + int $compression, + int $offset, + int $length ) { - $this->offset = $offset; - $this->resourcePointer = $offset; - $this->length = $length; - $this->resource = $resource; + parent::__construct($resource, $offset, $length); $this->compression = $compression; $this->inflateContext = inflate_init($this->compression); } - /** - * Read $length bytes of data - * - * @param int $length - * @return string - * @throws Exception - */ - public function read(int $length): string - { - $remaining = $this->length !== -1 - ? $this->offset + $this->length - $this->resourcePointer - : $length - ; - $readLength = max( - $length - (strlen($this->data) - $this->pointer), - 0 - ); - - if ($readLength > 0 && $remaining > 0) { - fseek($this->resource, $this->resourcePointer); - $rawData = fread( - $this->resource, - min( - max(512, $readLength), - $remaining - ) - ); - if($rawData === false) { - throw new Exception("Failed to read compressed input data."); - } - - $uncompressedData = inflate_add( - $this->inflateContext, - $rawData - ); - if($uncompressedData === false) { - throw new Exception("Failed to inflate input data."); - } - $this->data .= $uncompressedData; - $this->resourcePointer = ftell($this->resource) ?: $this->resourcePointer + strlen($rawData); - } - - $data = substr($this->data, $this->pointer, $length); - $this->pointer += strlen($data); - - return $data; - } - - /** - * Set pointer position to $offset - * - * @param int $offset - */ - public function seek(int $offset): void - { - $this->pointer = max($offset, 0); - } - - /** - * Set pointer position to 0 - * - */ - public function rewind(): void - { - $this->pointer = 0; - } - /** * @inheritDoc */ public function reset(): void { - $this->data = ''; - $this->pointer = 0; - $this->resourcePointer = $this->offset; + parent::reset(); $this->inflateContext = inflate_init($this->compression); } - public function eof(): bool - { - return ($this->resourcePointer >= $this->offset + $this->length || feof($this->resource)) - && $this->pointer >= strlen($this->data); - } - /** - * Get current pointer position - * - * @return int + * @inheritDoc + * @throws Exception */ - public function tell(): int + protected function getRawChunk(int $length): string { - return $this->pointer; + fseek($this->resource, $this->resourcePointer); + $rawData = $this->readRaw($length); + + $uncompressedData = inflate_add( + $this->inflateContext, + $rawData + ); + if($uncompressedData === false) { + throw new Exception("Failed to inflate input data."); + } + return $uncompressedData; } }