From 295e40ae414b1a6372796f3831e192fe174e45ee Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Fri, 27 Oct 2023 10:46:13 -0700 Subject: [PATCH] feat: support brotli encoding (#403) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #204 🦕 --- google/resumable_media/requests/download.py | 60 +++++++++++++++--- noxfile.py | 4 +- testing/constraints-3.7.txt | 2 +- tests/data/brotli.txt | 64 ++++++++++++++++++++ tests/data/brotli.txt.br | Bin 0 -> 45 bytes tests/system/requests/test_download.py | 32 +++++++++- tests/unit/requests/test_download.py | 28 +++++++++ 7 files changed, 179 insertions(+), 11 deletions(-) create mode 100644 tests/data/brotli.txt create mode 100644 tests/data/brotli.txt.br diff --git a/google/resumable_media/requests/download.py b/google/resumable_media/requests/download.py index 8d0319c7..1719cb01 100644 --- a/google/resumable_media/requests/download.py +++ b/google/resumable_media/requests/download.py @@ -584,7 +584,7 @@ def _add_decoder(response_raw, checksum): This is so that we can intercept the compressed bytes before they are decoded. - Only patches if the content encoding is ``gzip``. + Only patches if the content encoding is ``gzip`` or ``br``. Args: response_raw (urllib3.response.HTTPResponse): The raw response for @@ -598,12 +598,16 @@ def _add_decoder(response_raw, checksum): caller will no longer need to hash to decoded bytes. """ encoding = response_raw.headers.get("content-encoding", "").lower() - if encoding != "gzip": + if encoding == "gzip": + response_raw._decoder = _GzipDecoder(checksum) + return _helpers._DoNothingHash() + # Only activate if brotli is installed + elif encoding == "br" and _BrotliDecoder: # type: ignore + response_raw._decoder = _BrotliDecoder(checksum) + return _helpers._DoNothingHash() + else: return checksum - response_raw._decoder = _GzipDecoder(checksum) - return _helpers._DoNothingHash() - class _GzipDecoder(urllib3.response.GzipDecoder): """Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes. @@ -617,7 +621,7 @@ class _GzipDecoder(urllib3.response.GzipDecoder): """ def __init__(self, checksum): - super(_GzipDecoder, self).__init__() + super().__init__() self._checksum = checksum def decompress(self, data): @@ -630,4 +634,46 @@ def decompress(self, data): bytes: The decompressed bytes from ``data``. """ self._checksum.update(data) - return super(_GzipDecoder, self).decompress(data) + return super().decompress(data) + + +# urllib3.response.BrotliDecoder might not exist depending on whether brotli is +# installed. +if hasattr(urllib3.response, "BrotliDecoder"): + + class _BrotliDecoder: + """Handler for ``brotli`` encoded bytes. + + Allows a checksum function to see the compressed bytes before they are + decoded. This way the checksum of the compressed value can be computed. + + Because BrotliDecoder's decompress method is dynamically created in + urllib3, a subclass is not practical. Instead, this class creates a + captive urllib3.requests.BrotliDecoder instance and acts as a proxy. + + Args: + checksum (object): + A checksum which will be updated with compressed bytes. + """ + + def __init__(self, checksum): + self._decoder = urllib3.response.BrotliDecoder() + self._checksum = checksum + + def decompress(self, data): + """Decompress the bytes. + + Args: + data (bytes): The compressed bytes to be decompressed. + + Returns: + bytes: The decompressed bytes from ``data``. + """ + self._checksum.update(data) + return self._decoder.decompress(data) + + def flush(self): + return self._decoder.flush() + +else: # pragma: NO COVER + _BrotliDecoder = None # type: ignore # pragma: NO COVER diff --git a/noxfile.py b/noxfile.py index 751bff21..cff73871 100644 --- a/noxfile.py +++ b/noxfile.py @@ -38,7 +38,7 @@ def unit(session): ) # Install all test dependencies, then install this package in-place. - session.install("mock", "pytest", "pytest-cov", "pytest-asyncio<=0.14.0") + session.install("mock", "pytest", "pytest-cov", "pytest-asyncio<=0.14.0", "brotli") session.install("-e", ".[requests,aiohttp]", "-c", constraints_path) # Run py.test against the unit tests. @@ -220,7 +220,7 @@ def system(session): # Install all test dependencies, then install this package into the # virtualenv's dist-packages. - session.install("mock", "pytest", "google-cloud-testutils") + session.install("mock", "pytest", "google-cloud-testutils", "brotli") session.install("-e", ".[requests,aiohttp]", "-c", constraints_path) # Run py.test against the async system tests. diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 65e3508c..fa3aa7d0 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -1,4 +1,4 @@ crcmod==1.7 google-crc32c==1.0 aiohttp==3.6.2 -requests==2.18.0 +requests==2.23.0 diff --git a/tests/data/brotli.txt b/tests/data/brotli.txt new file mode 100644 index 00000000..da07c510 --- /dev/null +++ b/tests/data/brotli.txt @@ -0,0 +1,64 @@ +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 +abcdefghijklmnopqrstuvwxyz0123456789 diff --git a/tests/data/brotli.txt.br b/tests/data/brotli.txt.br new file mode 100644 index 0000000000000000000000000000000000000000..84828432cc5ca79230cb35b9f22d6a188a87b8dc GIT binary patch literal 45 zcmV+|0Mh@#_({MogY)`Yf?K2x#)(10R09BLW1SB%N#)VV> D1|St` literal 0 HcmV?d00001 diff --git a/tests/system/requests/test_download.py b/tests/system/requests/test_download.py index 347e19b5..9d34523d 100644 --- a/tests/system/requests/test_download.py +++ b/tests/system/requests/test_download.py @@ -121,6 +121,15 @@ def get_path(filename): "slices": (), "metadata": {"contentEncoding": "gzip"}, }, + { + "path": get_path("brotli.txt.br"), + "uncompressed": get_path("brotli.txt"), + "content_type": PLAIN_TEXT, + "md5": "MffJw7pTSX/7CVWFFPgwQA==", + "crc32c": "GGK0OQ==", + "slices": (), + "metadata": {"contentEncoding": "br"}, + }, ) @@ -298,7 +307,7 @@ def test_download_gzip_w_stored_content_headers( self, add_files, authorized_transport ): # Retrieve the gzip compressed file - info = ALL_FILES[-1] + info = ALL_FILES[-2] actual_contents = self._get_contents(info) blob_name = get_blob_name(info) @@ -314,6 +323,27 @@ def test_download_gzip_w_stored_content_headers( assert stream.getvalue() == actual_contents check_tombstoned(download, authorized_transport) + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test_download_brotli_w_stored_content_headers( + self, add_files, authorized_transport, checksum + ): + # Retrieve the br compressed file + info = ALL_FILES[-1] + actual_contents = self._get_contents(info) + blob_name = get_blob_name(info) + + # Create the actual download object. + media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name) + stream = io.BytesIO() + download = self._make_one(media_url, stream=stream, checksum=checksum) + # Consume the resource. + response = download.consume(authorized_transport) + assert response.status_code == http.client.OK + assert response.headers.get(_helpers._STORED_CONTENT_ENCODING_HEADER) == "br" + assert response.headers.get("X-Goog-Stored-Content-Length") is not None + assert stream.getvalue() == actual_contents + check_tombstoned(download, authorized_transport) + def test_extra_headers(self, authorized_transport, secret_file): blob_name, data, headers = secret_file # Create the actual download object. diff --git a/tests/unit/requests/test_download.py b/tests/unit/requests/test_download.py index a63fe6f9..afb2f0d4 100644 --- a/tests/unit/requests/test_download.py +++ b/tests/unit/requests/test_download.py @@ -1110,6 +1110,18 @@ def test_gzipped(self): assert isinstance(response_raw._decoder, download_mod._GzipDecoder) assert response_raw._decoder._checksum is mock.sentinel.md5_hash + def test_brotli(self): + headers = {"content-encoding": "br"} + response_raw = mock.Mock(headers=headers, spec=["headers", "_decoder"]) + md5_hash = download_mod._add_decoder(response_raw, mock.sentinel.md5_hash) + + assert md5_hash is not mock.sentinel.md5_hash + assert isinstance(md5_hash, _helpers._DoNothingHash) + assert isinstance(response_raw._decoder, download_mod._BrotliDecoder) + assert response_raw._decoder._checksum is mock.sentinel.md5_hash + # Go ahead and exercise the flush method, added only for completion + response_raw._decoder.flush() + class Test_GzipDecoder(object): def test_constructor(self): @@ -1127,6 +1139,22 @@ def test_decompress(self): md5_hash.update.assert_called_once_with(data) +class Test_BrotliDecoder(object): + def test_constructor(self): + decoder = download_mod._BrotliDecoder(mock.sentinel.md5_hash) + assert decoder._checksum is mock.sentinel.md5_hash + + def test_decompress(self): + md5_hash = mock.Mock(spec=["update"]) + decoder = download_mod._BrotliDecoder(md5_hash) + + data = b"\xc1\xf8I\xc0/\x83\xf3\xfa" + result = decoder.decompress(data) + + assert result == b"" + md5_hash.update.assert_called_once_with(data) + + def _mock_response(status_code=http.client.OK, chunks=(), headers=None): if headers is None: headers = {}