Commit

feat: support brotli encoding
andrewsg committed Oct 21, 2023
1 parent eecfb74 · commit 93bc107
Showing 6 changed files with 176 additions and 9 deletions.
57 changes: 51 additions & 6 deletions google/resumable_media/requests/download.py
@@ -598,12 +598,15 @@ def _add_decoder(response_raw, checksum):
caller will no longer need to hash to decoded bytes.
"""
encoding = response_raw.headers.get("content-encoding", "").lower()
if encoding != "gzip":
if encoding == "gzip":
response_raw._decoder = _GzipDecoder(checksum)
return _helpers._DoNothingHash()
elif encoding == "br" and _BrotliDecoder: # Only activate if brotli is installed
response_raw._decoder = _BrotliDecoder(checksum)
return _helpers._DoNothingHash()
else:
return checksum

response_raw._decoder = _GzipDecoder(checksum)
return _helpers._DoNothingHash()


class _GzipDecoder(urllib3.response.GzipDecoder):
"""Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes.
@@ -617,7 +620,7 @@ class _GzipDecoder(urllib3.response.GzipDecoder):
"""

def __init__(self, checksum):
-        super(_GzipDecoder, self).__init__()
+        super().__init__()
self._checksum = checksum

def decompress(self, data):
@@ -630,4 +633,46 @@ def decompress(self, data):
bytes: The decompressed bytes from ``data``.
"""
self._checksum.update(data)
-        return super(_GzipDecoder, self).decompress(data)
+        return super().decompress(data)


# urllib3.response.BrotliDecoder might not exist depending on whether brotli is
# installed.
if hasattr(urllib3.response, "BrotliDecoder"):

class _BrotliDecoder:
"""Handler for ``brotli`` encoded bytes.
Allows a checksum function to see the compressed bytes before they are
decoded. This way the checksum of the compressed value can be computed.
Because BrotliDecoder's decompress method is dynamically created in
urllib3, a subclass is not practical. Instead, this class creates a
captive urllib3.requests.BrotliDecoder instance and acts as a proxy.
Args:
checksum (object):
A checksum which will be updated with compressed bytes.
"""

def __init__(self, checksum):
self._decoder = urllib3.response.BrotliDecoder()
self._checksum = checksum

def decompress(self, data):
"""Decompress the bytes.
Args:
data (bytes): The compressed bytes to be decompressed.
Returns:
bytes: The decompressed bytes from ``data``.
"""
self._checksum.update(data)
return self._decoder.decompress(data)

def flush(self):
return self._decoder.flush()

else: # pragma: NO COVER
_BrotliDecoder = None # pragma: NO COVER
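The selection logic above only activates the brotli path when the optional dependency is present, and the docstring explains why the decoder is a proxy rather than a subclass: urllib3 attaches BrotliDecoder's decompress method dynamically, so the practical approach is to hold a captive decoder and hash the compressed bytes before handing them to it. Below is a minimal standalone sketch of that idea — not part of the commit — assuming the brotli package is installed so that urllib3.response.BrotliDecoder exists; the class and variable names are made up for illustration.

```python
import hashlib

import brotli
import urllib3.response


class ChecksumBrotliProxy:
    """Hypothetical proxy: hash compressed bytes, hand back decompressed bytes."""

    def __init__(self, checksum):
        # Captive decoder instance; subclassing is impractical because urllib3
        # binds its decompress method dynamically at construction time.
        self._decoder = urllib3.response.BrotliDecoder()
        self._checksum = checksum

    def decompress(self, data):
        self._checksum.update(data)  # the checksum sees the compressed bytes
        return self._decoder.decompress(data)  # the caller sees decompressed bytes

    def flush(self):
        return self._decoder.flush()


payload = b"abcdefghijklmnopqrstuvwxyz0123456789\n" * 64
compressed = brotli.compress(payload)

md5 = hashlib.md5()
proxy = ChecksumBrotliProxy(md5)
decompressed = proxy.decompress(compressed) + proxy.flush()

assert decompressed == payload
# The digest covers the compressed bytes, i.e. the checksum of the stored
# (compressed) value, which is what the download verification needs.
print(md5.hexdigest())
```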
4 changes: 2 additions & 2 deletions noxfile.py
@@ -38,7 +38,7 @@ def unit(session):
)

# Install all test dependencies, then install this package in-place.
session.install("mock", "pytest", "pytest-cov", "pytest-asyncio<=0.14.0")
session.install("mock", "pytest", "pytest-cov", "pytest-asyncio<=0.14.0", "brotli")
session.install("-e", ".[requests,aiohttp]", "-c", constraints_path)

# Run py.test against the unit tests.
@@ -220,7 +220,7 @@ def system(session):

# Install all test dependencies, then install this package into the
# virtualenv's dist-packages.
session.install("mock", "pytest", "google-cloud-testutils")
session.install("mock", "pytest", "google-cloud-testutils", "brotli")
session.install("-e", ".[requests,aiohttp]", "-c", constraints_path)

# Run py.test against the async system tests.
64 changes: 64 additions & 0 deletions tests/data/brotli.txt
@@ -0,0 +1,64 @@
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
Binary file added tests/data/brotli.txt.br
Binary file not shown.
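The compressed fixture is binary, so its contents are not reproduced here. For reference, a snippet along the following lines (hypothetical, not part of the commit) could regenerate tests/data/brotli.txt.br from the plain-text fixture, assuming the brotli package is installed and the script runs from the repository root:

```python
import brotli

# Hypothetical fixture-regeneration helper, not part of the commit.
with open("tests/data/brotli.txt", "rb") as src:
    plaintext = src.read()

with open("tests/data/brotli.txt.br", "wb") as dst:
    dst.write(brotli.compress(plaintext))
```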
32 changes: 31 additions & 1 deletion tests/system/requests/test_download.py
@@ -121,6 +121,15 @@ def get_path(filename):
"slices": (),
"metadata": {"contentEncoding": "gzip"},
},
{
"path": get_path("brotli.txt.br"),
"uncompressed": get_path("brotli.txt"),
"content_type": PLAIN_TEXT,
"md5": "MffJw7pTSX/7CVWFFPgwQA==",
"crc32c": "GGK0OQ==",
"slices": (),
"metadata": {"contentEncoding": "br"},
},
)


@@ -298,7 +307,7 @@ def test_download_gzip_w_stored_content_headers(
self, add_files, authorized_transport
):
# Retrieve the gzip compressed file
-        info = ALL_FILES[-1]
+        info = ALL_FILES[-2]
actual_contents = self._get_contents(info)
blob_name = get_blob_name(info)

@@ -314,6 +323,27 @@ def test_download_gzip_w_stored_content_headers(
assert stream.getvalue() == actual_contents
check_tombstoned(download, authorized_transport)

@pytest.mark.parametrize("checksum", ["md5", "crc32c"])
def test_download_brotli_w_stored_content_headers(
self, add_files, authorized_transport, checksum
):
# Retrieve the br compressed file
info = ALL_FILES[-1]
actual_contents = self._get_contents(info)
blob_name = get_blob_name(info)

# Create the actual download object.
media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name)
stream = io.BytesIO()
download = self._make_one(media_url, stream=stream, checksum=checksum)
# Consume the resource.
response = download.consume(authorized_transport)
assert response.status_code == http.client.OK
assert response.headers.get(_helpers._STORED_CONTENT_ENCODING_HEADER) == "br"
assert response.headers.get("X-Goog-Stored-Content-Length") is not None
assert stream.getvalue() == actual_contents
check_tombstoned(download, authorized_transport)

def test_extra_headers(self, authorized_transport, secret_file):
blob_name, data, headers = secret_file
# Create the actual download object.
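The new system-test entry depends on the fixture object being stored with contentEncoding set to "br", so that the stored-content-encoding header comes back as "br" on download. The add_files fixture handles the upload; purely as an illustration of how such an object could be staged (not how the test harness actually does it), here is a sketch using google-cloud-storage, with placeholder bucket and object names:

```python
from google.cloud import storage

# Illustrative only; the system tests use their own fixture plumbing.
client = storage.Client()
bucket = client.bucket("my-test-bucket")  # placeholder bucket name

blob = bucket.blob("brotli.txt")  # placeholder object name
blob.content_encoding = "br"  # recorded as the stored content encoding
blob.upload_from_filename("tests/data/brotli.txt.br", content_type="text/plain")
```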
28 changes: 28 additions & 0 deletions tests/unit/requests/test_download.py
@@ -1110,6 +1110,18 @@ def test_gzipped(self):
assert isinstance(response_raw._decoder, download_mod._GzipDecoder)
assert response_raw._decoder._checksum is mock.sentinel.md5_hash

def test_brotli(self):
headers = {"content-encoding": "br"}
response_raw = mock.Mock(headers=headers, spec=["headers", "_decoder"])
md5_hash = download_mod._add_decoder(response_raw, mock.sentinel.md5_hash)

assert md5_hash is not mock.sentinel.md5_hash
assert isinstance(md5_hash, _helpers._DoNothingHash)
assert isinstance(response_raw._decoder, download_mod._BrotliDecoder)
assert response_raw._decoder._checksum is mock.sentinel.md5_hash
# Go ahead and exercise the flush method, added only for completeness
response_raw._decoder.flush()


class Test_GzipDecoder(object):
def test_constructor(self):
@@ -1127,6 +1139,22 @@ def test_decompress(self):
md5_hash.update.assert_called_once_with(data)


class Test_BrotliDecoder(object):
def test_constructor(self):
decoder = download_mod._BrotliDecoder(mock.sentinel.md5_hash)
assert decoder._checksum is mock.sentinel.md5_hash

def test_decompress(self):
md5_hash = mock.Mock(spec=["update"])
decoder = download_mod._BrotliDecoder(md5_hash)

data = b"\xc1\xf8I\xc0/\x83\xf3\xfa"
result = decoder.decompress(data)

assert result == b""
md5_hash.update.assert_called_once_with(data)


def _mock_response(status_code=http.client.OK, chunks=(), headers=None):
if headers is None:
headers = {}
