
[gcloud] Add a setting for connect/read timeouts #1120

Open · wants to merge 3 commits into base: master
16 changes: 16 additions & 0 deletions docs/backends/gcloud.rst
@@ -204,6 +204,22 @@ Note: Default Google Compute Engine (GCE) Service accounts are
The ``GS_EXPIRATION`` value is handled by the underlying `Google library <https://googlecloudplatform.github.io/google-cloud-python/latest/storage/blobs.html#google.cloud.storage.blob.Blob.generate_signed_url>`_.
It supports `timedelta`, `datetime`, or `integer` seconds since epoch time.
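
For illustration, a hedged example of the value types the paragraph above says ``GS_EXPIRATION`` accepts (values are arbitrary):

```python
# settings.py — any one of these forms, per the docs above
from datetime import timedelta

GS_EXPIRATION = timedelta(hours=1)
# GS_EXPIRATION = datetime.datetime(2030, 1, 1)   # an absolute datetime
# GS_EXPIRATION = 1893456000                      # integer seconds since the epoch
```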

``GS_TIMEOUT`` (optional: default is ``60``, float or tuple)

Connect/read timeout: the amount of time, in seconds, to wait for the connection to the server to be established, and
between bytes sent from the server. If a float is given, it is applied to both; if a tuple, the first value is the
connect timeout and the second is the read timeout.

Note that read timeout =/= download timeout. It's the number of seconds that the client will wait *between* bytes sent
from the server. In 99.9% of cases, this is the time before the server sends the first byte.

Contributor:
Did you mean ≠ ?
I was not sure what =/= meant.

See https://docs.python-requests.org/en/master/user/advanced/#timeouts

Sometimes requests can get stuck, so it is better to keep the timeout low (a couple of seconds); a new request
(via retry) will then be made sooner. The default is higher to preserve the behavior from before this setting was introduced.
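
For illustration only, a sketch of how this setting might look in a Django settings module (values are arbitrary; the tuple form follows the requests-style ``(connect, read)`` semantics described above):

```python
# settings.py — hypothetical values, not defaults recommended by this PR
GS_TIMEOUT = 5             # float/int: applied to both connect and read timeouts
# GS_TIMEOUT = (3.05, 10)  # tuple: (connect timeout, read timeout)
```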

Timeouts will be automatically retried when using a `google-cloud-storage` version that includes

Contributor:
google-cloud-storage > 2.2.0
https://github.com/googleapis/python-storage/pull/727
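
As a hedged illustration of the comment above, one way to check at runtime that the installed library is new enough to retry timed-out requests (the ``> 2.2.0`` bound is taken from the review comment, not verified here):

```python
# Sketch only: compare the installed google-cloud-storage version against the
# bound mentioned in the review comment.
import google.cloud.storage
from packaging.version import Version

if Version(google.cloud.storage.__version__) <= Version("2.2.0"):
    raise RuntimeError(
        "google-cloud-storage > 2.2.0 is needed for automatic retries on timeouts")
```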

Usage
-----
24 changes: 15 additions & 9 deletions storages/backends/gcloud.py
@@ -34,7 +34,7 @@ def __init__(self, name, mode, storage):
self.mime_type = mimetypes.guess_type(name)[0]
self._mode = mode
self._storage = storage
self.blob = storage.bucket.get_blob(name)
self.blob = storage.bucket.get_blob(name, timeout=storage.timeout)
Contributor:
nit: you use timeout=storage.timeout here, but elsewhere in the same function you use timeout=self._storage.timeout
I realize they are the same, but better to be consistent.

if not self.blob and 'w' in mode:
self.blob = Blob(
self.name, storage.bucket,
Expand All @@ -55,7 +55,7 @@ def _get_file(self):
)
if 'r' in self._mode:
self._is_dirty = False
self.blob.download_to_file(self._file)
self.blob.download_to_file(self._file, timeout=self._storage.timeout)
self._file.seek(0)
if self._storage.gzip and self.blob.content_encoding == 'gzip':
self._file = self._decompress_file(mode=self._mode, file=self._file)
@@ -87,7 +87,8 @@ def close(self):
blob_params = self._storage.get_object_parameters(self.name)
self.blob.upload_from_file(
self.file, rewind=True, content_type=self.mime_type,
predefined_acl=blob_params.get('acl', self._storage.default_acl))
predefined_acl=blob_params.get('acl', self._storage.default_acl),
timeout=self._storage.timeout)
self._file.close()
self._file = None

@@ -128,6 +129,7 @@ def get_default_settings(self):
# roll over.
"max_memory_size": setting('GS_MAX_MEMORY_SIZE', 0),
"blob_chunk_size": setting('GS_BLOB_CHUNK_SIZE'),
"timeout": setting('GS_TIMEOUT', 60)
}

@property
@@ -186,7 +188,10 @@ def _save(self, name, content):
for prop, val in blob_params.items():
setattr(file_object.blob, prop, val)

file_object.blob.upload_from_file(content, rewind=True, size=getattr(content, 'size', None), **upload_params)
file_object.blob.upload_from_file(content, rewind=True,
size=getattr(content, 'size', None),
timeout=self.timeout,
**upload_params)
return cleaned_name

def get_object_parameters(self, name):
@@ -209,20 +214,20 @@ def get_object_parameters(self, name):
def delete(self, name):
name = self._normalize_name(clean_name(name))
try:
self.bucket.delete_blob(name)
self.bucket.delete_blob(name, timeout=self.timeout)
except NotFound:
pass

def exists(self, name):
if not name: # root element aka the bucket
try:
self.client.get_bucket(self.bucket)
self.client.get_bucket(self.bucket, timeout=self.timeout)
return True
except NotFound:
return False

name = self._normalize_name(clean_name(name))
return bool(self.bucket.get_blob(name))
return bool(self.bucket.get_blob(name, timeout=self.timeout))

def listdir(self, name):
name = self._normalize_name(clean_name(name))
@@ -231,7 +236,8 @@ def listdir(self, name):
if name and not name.endswith('/'):
name += '/'

iterator = self.bucket.list_blobs(prefix=name, delimiter='/')
iterator = self.bucket.list_blobs(prefix=name, delimiter='/',
timeout=self.timeout)
blobs = list(iterator)
prefixes = iterator.prefixes

@@ -249,7 +255,7 @@ def listdir(self, name):

def _get_blob(self, name):
# Wrap google.cloud.storage's blob to raise if the file doesn't exist
blob = self.bucket.get_blob(name)
blob = self.bucket.get_blob(name, timeout=self.timeout)

if blob is None:
raise NotFound('File does not exist: {}'.format(name))
46 changes: 25 additions & 21 deletions tests/test_gcloud.py
@@ -38,9 +38,9 @@ def test_open_read(self):

f = self.storage.open(self.filename)
self.storage._client.bucket.assert_called_with(self.bucket_name)
self.storage._bucket.get_blob.assert_called_with(self.filename)
Contributor:
Maybe define DEFAULT_TIMEOUT = 60 and use it instead of the 60 in the test. It will make the tests easier to update if
the default is ever changed. It also makes it very explicit what is going on.
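
A minimal sketch of that suggestion — ``DEFAULT_TIMEOUT`` is the reviewer's hypothetical name, not something defined in this PR:

```python
# Hedged sketch for tests/test_gcloud.py: a shared constant keeps the expected
# default in one place.
DEFAULT_TIMEOUT = 60  # mirrors the GS_TIMEOUT default in get_default_settings()

# an assertion would then read, for example:
#   self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=DEFAULT_TIMEOUT)
```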

self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

f.blob.download_to_file = lambda tmpfile: tmpfile.write(data)
f.blob.download_to_file = lambda tmpfile, **kwargs: tmpfile.write(data)
self.assertEqual(f.read(), data)

def test_open_read_num_bytes(self):
Expand All @@ -49,17 +49,17 @@ def test_open_read_num_bytes(self):

f = self.storage.open(self.filename)
self.storage._client.bucket.assert_called_with(self.bucket_name)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

f.blob.download_to_file = lambda tmpfile: tmpfile.write(data)
f.blob.download_to_file = lambda tmpfile, **kwargs: tmpfile.write(data)
self.assertEqual(f.read(num_bytes), data[0:num_bytes])

def test_open_read_nonexistent(self):
self.storage._bucket = mock.MagicMock()
self.storage._bucket.get_blob.return_value = None

self.assertRaises(FileNotFoundError, self.storage.open, self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

def test_open_read_nonexistent_unicode(self):
filename = 'ủⓝï℅ⅆℇ.txt'
@@ -92,7 +92,7 @@ def test_open_write(self, MockBlob):
MockBlob().upload_from_file.assert_called_with(
tmpfile, rewind=True,
content_type=mimetypes.guess_type(self.filename)[0],
predefined_acl='projectPrivate')
predefined_acl='projectPrivate', timeout=60)

def test_save(self):
data = 'This is some test content.'
@@ -103,7 +103,7 @@ def test_save2(self):
self.storage._client.bucket.assert_called_with(self.bucket_name)
self.storage._bucket.get_blob().upload_from_file.assert_called_with(
content, rewind=True, size=len(data), content_type=mimetypes.guess_type(self.filename)[0],
predefined_acl=None)
predefined_acl=None, timeout=60)

def test_save2(self):
data = 'This is some test ủⓝï℅ⅆℇ content.'
@@ -115,7 +115,7 @@ def test_save_with_default_acl(self):
self.storage._client.bucket.assert_called_with(self.bucket_name)
self.storage._bucket.get_blob().upload_from_file.assert_called_with(
content, rewind=True, size=len(data), content_type=mimetypes.guess_type(filename)[0],
predefined_acl=None)
predefined_acl=None, timeout=60)

def test_save_with_default_acl(self):
data = 'This is some test ủⓝï℅ⅆℇ content.'
@@ -132,23 +132,23 @@ def test_save_with_default_acl(self):
self.storage._client.bucket.assert_called_with(self.bucket_name)
self.storage._bucket.get_blob().upload_from_file.assert_called_with(
content, rewind=True, size=len(data), content_type=mimetypes.guess_type(filename)[0],
predefined_acl='publicRead')
predefined_acl='publicRead', timeout=60)

def test_delete(self):
self.storage.delete(self.filename)

self.storage._client.bucket.assert_called_with(self.bucket_name)
self.storage._bucket.delete_blob.assert_called_with(self.filename)
self.storage._bucket.delete_blob.assert_called_with(self.filename, timeout=60)

def test_exists(self):
self.storage._bucket = mock.MagicMock()
self.assertTrue(self.storage.exists(self.filename))
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

self.storage._bucket.reset_mock()
self.storage._bucket.get_blob.return_value = None
self.assertFalse(self.storage.exists(self.filename))
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

def test_exists_no_bucket(self):
# exists('') should return False if the bucket doesn't exist
@@ -233,7 +233,7 @@ def test_size(self):
self.storage._bucket.get_blob.return_value = blob

self.assertEqual(self.storage.size(self.filename), size)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

def test_size_no_file(self):
self.storage._bucket = mock.MagicMock()
@@ -254,7 +254,7 @@ def test_modified_time(self):
mt = self.storage.modified_time(self.filename)
self.assertTrue(timezone.is_naive(mt))
self.assertEqual(mt, naive_date)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

def test_get_modified_time(self):
naive_date = datetime(2017, 1, 2, 3, 4, 5, 678)
@@ -270,13 +270,13 @@ def test_get_modified_time(self):
self.assertTrue(timezone.is_naive(mt))
naive_date_montreal = timezone.make_naive(aware_date)
self.assertEqual(mt, naive_date_montreal)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

with self.settings(TIME_ZONE='America/Montreal', USE_TZ=True):
mt = self.storage.get_modified_time(self.filename)
self.assertTrue(timezone.is_aware(mt))
self.assertEqual(mt, aware_date)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

def test_get_created_time(self):
naive_date = datetime(2017, 1, 2, 3, 4, 5, 678)
@@ -292,13 +292,13 @@ def test_get_created_time(self):
self.assertTrue(timezone.is_naive(mt))
naive_date_montreal = timezone.make_naive(aware_date)
self.assertEqual(mt, naive_date_montreal)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

with self.settings(TIME_ZONE='America/Montreal', USE_TZ=True):
mt = self.storage.get_created_time(self.filename)
self.assertTrue(timezone.is_aware(mt))
self.assertEqual(mt, aware_date)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

def test_modified_time_no_file(self):
self.storage._bucket = mock.MagicMock()
@@ -385,7 +385,7 @@ def test_get_available_name(self):
self.storage.file_overwrite = False
self.assertEqual(self.storage.get_available_name(
self.filename), self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename)
self.storage._bucket.get_blob.assert_called_with(self.filename, timeout=60)

def test_get_available_name_unicode(self):
filename = 'ủⓝï℅ⅆℇ.txt'
@@ -417,7 +417,8 @@ def test_storage_save_gzipped(self):
rewind=True,
size=11,
predefined_acl=None,
content_type=None
content_type=None,
timeout=60,
)

def test_storage_save_gzipped_non_seekable(self):
@@ -433,7 +434,8 @@ def test_storage_save_gzipped_non_seekable(self):
rewind=True,
size=11,
predefined_acl=None,
content_type=None
content_type=None,
timeout=60,
)

def test_storage_save_gzip(self):
@@ -453,6 +455,7 @@ def test_storage_save_gzip(self):
size=None,
predefined_acl=None,
content_type='text/css',
timeout=60,
)
args, kwargs = obj.upload_from_file.call_args
content = args[0]
@@ -482,6 +485,7 @@ def test_storage_save_gzip_twice(self):
size=None,
predefined_acl=None,
content_type='text/css',
timeout=60,
)
args, kwargs = obj.upload_from_file.call_args
content = args[0]