diff --git a/storages/backends/s3.py b/storages/backends/s3.py index ef915164..0f99a5a8 100644 --- a/storages/backends/s3.py +++ b/storages/backends/s3.py @@ -3,6 +3,7 @@ import posixpath import tempfile import threading +import typing import warnings from datetime import datetime from datetime import timedelta @@ -10,6 +11,7 @@ from urllib.parse import urlencode from urllib.parse import urlsplit +from cachetools import LRUCache from django.contrib.staticfiles.storage import ManifestFilesMixin from django.core.exceptions import ImproperlyConfigured from django.core.exceptions import SuspiciousOperation @@ -98,6 +100,12 @@ def _filter_download_params(params): } +# Use named tuple to make it clear what the values are +class _S3Meta(typing.NamedTuple): + content_length: int + last_modified: datetime + + @deconstructible class S3File(CompressedFileMixin, File): """ @@ -474,6 +482,8 @@ def _normalize_name(self, name): def _open(self, name, mode="rb"): name = self._normalize_name(clean_name(name)) + # Remove cached metadata since writing will alter it + self._evict_from_cache(name) try: f = S3File(name, mode, self) except ClientError as err: @@ -485,6 +495,8 @@ def _open(self, name, mode="rb"): def _save(self, name, content): cleaned_name = clean_name(name) name = self._normalize_name(cleaned_name) + # Remove cached metadata since writing will alter it + self._evict_from_cache(name) params = self._get_write_parameters(name, content) if is_seekable(content): @@ -516,6 +528,8 @@ def _save(self, name, content): def delete(self, name): try: name = self._normalize_name(clean_name(name)) + # Remove cached metadata since the key will be deleted + self._evict_from_cache(name) self.bucket.Object(name).delete() except ClientError as err: if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404: @@ -527,15 +541,11 @@ def delete(self, name): def exists(self, name): name = self._normalize_name(clean_name(name)) - try: - self.connection.meta.client.head_object(Bucket=self.bucket_name, Key=name) + key_meta = self._get_from_cache(name) # type: _S3Meta + if key_meta: return True - except ClientError as err: - if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404: - return False - - # Some other error was encountered. Re-raise it. - raise + key_meta = self._fetch_to_cache(name) + return key_meta is not None def listdir(self, name): path = self._normalize_name(clean_name(name)) @@ -560,12 +570,14 @@ def listdir(self, name): def size(self, name): name = self._normalize_name(clean_name(name)) - try: - return self.bucket.Object(name).content_length - except ClientError as err: - if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404: - raise FileNotFoundError("File does not exist: %s" % name) - raise # Let it bubble up if it was some other error + key_meta = self._get_from_cache(name) # type: _S3Meta + if key_meta: + # return the size from the cache + return key_meta.content_length + key_meta = self._fetch_to_cache(name) + if key_meta: + return key_meta.content_length + raise FileNotFoundError("File does not exist: %s" % name) def _get_write_parameters(self, name, content=None): params = self.get_object_parameters(name) @@ -601,12 +613,20 @@ def get_modified_time(self, name): USE_TZ is True, otherwise returns a naive datetime in the local timezone. """ name = self._normalize_name(clean_name(name)) - entry = self.bucket.Object(name) + key_meta = self._get_from_cache(name) # type: _S3Meta + if key_meta: + # use the cached last modified time + last_modified = key_meta.last_modified + else: + key_meta = self._fetch_to_cache(name) + if not key_meta: + raise FileNotFoundError("File does not exist: %s" % name) + last_modified = key_meta.last_modified if setting("USE_TZ"): # boto3 returns TZ aware timestamps - return entry.last_modified + return last_modified else: - return make_naive(entry.last_modified) + return make_naive(last_modified) def _strip_signing_parameters(self, url): # Boto3 does not currently support generating URLs that are unsigned. Instead @@ -678,6 +698,39 @@ def get_available_name(self, name, max_length=None): return get_available_overwrite_name(name, max_length) return super().get_available_name(name, max_length) + @property + def _cache(self): + cache = getattr(self, "__cache", None) + if cache is None: + # Even a small cache can be very helpful in reducing the number of + # requests to S3 and increasing the speed for example collectstatic command + self.__cache = LRUCache(maxsize=32) + cache = self.__cache + return cache + + def _add_to_cache(self, name, value): + content_length = value["ContentLength"] + last_modified = value["LastModified"] + self._cache[name] = _S3Meta(content_length, last_modified) + + def _evict_from_cache(self, name): + self._cache.pop(name, None) + + def _get_from_cache(self, name): + return self._cache.get(name, None) + + def _fetch_to_cache(self, name): + try: + key_meta = self.connection.meta.client.head_object( + Bucket=self.bucket_name, Key=name + ) + self._add_to_cache(name, key_meta) + return self._get_from_cache(name) + except ClientError as err: + if err.response["ResponseMetadata"]["HTTPStatusCode"] == 404: + return None + raise + class S3StaticStorage(S3Storage): """Querystring auth must be disabled so that url() returns a consistent output.""" diff --git a/tests/test_s3.py b/tests/test_s3.py index 2d4e3e53..43d4e5bd 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -642,14 +642,16 @@ def test_storage_listdir_empty(self): self.assertEqual(files, []) def test_storage_size(self): - obj = self.storage.bucket.Object.return_value - obj.content_length = 4098 + self.storage.connection.meta.client.head_object.return_value = { + "ContentLength": 4098, + "LastModified": datetime.datetime.utcnow(), + } name = "file.txt" - self.assertEqual(self.storage.size(name), obj.content_length) + self.assertEqual(self.storage.size(name), 4098) def test_storage_size_not_exists(self): - self.storage.bucket.Object.side_effect = ClientError( + self.storage.connection.meta.client.head_object.side_effect = ClientError( {"Error": {}, "ResponseMetadata": {"HTTPStatusCode": 404}}, "HeadObject", ) @@ -666,8 +668,10 @@ def test_storage_mtime(self): self._test_storage_mtime(use_tz) def _test_storage_mtime(self, use_tz): - obj = self.storage.bucket.Object.return_value - obj.last_modified = datetime.datetime.now(datetime.timezone.utc) + self.storage.connection.meta.client.head_object.return_value = { + "ContentLength": 4098, + "LastModified": datetime.datetime.now(datetime.timezone.utc), + } name = "file.txt" self.assertIs( @@ -982,6 +986,65 @@ def test_auth_config(self): access_key="foo", secret_key="boo", session_profile="moo" ) + def test_open_should_evict_from_cache(self): + self._setup_test_data_to_cache() + self.assertTrue(self.storage.exists("file.txt")) + self.assertIsNotNone(self.storage._get_from_cache("file.txt")) + with self.storage.open("file.txt") as _: + self.assertIsNone(self.storage._get_from_cache("file.txt")) + self.assertIsNone(self.storage._get_from_cache("file.txt")) + + def test_save_should_evict_from_cache(self): + self._setup_test_data_to_cache() + self.assertTrue(self.storage.exists("file.txt")) + self.assertIsNotNone(self.storage._get_from_cache("file.txt")) + self.storage.save("file.txt", ContentFile(b"abc")) + self.assertIsNone(self.storage._get_from_cache("file.txt")) + + def test_delete_should_evict_from_cache(self): + self._setup_test_data_to_cache() + self.assertTrue(self.storage.exists("file.txt")) + self.assertIsNotNone(self.storage._get_from_cache("file.txt")) + self.storage.delete("file.txt") + self.assertIsNone(self.storage._get_from_cache("file.txt")) + + def test_exists_should_use_cached_value(self): + self._setup_test_data_to_cache() + self.assertTrue(self.storage.exists("file.txt")) + self.assertTrue(self.storage.exists("file.txt")) + + def test_size_should_use_cached_value(self): + self._setup_test_data_to_cache() + self.assertEqual(self.storage.size("file.txt"), 123) + self.assertEqual(self.storage.size("file.txt"), 123) + + def test_get_modified_time_should_use_cached_value(self): + self._setup_test_data_to_cache() + modified_at = datetime.datetime(2021, 1, 1, 0, 0, 0) + self.assertEqual(self.storage.get_modified_time("file.txt"), modified_at) + self.assertEqual(self.storage.get_modified_time("file.txt"), modified_at) + + def test_get_modified_time_not_exists(self): + self.storage.connection.meta.client.head_object.side_effect = ClientError( + {"Error": {}, "ResponseMetadata": {"HTTPStatusCode": 404}}, + "HeadObject", + ) + with self.assertRaisesMessage( + FileNotFoundError, "File does not exist: file.txt" + ): + self.storage.get_modified_time("file.txt") + + def _setup_test_data_to_cache(self): + mock_connection = mock.MagicMock() + self.storage._connections.connection = mock_connection + mock_connection.meta.client.head_object.side_effect = [ + { + "ContentLength": 123, + "LastModified": datetime.datetime(2021, 1, 1, 0, 0, 0), + }, + Exception("Should not be called"), + ] + class S3StaticStorageTests(TestCase): def setUp(self):