From 2ea014571b3fd0c222343244df324c1256079bf9 Mon Sep 17 00:00:00 2001 From: Michael Graeb Date: Wed, 18 Oct 2023 11:07:29 -0700 Subject: [PATCH] Add checksum settings for S3 (#512) --- awscrt/s3.py | 112 ++++++++++++++++++++++++++++++++------- crt/aws-c-cal | 2 +- crt/aws-c-s3 | 2 +- crt/s2n | 2 +- source/s3_client.c | 32 +++++------ source/s3_meta_request.c | 45 ++++++++++------ test/test_s3.py | 71 +++++++++++++++++++++---- 7 files changed, 200 insertions(+), 66 deletions(-) diff --git a/awscrt/s3.py b/awscrt/s3.py index 2738f2d4a..26432270a 100644 --- a/awscrt/s3.py +++ b/awscrt/s3.py @@ -12,7 +12,9 @@ from awscrt.io import ClientBootstrap, TlsConnectionOptions from awscrt.auth import AwsCredentialsProvider, AwsSignatureType, AwsSignedBodyHeaderType, AwsSignedBodyValue, AwsSigningAlgorithm, AwsSigningConfig import awscrt.exceptions +from dataclasses import dataclass import threading +from typing import Optional from enum import IntEnum @@ -50,6 +52,60 @@ class S3RequestTlsMode(IntEnum): """ +class S3ChecksumAlgorithm(IntEnum): + """ + Checksum algorithm used to verify object integrity. + https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html + """ + + CRC32C = 1 + """CRC32C""" + + CRC32 = 2 + """CRC32""" + + SHA1 = 3 + """SHA-1""" + + SHA256 = 4 + """SHA-256""" + + +class S3ChecksumLocation(IntEnum): + """Where to put the checksum.""" + + HEADER = 1 + """ + Add checksum as a request header field. + The checksum is calculated before any part of the request is sent to the server. + """ + + TRAILER = 2 + """ + Add checksum as a request trailer field. + The checksum is calculated as the body is streamed to the server, then + added as a trailer field. This may be more efficient than HEADER, but + can only be used with "streaming" requests that support it. + """ + + +@dataclass +class S3ChecksumConfig: + """Configures how the S3Client calculates and verifies checksums.""" + + algorithm: Optional[S3ChecksumAlgorithm] = None + """ + If set, the S3Client will calculate a checksum using this algorithm + and add it to the request. If you set this, you must also set `location`. + """ + + location: Optional[S3ChecksumLocation] = None + """Where to put the request checksum.""" + + validate_response: bool = False + """Whether to retrieve and validate response checksums.""" + + class S3Client(NativeResource): """S3 client @@ -153,12 +209,13 @@ def on_shutdown(): def make_request( self, *, - request, type, - signing_config=None, - credential_provider=None, + request, recv_filepath=None, send_filepath=None, + signing_config=None, + credential_provider=None, + checksum_config=None, on_headers=None, on_body=None, on_done=None, @@ -168,19 +225,11 @@ def make_request( requests under the hood for acceleration. Keyword Args: - request (HttpRequest): The overall outgoing API request for S3 operation. - If the request body is a file, set send_filepath for better performance. - type (S3RequestType): The type of S3 request passed in, :attr:`~S3RequestType.GET_OBJECT`/:attr:`~S3RequestType.PUT_OBJECT` can be accelerated - signing_config (Optional[AwsSigningConfig]): - Configuration for signing of the request to override the configuration from client. Use :func:`create_default_s3_signing_config()` to create the default config. - If None is provided, the client configuration will be used. - - credential_provider (Optional[AwsCredentialsProvider]): Deprecated, prefer `signing_config` instead. - Credentials providers source the :class:`~awscrt.auth.AwsCredentials` needed to sign an authenticated AWS request, for this request only. - If None is provided, the client configuration will be used. + request (HttpRequest): The overall outgoing API request for S3 operation. + If the request body is a file, set send_filepath for better performance. recv_filepath (Optional[str]): Optional file path. If set, the response body is written directly to a file and the @@ -192,6 +241,16 @@ def make_request( request's `body_stream` is ignored. This should give better performance than reading a file from a stream. + signing_config (Optional[AwsSigningConfig]): + Configuration for signing of the request to override the configuration from client. Use :func:`create_default_s3_signing_config()` to create the default config. + If None is provided, the client configuration will be used. + + credential_provider (Optional[AwsCredentialsProvider]): Deprecated, prefer `signing_config` instead. + Credentials providers source the :class:`~awscrt.auth.AwsCredentials` needed to sign an authenticated AWS request, for this request only. + If None is provided, the client configuration will be used. + + checksum_config (Optional[S3ChecksumConfig]): Optional checksum settings. + on_headers: Optional callback invoked as the response received, and even the API request has been split into multiple parts, this callback will only be invoked once as it's just making one API request to S3. @@ -244,12 +303,13 @@ def make_request( """ return S3Request( client=self, - request=request, type=type, - signing_config=signing_config, - credential_provider=credential_provider, + request=request, recv_filepath=recv_filepath, send_filepath=send_filepath, + signing_config=signing_config, + credential_provider=credential_provider, + checksum_config=checksum_config, on_headers=on_headers, on_body=on_body, on_done=on_done, @@ -277,12 +337,13 @@ def __init__( self, *, client, - request, type, - signing_config=None, - credential_provider=None, + request, recv_filepath=None, send_filepath=None, + signing_config=None, + credential_provider=None, + checksum_config=None, on_headers=None, on_body=None, on_done=None, @@ -299,6 +360,16 @@ def __init__( self._finished_future = Future() self.shutdown_event = threading.Event() + checksum_algorithm = 0 # 0 means NONE in C + checksum_location = 0 # 0 means NONE in C + validate_response_checksum = False + if checksum_config is not None: + if checksum_config.algorithm is not None: + checksum_algorithm = checksum_config.algorithm.value + if checksum_config.location is not None: + checksum_location = checksum_config.location.value + validate_response_checksum = checksum_config.validate_response + s3_request_core = _S3RequestCore( request, self._finished_future, @@ -320,6 +391,9 @@ def __init__( recv_filepath, send_filepath, region, + checksum_algorithm, + checksum_location, + validate_response_checksum, s3_request_core) @property diff --git a/crt/aws-c-cal b/crt/aws-c-cal index b0219caef..a916a84ec 160000 --- a/crt/aws-c-cal +++ b/crt/aws-c-cal @@ -1 +1 @@ -Subproject commit b0219caef2c2261daeac20d617cbdca5e13d8d36 +Subproject commit a916a84ec07d028fa7d8c09d4aecaa81df7e8a23 diff --git a/crt/aws-c-s3 b/crt/aws-c-s3 index 1fe34646f..f710806f2 160000 --- a/crt/aws-c-s3 +++ b/crt/aws-c-s3 @@ -1 +1 @@ -Subproject commit 1fe34646f6bb2282491077b1d2407c724c0d5471 +Subproject commit f710806f290d9a4d34c4c5ce07be66d314b6fba2 diff --git a/crt/s2n b/crt/s2n index 4654fecb0..3526e69d6 160000 --- a/crt/s2n +++ b/crt/s2n @@ -1 +1 @@ -Subproject commit 4654fecb05cd5aacbda262654eb95a3876183698 +Subproject commit 3526e69d6b61efedf454f436a6d876fb3e9b6cd7 diff --git a/source/s3_client.c b/source/s3_client.c index f7a558f5a..4761c59ff 100644 --- a/source/s3_client.c +++ b/source/s3_client.c @@ -71,17 +71,16 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { struct aws_allocator *allocator = aws_py_get_allocator(); - PyObject *bootstrap_py = NULL; - PyObject *signing_config_py = NULL; - PyObject *credential_provider_py = NULL; - PyObject *tls_options_py = NULL; - PyObject *on_shutdown_py = NULL; - PyObject *py_core = NULL; - const char *region; - Py_ssize_t region_len; - uint64_t part_size = 0; - double throughput_target_gbps = 0; - int tls_mode; + PyObject *bootstrap_py; /* O */ + PyObject *signing_config_py; /* O */ + PyObject *credential_provider_py; /* O */ + PyObject *tls_options_py; /* O */ + PyObject *on_shutdown_py; /* O */ + struct aws_byte_cursor region; /* s# */ + int tls_mode; /* i */ + uint64_t part_size; /* K */ + double throughput_target_gbps; /* d */ + PyObject *py_core; /* O */ if (!PyArg_ParseTuple( args, "OOOOOs#iKdO", @@ -90,8 +89,8 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { &credential_provider_py, &tls_options_py, &on_shutdown_py, - ®ion, - ®ion_len, + ®ion.ptr, + ®ion.len, &tls_mode, &part_size, &throughput_target_gbps, @@ -121,11 +120,8 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { struct aws_signing_config_aws signing_config_from_credentials_provider; AWS_ZERO_STRUCT(signing_config_from_credentials_provider); - struct aws_byte_cursor region_cursor = aws_byte_cursor_from_array((const uint8_t *)region, region_len); - if (credential_provider) { - aws_s3_init_default_signing_config( - &signing_config_from_credentials_provider, region_cursor, credential_provider); + aws_s3_init_default_signing_config(&signing_config_from_credentials_provider, region, credential_provider); signing_config = &signing_config_from_credentials_provider; } @@ -157,7 +153,7 @@ PyObject *aws_py_s3_client_new(PyObject *self, PyObject *args) { Py_INCREF(s3_client->py_core); struct aws_s3_client_config s3_config = { - .region = aws_byte_cursor_from_array((const uint8_t *)region, region_len), + .region = region, .client_bootstrap = bootstrap, .tls_mode = tls_mode, .signing_config = signing_config, diff --git a/source/s3_meta_request.c b/source/s3_meta_request.c index c57efde20..5646dda75 100644 --- a/source/s3_meta_request.c +++ b/source/s3_meta_request.c @@ -319,6 +319,7 @@ static void s_s3_request_on_progress( const struct aws_s3_meta_request_progress *progress, void *user_data) { + (void)meta_request; struct s3_meta_request_binding *request_binding = user_data; bool report_progress = false; @@ -346,20 +347,22 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { struct aws_allocator *allocator = aws_py_get_allocator(); - PyObject *py_s3_request = NULL; - PyObject *s3_client_py = NULL; - PyObject *http_request_py = NULL; - int type; - PyObject *signing_config_py = NULL; - PyObject *credential_provider_py = NULL; - const char *recv_filepath; - const char *send_filepath; - const char *region; - Py_ssize_t region_len; - PyObject *py_core = NULL; + PyObject *py_s3_request; /* O */ + PyObject *s3_client_py; /* O */ + PyObject *http_request_py; /* O */ + int type; /* i */ + PyObject *signing_config_py; /* O */ + PyObject *credential_provider_py; /* O */ + const char *recv_filepath; /* z */ + const char *send_filepath; /* z */ + struct aws_byte_cursor region; /* s# */ + enum aws_s3_checksum_algorithm checksum_algorithm; /* i */ + enum aws_s3_checksum_location checksum_location; /* i */ + int validate_response_checksum; /* p - boolean predicate */ + PyObject *py_core; /* O */ if (!PyArg_ParseTuple( args, - "OOOiOOzzs#O", + "OOOiOOzzs#iipO", &py_s3_request, &s3_client_py, &http_request_py, @@ -368,8 +371,11 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { &credential_provider_py, &recv_filepath, &send_filepath, - ®ion, - ®ion_len, + ®ion.ptr, + ®ion.len, + &checksum_algorithm, + &checksum_location, + &validate_response_checksum, &py_core)) { return NULL; } @@ -402,12 +408,16 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { struct aws_signing_config_aws signing_config_from_credentials_provider; AWS_ZERO_STRUCT(signing_config_from_credentials_provider); if (credential_provider) { - struct aws_byte_cursor region_cursor = aws_byte_cursor_from_array((const uint8_t *)region, region_len); - aws_s3_init_default_signing_config( - &signing_config_from_credentials_provider, region_cursor, credential_provider); + aws_s3_init_default_signing_config(&signing_config_from_credentials_provider, region, credential_provider); signing_config = &signing_config_from_credentials_provider; } + struct aws_s3_checksum_config checksum_config = { + .checksum_algorithm = checksum_algorithm, + .location = checksum_location, + .validate_response_checksum = validate_response_checksum != 0, + }; + struct s3_meta_request_binding *meta_request = aws_mem_calloc(allocator, 1, sizeof(struct s3_meta_request_binding)); if (!meta_request) { return PyErr_AwsLastError(); @@ -438,6 +448,7 @@ PyObject *aws_py_s3_client_make_meta_request(PyObject *self, PyObject *args) { .type = type, .message = http_request, .signing_config = signing_config, + .checksum_config = &checksum_config, .send_filepath = aws_byte_cursor_from_c_str(send_filepath), .headers_callback = s_s3_request_on_headers, .body_callback = s_s3_request_on_body, diff --git a/test/test_s3.py b/test/test_s3.py index 736a1ecf3..0e32f6675 100644 --- a/test/test_s3.py +++ b/test/test_s3.py @@ -1,6 +1,7 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0. +import base64 from io import BytesIO import unittest import os @@ -11,9 +12,31 @@ from concurrent.futures import Future from awscrt.http import HttpHeaders, HttpRequest -from awscrt.s3 import S3Client, S3RequestType, create_default_s3_signing_config -from awscrt.io import ClientBootstrap, ClientTlsContext, DefaultHostResolver, EventLoopGroup, TlsConnectionOptions, TlsContextOptions -from awscrt.auth import AwsCredentialsProvider, AwsSignatureType, AwsSignedBodyHeaderType, AwsSignedBodyValue, AwsSigningAlgorithm, AwsSigningConfig +from awscrt.s3 import ( + S3ChecksumAlgorithm, + S3ChecksumConfig, + S3ChecksumLocation, + S3Client, + S3RequestType, + create_default_s3_signing_config, +) +from awscrt.io import ( + ClientBootstrap, + ClientTlsContext, + DefaultHostResolver, + EventLoopGroup, + TlsConnectionOptions, + TlsContextOptions, +) +from awscrt.auth import ( + AwsCredentialsProvider, + AwsSignatureType, + AwsSignedBodyHeaderType, + AwsSignedBodyValue, + AwsSigningAlgorithm, + AwsSigningConfig, +) +import zlib MB = 1024 ** 2 GB = 1024 ** 3 @@ -131,6 +154,7 @@ def test_wait_shutdown(self): @unittest.skipUnless(os.environ.get('AWS_TEST_S3'), 'set env var to run test: AWS_TEST_S3') class S3RequestTest(NativeResourceTest): def setUp(self): + super().setUp() # TODO: use env-vars to customize how these tests are run, instead of relying on hard-coded values self.get_test_object_path = "/get_object_test_10MB.txt" self.put_test_object_path = "/put_object_test_py_10MB.txt" @@ -150,7 +174,6 @@ def setUp(self): self.files = FileCreator() self.temp_put_obj_file_path = self.files.create_file_with_size("temp_put_obj_10mb", 10 * MB) - super().setUp() def tearDown(self): self.files.remove_all() @@ -203,17 +226,16 @@ def _test_s3_put_get_object( request, request_type, exception_name=None, - send_filepath=None, - recv_filepath=None): + **kwargs, + ): s3_client = s3_client_new(False, self.region, 5 * MB) s3_request = s3_client.make_request( request=request, type=request_type, on_headers=self._on_request_headers, - send_filepath=send_filepath, - recv_filepath=recv_filepath, - on_body=self._on_request_body) + on_body=self._on_request_body, + **kwargs) finished_future = s3_request.finished_future try: finished_future.result(self.timeout) @@ -356,6 +378,37 @@ def on_done_remove_file(**kwargs): "the transferred length reported does not match body we sent") self._validate_successful_response(request_type is S3RequestType.PUT_OBJECT) + def test_put_get_with_checksum(self): + put_body = b'hello world' + put_body_stream = BytesIO(put_body) + content_length = len(put_body) + path = '/hello-world.txt' + + # calculate expected CRC32 header value: + # a string containing the url-safe-base64-encoding of a big-endian-32-bit-CRC + crc32_int = zlib.crc32(put_body) + crc32_big_endian = crc32_int.to_bytes(4, 'big') + crc32_base64_bytes = base64.urlsafe_b64encode(crc32_big_endian) + crc32_base64_str = crc32_base64_bytes.decode() + + # upload, with client adding checksum + upload_request = self._put_object_request(put_body_stream, content_length, path=path) + upload_checksum_config = S3ChecksumConfig( + algorithm=S3ChecksumAlgorithm.CRC32, + location=S3ChecksumLocation.TRAILER) + self._test_s3_put_get_object(upload_request, S3RequestType.PUT_OBJECT, + checksum_config=upload_checksum_config) + self.assertEqual(HttpHeaders(self.response_headers).get('x-amz-checksum-crc32'), + crc32_base64_str) + + # download, with client validating checksum + download_request = self._get_object_request(path) + download_checksum_config = S3ChecksumConfig(validate_response=True) + self._test_s3_put_get_object(download_request, S3RequestType.GET_OBJECT, + checksum_config=download_checksum_config) + self.assertEqual(HttpHeaders(self.response_headers).get('x-amz-checksum-crc32'), + crc32_base64_str) + def _on_progress_cancel_after_first_chunk(self, progress): self.transferred_len += progress self.progress_invoked += 1