Skip to content

Commit

Permalink
Checksum feature (#181)
Browse files Browse the repository at this point in the history
Add support for flexible checksums
  • Loading branch information
ilevyor authored Mar 3, 2022
1 parent 8af5489 commit 303d62c
Show file tree
Hide file tree
Showing 41 changed files with 4,108 additions and 733 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# IDE Artifacts
.metadata
.vscode
.build
.idea
*.d
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC
$<INSTALL_INTERFACE:include>)

aws_use_package(aws-c-auth)
aws_use_package(aws-checksums)

target_link_libraries(${PROJECT_NAME} PUBLIC ${DEP_AWS_LIBS})

Expand Down
4 changes: 3 additions & 1 deletion builder.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
},
"upstream": [
{ "name": "aws-c-auth" },
{ "name": "aws-c-http" }
{ "name": "aws-c-http" },
{ "name": "aws-checksums" },
{ "name": "aws-c-sdkutils"}
],
"downstream": [
],
Expand Down
1 change: 1 addition & 0 deletions cmake/aws-c-s3-config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ include(CMakeFindDependencyMacro)

find_dependency(aws-c-auth)
find_dependency(aws-c-http)
find_dependency(aws-checksums)

if (BUILD_SHARED_LIBS)
include(${CMAKE_CURRENT_LIST_DIR}/shared/@PROJECT_NAME@-targets.cmake)
Expand Down
7 changes: 7 additions & 0 deletions include/aws/s3/private/s3_auto_ranged_put.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ struct aws_s3_auto_ranged_put {
uint32_t next_part_number;
} threaded_update_data;

/* Very similar to the etag_list used in complete_multipart_upload to create the XML payload. Each part sets the
* corresponding index to its checksum result, so while the list is shared across threads, each index is only
* accessed once (to initialize it) by the corresponding part number, and then again during the complete multipart
* upload request, which is only invoked after all other parts/threads have completed.
*/
struct aws_byte_buf *checksums_list;

/* Members to only be used when the mutex in the base type is locked. */
struct {
struct aws_array_list etag_list;
Expand Down
130 changes: 130 additions & 0 deletions include/aws/s3/private/s3_checksums.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#ifndef AWS_S3_CHECKSUMS_H
#define AWS_S3_CHECKSUMS_H
/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#include "aws/s3/s3_client.h"

struct aws_s3_checksum;

/**
* Table of function pointers that implement one checksum algorithm for a struct aws_s3_checksum.
* NOTE(review): presumably aws_checksum_destroy/aws_checksum_update/aws_checksum_finalize dispatch through these
* entries; the implementation is not visible in this header — confirm.
*/
struct aws_checksum_vtable {
/* Tears down the given checksum object. */
void (*destroy)(struct aws_s3_checksum *checksum);
/* Feeds the bytes referenced by buf into the checksum; returns an int status code. */
int (*update)(struct aws_s3_checksum *checksum, const struct aws_byte_cursor *buf);
/* Writes the result to out; NOTE(review): truncate_to presumably limits the number of digest bytes written —
* confirm. */
int (*finalize)(struct aws_s3_checksum *checksum, struct aws_byte_buf *out, size_t truncate_to);
};

/**
* A checksum object: algorithm-specific behavior is reached through vtable, with algorithm-specific state behind
* impl.
*/
struct aws_s3_checksum {
/* Allocator associated with this checksum. NOTE(review): presumably the one it was created with — confirm. */
struct aws_allocator *allocator;
/* destroy/update/finalize function table for this checksum. */
struct aws_checksum_vtable *vtable;
/* Opaque pointer to implementation-defined state. */
void *impl;
/* NOTE(review): presumably the digest length in bytes for `algorithm` (compare
* aws_get_digest_size_from_algorithm) — confirm. */
size_t digest_size;
/* The checksum algorithm this object uses. */
enum aws_s3_checksum_algorithm algorithm;
/* NOTE(review): presumably false once the checksum has been finalized or has failed — not shown here; confirm. */
bool good;
};

/**
* A stream that wraps another stream, computes a running checksum as it is read, and outputs the checksum when the
* stream is destroyed. Scanning this stream will immediately fail, as it would prevent an accurate calculation of
* the checksum. NOTE(review): "scanning" presumably refers to seeking — confirm.
*
* @param allocator
* @param existing_stream
* The real content to read from. The checksum stream takes ownership of the existing stream's lifetime: destroying
* the checksum stream destroys the existing stream. The checksum of the existing stream is written to
* checksum_output upon destruction.
* @param algorithm
* Checksum algorithm to use.
* @param checksum_output
* Checksum of the `existing_stream`, owned by the caller, which will be calculated when this stream is destroyed.
*/
AWS_S3_API
struct aws_input_stream *aws_checksum_stream_new(
struct aws_allocator *allocator,
struct aws_input_stream *existing_stream,
enum aws_s3_checksum_algorithm algorithm,
struct aws_byte_buf *checksum_output);

/**
* A stream that takes in a stream, turns it into a chunk, and follows it with an aws-chunked trailer. Scanning this
* stream will immediately fail, as it would prevent an accurate calculation of the checksum.
* NOTE(review): "scanning" presumably refers to seeking — confirm.
*
* @param allocator
* @param existing_stream
* The data to be chunkified. It is prepended with information on the stream length, and followed by a final chunk
* and a trailing chunk containing a checksum of the existing stream. The chunk stream takes ownership of the
* existing stream's lifetime: destroying the chunk stream will destroy the existing stream.
* @param algorithm
* Checksum algorithm to use.
* @param checksum_output
* Optional argument. If provided, the buffer will be initialized to the appropriate size and filled with the
* checksum result when it is calculated. Cleaning it up is the caller's responsibility.
*/
AWS_S3_API
struct aws_input_stream *aws_chunk_stream_new(
struct aws_allocator *allocator,
struct aws_input_stream *existing_stream,
enum aws_s3_checksum_algorithm algorithm,
struct aws_byte_buf *checksum_output);

/**
* Get the size of the checksum output corresponding to the aws_s3_checksum_algorithm enum value.
*
* @param algorithm
* The checksum algorithm to look up.
* @return The size of that algorithm's checksum output. NOTE(review): presumably in bytes — confirm.
*/
AWS_S3_API
size_t aws_get_digest_size_from_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the header name corresponding to the aws_s3_checksum_algorithm enum value.
*
* @param algorithm
* The checksum algorithm to look up.
* @return Pointer to a cursor over the header name. NOTE(review): the result for an algorithm that has no header
* (possibly NULL) is not visible in this header — confirm before dereferencing.
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_http_header_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the create multipart upload header name corresponding to the aws_s3_checksum_algorithm enum value.
*
* @param algorithm
* The checksum algorithm to look up.
* @return Pointer to a cursor over the header name. NOTE(review): the result for an algorithm that has no header
* (possibly NULL) is not visible in this header — confirm before dereferencing.
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_create_mpu_header_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the complete multipart upload name corresponding to the aws_s3_checksum_algorithm enum value.
* NOTE(review): presumably the name used for the checksum in the complete multipart upload payload — confirm.
*
* @param algorithm
* The checksum algorithm to look up.
* @return Pointer to a cursor over the name. NOTE(review): the result for an algorithm that has no name (possibly
* NULL) is not visible in this header — confirm before dereferencing.
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_complete_mpu_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* Create a new aws_s3_checksum corresponding to the aws_s3_checksum_algorithm enum value.
*
* @param allocator
* Allocator passed to the new checksum.
* @param algorithm
* The checksum algorithm the new object should use.
* @return The new checksum. NOTE(review): presumably NULL on failure or for an unsupported algorithm, and
* presumably released with aws_checksum_destroy — confirm.
*/
AWS_S3_API
struct aws_s3_checksum *aws_checksum_new(struct aws_allocator *allocator, enum aws_s3_checksum_algorithm algorithm);

/**
* Compute the checksum corresponding to the provided enum value.
*
* Design note: passing a function pointer around instead of using a conditional would be faster, but the
* improvement would be negligible compared to the cost of processing the data twice (the only situation in which
* this function would be used), and the code would be harder to follow.
*
* @param allocator
* @param algorithm
* Checksum algorithm to use.
* @param input
* The data to checksum.
* @param output
* Receives the checksum. NOTE(review): presumably must be allocated by the caller, as for aws_checksum_finalize —
* confirm.
* @param truncate_to
* NOTE(review): presumably limits the number of digest bytes written to output — confirm.
* @return An int status code. NOTE(review): presumably AWS_OP_SUCCESS / AWS_OP_ERR — confirm.
*/
AWS_S3_API
int aws_checksum_compute(
struct aws_allocator *allocator,
enum aws_s3_checksum_algorithm algorithm,
const struct aws_byte_cursor *input,
struct aws_byte_buf *output,
size_t truncate_to);

/**
* Cleans up and deallocates checksum.
*
* @param checksum
* The checksum to destroy. NOTE(review): whether NULL is accepted is not visible in this header — confirm.
*/
AWS_S3_API
void aws_checksum_destroy(struct aws_s3_checksum *checksum);

/**
* Updates the running checksum with to_checksum. This can be called multiple times.
*
* @param checksum
* The running checksum to update.
* @param to_checksum
* The bytes to add to the running checksum.
* @return An int status code. NOTE(review): presumably AWS_OP_SUCCESS / AWS_OP_ERR — confirm.
*/
AWS_S3_API
int aws_checksum_update(struct aws_s3_checksum *checksum, const struct aws_byte_cursor *to_checksum);

/**
* Completes the checksum computation and writes the final digest to output.
* Allocation of output is the caller's responsibility.
*
* @param checksum
* The checksum to finalize.
* @param output
* Caller-allocated buffer that receives the digest.
* @param truncate_to
* NOTE(review): presumably limits the number of digest bytes written to output — confirm.
* @return An int status code. NOTE(review): presumably AWS_OP_SUCCESS / AWS_OP_ERR — confirm.
*/
AWS_S3_API
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output, size_t truncate_to);
#endif
4 changes: 3 additions & 1 deletion include/aws/s3/private/s3_default_meta_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ struct aws_s3_meta_request *aws_s3_meta_request_default_new(
struct aws_s3_client *client,
uint64_t content_length,
bool should_compute_content_md5,
const struct aws_s3_meta_request_options *options);
const struct aws_s3_meta_request_options *options,
const enum aws_s3_checksum_algorithm checksum_algorithm,
const bool validate_get_response_checksum);

#endif
18 changes: 17 additions & 1 deletion include/aws/s3/private/s3_meta_request_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ struct aws_s3_request_options;
struct aws_http_headers;
struct aws_http_make_request_options;
struct aws_retry_strategy;
struct aws_byte_buffer;

enum aws_s3_meta_request_state {
AWS_S3_META_REQUEST_STATE_ACTIVE,
Expand Down Expand Up @@ -129,6 +128,11 @@ struct aws_s3_meta_request {
aws_s3_meta_request_shutdown_fn *shutdown_callback;
aws_s3_meta_request_progress_fn *progress_callback;

/* Customer-specified callbacks, to be invoked by our specialized callbacks that calculate the response checksum. */
aws_s3_meta_request_headers_callback_fn *headers_user_callback_after_checksum;
aws_s3_meta_request_receive_body_callback_fn *body_user_callback_after_checksum;
aws_s3_meta_request_finish_fn *finish_user_callback_after_checksum;

enum aws_s3_meta_request_type type;

struct {
Expand Down Expand Up @@ -179,6 +183,16 @@ struct aws_s3_meta_request {
} client_process_work_threaded_data;

const bool should_compute_content_md5;

const enum aws_s3_checksum_algorithm checksum_algorithm;

bool validate_get_response_checksum;

/* checksum found in either a default get request, or in the initial head request of a multipart get */
struct aws_byte_buf meta_request_level_response_header_checksum;

/* running checksum of all of the parts of a default get, or ranged get meta request */
struct aws_s3_checksum *meta_request_level_running_response_sum;
};

AWS_EXTERN_C_BEGIN
Expand All @@ -190,6 +204,8 @@ int aws_s3_meta_request_init_base(
struct aws_s3_client *client,
size_t part_size,
bool should_compute_content_md5,
const enum aws_s3_checksum_algorithm checksum_algorithm,
bool validate_get_response_checksum,
const struct aws_s3_meta_request_options *options,
void *impl,
struct aws_s3_meta_request_vtable *vtable,
Expand Down
14 changes: 14 additions & 0 deletions include/aws/s3/private/s3_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,23 @@ struct aws_s3_request {
* prepare function, this will be 0.*/
uint32_t num_times_prepared;

/* checksum found in the header of an individual get part http request */
struct aws_byte_buf request_level_response_header_checksum;

/* running checksum of the response to an individual get part http request */
struct aws_s3_checksum *request_level_running_response_sum;

/* Get request only, was there a checksum to validate */
bool did_validate;

/* Get request only, if there was an attached checksum to validate did it match the computed checksum */
bool checksum_match;

/* Tag that defines what the built request will actually consist of. This is meant to be space for an enum defined
* by the derived type. Request tags do not necessarily map 1:1 with actual S3 API requests. (For example, they can
* be more contextual, like "first part" instead of just "part".) */

/* TODO: this should be a union type to make it clear that this could be one of two enums for puts, and gets. */
int request_tag;

/* Members of this structure will be repopulated each time the request is sent. If the request fails, and needs to
Expand Down
18 changes: 13 additions & 5 deletions include/aws/s3/private/s3_request_messages.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* SPDX-License-Identifier: Apache-2.0.
*/
#include "aws/s3/s3.h"
#include "aws/s3/s3_client.h"
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
Expand All @@ -20,7 +21,7 @@ struct aws_array_list;
AWS_EXTERN_C_BEGIN

AWS_S3_API
struct aws_http_message *aws_s3_message_util_copy_http_message(
struct aws_http_message *aws_s3_message_util_copy_http_message_no_body(
struct aws_allocator *allocator,
struct aws_http_message *message,
const struct aws_byte_cursor *excluded_headers_arrays,
Expand All @@ -30,7 +31,9 @@ AWS_S3_API
struct aws_input_stream *aws_s3_message_util_assign_body(
struct aws_allocator *allocator,
struct aws_byte_buf *byte_buf,
struct aws_http_message *out_message);
struct aws_http_message *out_message,
enum aws_s3_checksum_algorithm algorithm,
struct aws_byte_buf *out_checksum);

/* Create an HTTP request for an S3 Ranged Get Object Request, using the given request as a basis */
AWS_S3_API
Expand All @@ -52,7 +55,8 @@ int aws_s3_message_util_set_multipart_request_path(
AWS_S3_API
struct aws_http_message *aws_s3_create_multipart_upload_message_new(
struct aws_allocator *allocator,
struct aws_http_message *base_message);
struct aws_http_message *base_message,
enum aws_s3_checksum_algorithm algorithm);

/* Create an HTTP request for an S3 Put Object request, using the original request as a basis. Creates and assigns a
* body stream using the passed in buffer. If multipart is not needed, part number and upload_id can be 0 and NULL,
Expand All @@ -64,7 +68,9 @@ struct aws_http_message *aws_s3_upload_part_message_new(
struct aws_byte_buf *buffer,
uint32_t part_number,
const struct aws_string *upload_id,
bool should_compute_content_md5);
bool should_compute_content_md5,
const enum aws_s3_checksum_algorithm checksum_algorithm,
struct aws_byte_buf *encoded_checksum_output);

/* Create an HTTP request for an S3 UploadPartCopy request, using the original request as a basis.
* If multipart is not needed, part number and upload_id can be 0 and NULL,
Expand All @@ -89,7 +95,9 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
struct aws_http_message *base_message,
struct aws_byte_buf *body_buffer,
const struct aws_string *upload_id,
const struct aws_array_list *etags);
const struct aws_array_list *etags,
struct aws_byte_buf *checksums,
enum aws_s3_checksum_algorithm algorithm);

AWS_S3_API
struct aws_http_message *aws_s3_abort_multipart_upload_message_new(
Expand Down
57 changes: 57 additions & 0 deletions include/aws/s3/private/s3_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,54 @@ AWS_EXTERN_C_BEGIN
AWS_S3_API
extern const struct aws_byte_cursor g_content_md5_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_trailer_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_request_validation_mode;

AWS_S3_API
extern const struct aws_byte_cursor g_enabled;

AWS_S3_API
extern const struct aws_byte_cursor g_create_mpu_checksum_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha256_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha256_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_complete_mpu_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_complete_mpu_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_complete_mpu_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha256_complete_mpu_name;

AWS_S3_API
extern const struct aws_byte_cursor g_s3_client_version;

Expand All @@ -71,9 +119,18 @@ extern const struct aws_byte_cursor g_host_header_name;
AWS_S3_API
extern const struct aws_byte_cursor g_content_type_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_content_encoding_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_content_encoding_header_aws_chunked;

AWS_S3_API
extern const struct aws_byte_cursor g_content_length_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_decoded_content_length_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_etag_header_name;

Expand Down
Loading

0 comments on commit 303d62c

Please sign in to comment.