diff --git a/include/aws/common/byte_buf.h b/include/aws/common/byte_buf.h
index 17b0ae59b..6fc5c3ff9 100644
--- a/include/aws/common/byte_buf.h
+++ b/include/aws/common/byte_buf.h
@@ -135,6 +135,18 @@ AWS_COMMON_API int aws_byte_buf_init_copy(
 AWS_COMMON_API int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename);
 
+/**
+ * Same as aws_byte_buf_init_from_file(), but for reading "special files" like /proc/cpuinfo.
+ * These files don't accurately report their size, so size_hint is used as the initial buffer size,
+ * and the buffer grows until the whole file is read.
+ */
+AWS_COMMON_API
+int aws_byte_buf_init_from_file_with_size_hint(
+    struct aws_byte_buf *out_buf,
+    struct aws_allocator *alloc,
+    const char *filename,
+    size_t size_hint);
+
 /**
  * Evaluates the set of properties that define the shape of all valid aws_byte_buf structures.
  * It is also a cheap check, in the sense it run in constant time (i.e., no loops or recursion).
diff --git a/source/file.c b/source/file.c
index 050d9cc62..504e547f5 100644
--- a/source/file.c
+++ b/source/file.c
@@ -11,6 +11,16 @@
 
 #include <errno.h>
 
+/* For "special files", the OS often lies about size.
+ * For example, on Amazon Linux 2:
+ * /proc/cpuinfo: size is 0, but contents are several KB of data.
+ * /sys/devices/virtual/dmi/id/product_name: size is 4096, but contents are "c5.2xlarge"
+ *
+ * Therefore, we may need to grow the buffer as we read until EOF.
+ * This is the min/max step size for growth. */
+#define MIN_BUFFER_GROWTH_READING_FILES 32
+#define MAX_BUFFER_GROWTH_READING_FILES 4096
+
 FILE *aws_fopen(const char *file_path, const char *mode) {
     if (!file_path || strlen(file_path) == 0) {
         AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to open file. path is empty");
@@ -34,7 +44,13 @@ FILE *aws_fopen(const char *file_path, const char *mode) {
     return file;
 }
 
-int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
+/* Helper function used by aws_byte_buf_init_from_file() and aws_byte_buf_init_from_file_with_size_hint() */
+static int s_byte_buf_init_from_file_impl(
+    struct aws_byte_buf *out_buf,
+    struct aws_allocator *alloc,
+    const char *filename,
+    bool use_file_size_as_hint,
+    size_t size_hint) {
 
     AWS_ZERO_STRUCT(*out_buf);
     FILE *fp = aws_fopen(filename, "rb");
@@ -42,63 +58,80 @@ int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocat
         goto error;
     }
 
-    int64_t len64 = 0;
-    if (aws_file_get_length(fp, &len64)) {
-        AWS_LOGF_ERROR(
-            AWS_LS_COMMON_IO,
-            "static: Failed to get file length. file:'%s' error:%s",
-            filename,
-            aws_error_name(aws_last_error()));
-        goto error;
+    if (use_file_size_as_hint) {
+        int64_t len64 = 0;
+        if (aws_file_get_length(fp, &len64)) {
+            AWS_LOGF_ERROR(
+                AWS_LS_COMMON_IO,
+                "static: Failed to get file length. file:'%s' error:%s",
+                filename,
+                aws_error_name(aws_last_error()));
+            goto error;
+        }
+
+        if (len64 >= SIZE_MAX) {
+            aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
+            AWS_LOGF_ERROR(
+                AWS_LS_COMMON_IO,
+                "static: File too large to read into memory. file:'%s' error:%s",
+                filename,
+                aws_error_name(aws_last_error()));
+            goto error;
+        }
+
+        /* Leave space for null terminator at end of buffer */
+        size_hint = (size_t)len64 + 1;
     }
 
-    if (len64 >= SIZE_MAX) {
-        aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
-        AWS_LOGF_ERROR(
-            AWS_LS_COMMON_IO,
-            "static: File too large to read into memory. file:'%s' error:%s",
-            filename,
-            aws_error_name(aws_last_error()));
-        goto error;
+    aws_byte_buf_init(out_buf, alloc, size_hint);
+
+    /* Read in a loop until we hit EOF */
+    while (true) {
+        /* Expand buffer if necessary (at a reasonable rate) */
+        if (out_buf->len == out_buf->capacity) {
+            size_t additional_capacity = out_buf->capacity;
+            additional_capacity = aws_max_size(MIN_BUFFER_GROWTH_READING_FILES, additional_capacity);
+            additional_capacity = aws_min_size(MAX_BUFFER_GROWTH_READING_FILES, additional_capacity);
+            if (aws_byte_buf_reserve_relative(out_buf, additional_capacity)) {
+                AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to grow buffer for file:'%s'", filename);
+                goto error;
+            }
+        }
+
+        size_t space_available = out_buf->capacity - out_buf->len;
+        size_t bytes_read = fread(out_buf->buffer + out_buf->len, 1, space_available, fp);
+        out_buf->len += bytes_read;
+
+        /* If EOF, we're done! */
+        if (feof(fp)) {
+            break;
+        }
+
+        /* If no EOF but we read 0 bytes, there's been an error or at least we need
+         * to treat it like one because we can't just infinitely loop. */
+        if (bytes_read == 0) {
+            int errno_value = ferror(fp) ? errno : 0; /* Always cache errno before potential side-effect */
+            aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_READ_FAILURE);
+            AWS_LOGF_ERROR(
+                AWS_LS_COMMON_IO,
+                "static: Failed reading file:'%s' errno:%d aws-error:%s",
+                filename,
+                errno_value,
+                aws_error_name(aws_last_error()));
+            goto error;
+        }
     }
 
-    /*
-     * This number is usually correct, but in cases of device files that don't correspond to storage on disk,
-     * it may just be the size of a page. Go ahead and use it as a good hint of how much to allocate initially,
-     * but otherwise don't rely on it.
-     */
-    size_t allocation_size = (size_t)len64 + 1;
-    aws_byte_buf_init(out_buf, alloc, allocation_size);
-
-    size_t read = 0;
-    size_t total_read = 0;
-    do {
-        if (total_read == out_buf->capacity) {
-            /* just add allocation size space to read some more. It's not perfect but it's plenty good. */
-            aws_byte_buf_reserve_relative(out_buf, allocation_size);
+    /* A null terminator is appended, but is not included as part of the length field. */
+    if (out_buf->len == out_buf->capacity) {
+        if (aws_byte_buf_reserve_relative(out_buf, 1)) {
+            AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to grow buffer for file:'%s'", filename);
+            goto error;
         }
-        read = fread(out_buf->buffer + out_buf->len, 1, out_buf->capacity - out_buf->len, fp);
-        out_buf->len += read;
-        total_read += read;
-    } while (read > 0);
-
-    int errno_value = ferror(fp) ? errno : 0; /* Always cache errno before potential side-effect */
-    if (errno_value != 0) {
-        aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_READ_FAILURE);
-        AWS_LOGF_ERROR(
-            AWS_LS_COMMON_IO,
-            "static: Failed reading file:'%s' errno:%d aws-error:%s",
-            filename,
-            errno_value,
-            aws_error_name(aws_last_error()));
-        goto error;
     }
+    out_buf->buffer[out_buf->len] = 0;
 
     fclose(fp);
-    /* write the NULL terminator out. */
-    aws_byte_buf_write_u8(out_buf, 0x00);
-    /* we wrote the NULL terminator, but don't include it in the length. */
-    out_buf->len -= 1;
     return AWS_OP_SUCCESS;
 
 error:
@@ -109,6 +142,19 @@ int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocat
     return AWS_OP_ERR;
 }
 
+int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
+    return s_byte_buf_init_from_file_impl(out_buf, alloc, filename, true /*use_file_size_as_hint*/, 0 /*size_hint*/);
+}
+
+int aws_byte_buf_init_from_file_with_size_hint(
+    struct aws_byte_buf *out_buf,
+    struct aws_allocator *alloc,
+    const char *filename,
+    size_t size_hint) {
+
+    return s_byte_buf_init_from_file_impl(out_buf, alloc, filename, false /*use_file_size_as_hint*/, size_hint);
+}
+
 bool aws_is_any_directory_separator(char value) {
     return value == '\\' || value == '/';
 }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 8d254bfc2..4681e04c3 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -481,7 +481,7 @@ add_test_case(directory_move_src_non_existent_test)
 add_test_case(test_home_directory_not_null)
 add_test_case(test_normalize_posix_directory_separator)
 add_test_case(test_normalize_windows_directory_separator)
-add_test_case(test_byte_buf_file_read)
+add_test_case(test_byte_buf_init_from_file)
 
 add_test_case(promise_test_wait_forever)
 add_test_case(promise_test_wait_for_a_bit)
diff --git a/tests/file_test.c b/tests/file_test.c
index 6205a6f89..6a3f4fe8b 100644
--- a/tests/file_test.c
+++ b/tests/file_test.c
@@ -2,6 +2,7 @@
  * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  * SPDX-License-Identifier: Apache-2.0.
  */
+#include <aws/common/device_random.h>
 #include <aws/common/file.h>
 
 #include <aws/testing/aws_test_harness.h>
@@ -440,30 +441,104 @@ static int s_test_normalize_windows_directory_separator(struct aws_allocator *al
 
 AWS_TEST_CASE(test_normalize_windows_directory_separator, s_test_normalize_windows_directory_separator);
 
-static int s_test_byte_buf_file_read(struct aws_allocator *allocator, void *ctx) {
+static int s_check_byte_buf_from_file(const struct aws_byte_buf *buf, struct aws_byte_cursor expected_contents) {
+    ASSERT_TRUE(aws_byte_cursor_eq_byte_buf(&expected_contents, buf), "Contents should match");
+    ASSERT_TRUE(buf->capacity > buf->len, "Buffer should end with null-terminator");
+    ASSERT_UINT_EQUALS(0, buf->buffer[buf->len], "Buffer should end with null-terminator");
+    return AWS_OP_SUCCESS;
+}
+
+static int s_create_file_then_read_it(struct aws_allocator *allocator, struct aws_byte_cursor contents) {
+    /* create file */
+    const char *filename = "testy";
+    FILE *f = aws_fopen(filename, "wb");
+    ASSERT_UINT_EQUALS(contents.len, fwrite(contents.ptr, 1, contents.len, f));
+    ASSERT_INT_EQUALS(0, fclose(f));
+
+    struct aws_byte_buf buf;
+
+    /* check aws_byte_buf_init_from_file() */
+    ASSERT_SUCCESS(aws_byte_buf_init_from_file(&buf, allocator, filename));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    /* now check aws_byte_buf_init_from_file_with_size_hint() ... */
+
+    /* size_hint more than big enough */
+    size_t size_hint = contents.len * 2;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    /* size_hint not big enough for null-terminator */
+    size_hint = contents.len;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    /* size_hint 0 */
+    size_hint = 0;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    /* size_hint 1 */
+    size_hint = 1;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    remove(filename);
+    return AWS_OP_SUCCESS;
+}
+
+/* Read an actual "special file" (if it exists on this machine) */
+static int s_read_special_file(struct aws_allocator *allocator, const char *filename) {
+    struct aws_string *filename_str = aws_string_new_from_c_str(allocator, filename);
+    bool exists = aws_path_exists(filename_str);
+    aws_string_destroy(filename_str);
+    if (!exists) {
+        return AWS_OP_SUCCESS;
+    }
+
+    struct aws_byte_buf buf;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_file(&buf, allocator, filename));
+    ASSERT_TRUE(buf.capacity > buf.len, "Buffer should end with null-terminator");
+    ASSERT_UINT_EQUALS(0, buf.buffer[buf.len], "Buffer should end with null-terminator");
+
+    if (strcmp("/dev/null", filename) == 0) {
+        ASSERT_UINT_EQUALS(0, buf.len, "expected /dev/null to be empty");
+    } else {
+        ASSERT_TRUE(buf.len > 0, "expected special file to have data");
+    }
+
+    aws_byte_buf_clean_up(&buf);
+    return AWS_OP_SUCCESS;
+}
+
+static int s_test_byte_buf_init_from_file(struct aws_allocator *allocator, void *ctx) {
     (void)ctx;
 
-    const char *test_string = "This is a message that's going to test a read loop.";
-    struct aws_byte_buf test_buf = aws_byte_buf_from_c_str(test_string);
-    struct aws_string *test_file = aws_string_new_from_c_str(allocator, "test_file.txt");
-    struct aws_string *test_file_mode = aws_string_new_from_c_str(allocator, "w");
+    /* simple text file */
+    ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_c_str("asdf")));
 
-    FILE *output_file = aws_fopen("test_file", "w");
-    ASSERT_NOT_NULL(output_file);
-    ASSERT_UINT_EQUALS(test_buf.len, fwrite(test_buf.buffer, 1, test_buf.len, output_file));
-    fclose(output_file);
+    /* empty file */
+    ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_c_str("")));
 
-    struct aws_byte_buf output_buf;
-    AWS_ZERO_STRUCT(output_buf);
-    ASSERT_SUCCESS(aws_byte_buf_init_from_file(&output_buf, allocator, "test_file"));
-    aws_file_delete(test_file);
-    ASSERT_BIN_ARRAYS_EQUALS(test_buf.buffer, test_buf.len, output_buf.buffer, output_buf.len);
+    /* large 3MB+1byte binary file */
+    struct aws_byte_buf big_rando;
+    aws_byte_buf_init(&big_rando, allocator, (1024 * 1024 * 3) + 1);
+    ASSERT_SUCCESS(aws_device_random_buffer(&big_rando));
+    ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_buf(&big_rando)));
+    aws_byte_buf_clean_up(&big_rando);
 
-    aws_byte_buf_clean_up(&output_buf);
-    aws_string_destroy(test_file_mode);
-    aws_string_destroy(test_file);
+    /* test some "special files" (if they exist) */
+    ASSERT_SUCCESS(s_read_special_file(allocator, "/proc/cpuinfo"));
+    ASSERT_SUCCESS(s_read_special_file(allocator, "/proc/net/tcp"));
+    ASSERT_SUCCESS(s_read_special_file(allocator, "/sys/devices/virtual/dmi/id/sys_vendor"));
+    ASSERT_SUCCESS(s_read_special_file(allocator, "/dev/null"));
 
     return AWS_OP_SUCCESS;
 }
-AWS_TEST_CASE(test_byte_buf_file_read, s_test_byte_buf_file_read);
+AWS_TEST_CASE(test_byte_buf_init_from_file, s_test_byte_buf_init_from_file)
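
For illustration only (not part of the patch): a minimal caller of the new API might look like the sketch below. It assumes the patch is applied and the program links against aws-c-common; /proc/cpuinfo is one of the "special files" that reports a size of 0, so the explicit size hint replaces the useless file-size query.

    #include <aws/common/byte_buf.h>
    #include <stdio.h>

    int main(void) {
        struct aws_allocator *alloc = aws_default_allocator();
        struct aws_byte_buf buf;

        /* /proc/cpuinfo reports its size as 0, so pass an explicit hint
         * instead of letting the reader trust the reported file length. */
        if (aws_byte_buf_init_from_file_with_size_hint(&buf, alloc, "/proc/cpuinfo", 4096 /*size_hint*/)) {
            return 1;
        }

        /* The buffer is null-terminated (the terminator is not counted in len),
         * so it is safe to print as a C string. */
        printf("%s", (const char *)buf.buffer);

        aws_byte_buf_clean_up(&buf);
        return 0;
    }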
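The growth policy in the read loop grows the buffer by its current capacity (i.e., doubling), but clamps each step to the [MIN_BUFFER_GROWTH_READING_FILES, MAX_BUFFER_GROWTH_READING_FILES] range. A standalone restatement of that step, using the same aws_max_size()/aws_min_size() helpers as the patch (s_growth_step is a hypothetical name, not in the patch):

    #include <aws/common/math.h>

    /* How much to grow by: the current capacity, clamped to
     * no less than 32 bytes and no more than 4096 bytes. */
    static size_t s_growth_step(size_t current_capacity) {
        size_t step = aws_max_size(32, current_capacity); /* MIN_BUFFER_GROWTH_READING_FILES */
        return aws_min_size(4096, step);                  /* MAX_BUFFER_GROWTH_READING_FILES */
    }
    /* e.g. capacity 0 -> grow by 32; capacity 100 -> by 100; capacity 8192 -> by 4096 */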