Skip to content

Commit

Permalink
Support reading "special files" that lie about their size (#1064)
Browse files Browse the repository at this point in the history
**Issue**:
`aws_byte_buf_init_from_file()` didn't work with "special files", which don't accurately report their size. For example, on my Amazon Linux 2:
- `/proc/cpuinfo`: size is 0, but contents are several KB of data.
- `/sys/devices/virtual/dmi/id/product_name`: size is 4096, but contents are "c5.2xlarge"

**Description of changes**:
- `aws_byte_buf_init_from_file()` no longer 100% trusts the reported size. Size is used as a hint, but it always reads until EOF, growing the buffer if necessary.
- New function `aws_byte_buf_init_from_file_with_size_hint()` lets users provide a size hint, rather than querying the OS for it.
  • Loading branch information
graebm authored Oct 12, 2023
1 parent d09b75e commit 0baed28
Show file tree
Hide file tree
Showing 4 changed files with 212 additions and 35 deletions.
12 changes: 12 additions & 0 deletions include/aws/common/byte_buf.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,18 @@ AWS_COMMON_API int aws_byte_buf_init_copy(
AWS_COMMON_API
int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename);

/**
* Same as aws_byte_buf_init_from_file(), but for reading "special files" like /proc/cpuinfo.
* These files don't accurately report their size, so size_hint is used as the initial buffer size,
* and the buffer grows until the whole file is read.
* A null terminator is written after the contents, but is not counted in the buffer's length.
*
* @param out_buf   Buffer to initialize with the file's contents.
* @param alloc     Allocator used for the buffer.
* @param filename  Path of the file to read.
* @param size_hint Initial buffer capacity. It does not need to match the file's real size.
* @return AWS_OP_SUCCESS on success, AWS_OP_ERR on failure.
*/
AWS_COMMON_API
int aws_byte_buf_init_from_file_with_size_hint(
struct aws_byte_buf *out_buf,
struct aws_allocator *alloc,
const char *filename,
size_t size_hint);

/**
* Evaluates the set of properties that define the shape of all valid aws_byte_buf structures.
* It is also a cheap check, in the sense that it runs in constant time (i.e., no loops or recursion).
Expand Down
131 changes: 96 additions & 35 deletions source/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@

#include <errno.h>

/* For "special files", the OS often lies about size.
* For example, on Amazon Linux 2:
* /proc/cpuinfo: size is 0, but contents are several KB of data.
* /sys/devices/virtual/dmi/id/product_name: size is 4096, but contents are "c5.2xlarge"
*
* Therefore, we may need to grow the buffer as we read until EOF.
* This is the min/max step size for growth. */
#define MIN_BUFFER_GROWTH_READING_FILES 32
#define MAX_BUFFER_GROWTH_READING_FILES 4096

FILE *aws_fopen(const char *file_path, const char *mode) {
if (!file_path || strlen(file_path) == 0) {
AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to open file. path is empty");
Expand All @@ -34,54 +44,92 @@ FILE *aws_fopen(const char *file_path, const char *mode) {
return file;
}

int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
/* Helper function used by aws_byte_buf_init_from_file() and aws_byte_buf_init_from_file_with_size_hint() */
static int s_byte_buf_init_from_file_impl(
struct aws_byte_buf *out_buf,
struct aws_allocator *alloc,
const char *filename,
bool use_file_size_as_hint,
size_t size_hint) {

AWS_ZERO_STRUCT(*out_buf);
FILE *fp = aws_fopen(filename, "rb");
if (fp == NULL) {
goto error;
}

int64_t len64 = 0;
if (aws_file_get_length(fp, &len64)) {
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: Failed to get file length. file:'%s' error:%s",
filename,
aws_error_name(aws_last_error()));
goto error;
}
if (use_file_size_as_hint) {
int64_t len64 = 0;
if (aws_file_get_length(fp, &len64)) {
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: Failed to get file length. file:'%s' error:%s",
filename,
aws_error_name(aws_last_error()));
goto error;
}

if (len64 >= SIZE_MAX) {
aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: File too large to read into memory. file:'%s' error:%s",
filename,
aws_error_name(aws_last_error()));
goto error;
if (len64 >= SIZE_MAX) {
aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: File too large to read into memory. file:'%s' error:%s",
filename,
aws_error_name(aws_last_error()));
goto error;
}

/* Leave space for null terminator at end of buffer */
size_hint = (size_t)len64 + 1;
}

size_t allocation_size = (size_t)len64 + 1;
aws_byte_buf_init(out_buf, alloc, allocation_size);
aws_byte_buf_init(out_buf, alloc, size_hint);

/* Read in a loop until we hit EOF */
while (true) {
/* Expand buffer if necessary (at a reasonable rate) */
if (out_buf->len == out_buf->capacity) {
size_t additional_capacity = out_buf->capacity;
additional_capacity = aws_max_size(MIN_BUFFER_GROWTH_READING_FILES, additional_capacity);
additional_capacity = aws_min_size(MAX_BUFFER_GROWTH_READING_FILES, additional_capacity);
if (aws_byte_buf_reserve_relative(out_buf, additional_capacity)) {
AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to grow buffer for file:'%s'", filename);
goto error;
}
}

/* Ensure compatibility with null-terminated APIs, but don't consider
* the null terminator part of the length of the payload */
out_buf->len = out_buf->capacity - 1;
out_buf->buffer[out_buf->len] = 0;
size_t space_available = out_buf->capacity - out_buf->len;
size_t bytes_read = fread(out_buf->buffer + out_buf->len, 1, space_available, fp);
out_buf->len += bytes_read;

size_t read = fread(out_buf->buffer, 1, out_buf->len, fp);
if (read < out_buf->len) {
int errno_value = ferror(fp) ? errno : 0; /* Always cache errno before potential side-effect */
aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_READ_FAILURE);
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: Failed reading file:'%s' errno:%d aws-error:%s",
filename,
errno_value,
aws_error_name(aws_last_error()));
goto error;
/* If EOF, we're done! */
if (feof(fp)) {
break;
}

/* If no EOF but we read 0 bytes, there's been an error or at least we need
* to treat it like one because we can't just infinitely loop. */
if (bytes_read == 0) {
int errno_value = ferror(fp) ? errno : 0; /* Always cache errno before potential side-effect */
aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_READ_FAILURE);
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: Failed reading file:'%s' errno:%d aws-error:%s",
filename,
errno_value,
aws_error_name(aws_last_error()));
goto error;
}
}

/* A null terminator is appended, but is not included as part of the length field. */
if (out_buf->len == out_buf->capacity) {
if (aws_byte_buf_reserve_relative(out_buf, 1)) {
AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to grow buffer for file:'%s'", filename);
goto error;
}
}
out_buf->buffer[out_buf->len] = 0;

fclose(fp);
return AWS_OP_SUCCESS;
Expand All @@ -94,6 +142,19 @@ int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocat
return AWS_OP_ERR;
}

/* Read an entire file into a newly initialized buffer. The OS is asked for the
 * file's size, which the shared helper uses only as the initial capacity hint. */
int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
    const bool ask_os_for_size = true;
    const size_t unused_hint = 0; /* replaced by the helper once it has queried the file length */

    return s_byte_buf_init_from_file_impl(out_buf, alloc, filename, ask_os_for_size, unused_hint);
}

/* Read an entire file into a newly initialized buffer, starting from the
 * caller-supplied size_hint instead of querying the OS for the file's size. */
int aws_byte_buf_init_from_file_with_size_hint(
    struct aws_byte_buf *out_buf,
    struct aws_allocator *alloc,
    const char *filename,
    size_t size_hint) {

    const bool ask_os_for_size = false;

    return s_byte_buf_init_from_file_impl(out_buf, alloc, filename, ask_os_for_size, size_hint);
}

/* Returns true if value is a directory separator on any supported platform,
 * i.e. either a forward slash or a backslash. */
bool aws_is_any_directory_separator(char value) {
    switch (value) {
        case '/':
        case '\\':
            return true;
        default:
            return false;
    }
}
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ add_test_case(directory_move_src_non_existent_test)
add_test_case(test_home_directory_not_null)
add_test_case(test_normalize_posix_directory_separator)
add_test_case(test_normalize_windows_directory_separator)
add_test_case(test_byte_buf_init_from_file)

add_test_case(promise_test_wait_forever)
add_test_case(promise_test_wait_for_a_bit)
Expand Down
103 changes: 103 additions & 0 deletions tests/file_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#include <aws/common/device_random.h>
#include <aws/common/file.h>
#include <aws/common/string.h>

Expand Down Expand Up @@ -439,3 +440,105 @@ static int s_test_normalize_windows_directory_separator(struct aws_allocator *al
}

AWS_TEST_CASE(test_normalize_windows_directory_separator, s_test_normalize_windows_directory_separator);

/* Verify that a buffer produced by one of the init-from-file functions holds exactly
 * expected_contents, and that a null terminator sits just past the end of the payload.
 * Returns AWS_OP_SUCCESS once all checks have passed. */
static int s_check_byte_buf_from_file(const struct aws_byte_buf *buf, struct aws_byte_cursor expected_contents) {
ASSERT_TRUE(aws_byte_cursor_eq_byte_buf(&expected_contents, buf), "Contents should match");
/* Capacity must exceed len, otherwise reading buffer[len] below would be out of bounds */
ASSERT_TRUE(buf->capacity > buf->len, "Buffer should end with null-terminator");
/* The terminator lives at index len, so it is not counted as part of the payload */
ASSERT_UINT_EQUALS(0, buf->buffer[buf->len], "Buffer should end with null-terminator");
return AWS_OP_SUCCESS;
}

/* Write `contents` to a scratch file, then confirm that aws_byte_buf_init_from_file()
 * and aws_byte_buf_init_from_file_with_size_hint() (with a range of hints) each read
 * back exactly those bytes followed by a null terminator.
 * Returns AWS_OP_SUCCESS once all checks have passed. */
static int s_create_file_then_read_it(struct aws_allocator *allocator, struct aws_byte_cursor contents) {
    /* create file */
    const char *filename = "testy";
    FILE *f = aws_fopen(filename, "wb");
    /* Fail the test cleanly if the file couldn't be opened, rather than handing NULL to fwrite()/fclose() */
    ASSERT_TRUE(f != NULL, "Failed to open file for writing");
    ASSERT_UINT_EQUALS(contents.len, fwrite(contents.ptr, 1, contents.len, f));
    ASSERT_INT_EQUALS(0, fclose(f));

    struct aws_byte_buf buf;

    /* check aws_byte_buf_init_from_file() */
    ASSERT_SUCCESS(aws_byte_buf_init_from_file(&buf, allocator, filename));
    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
    aws_byte_buf_clean_up(&buf);

    /* now check aws_byte_buf_init_from_file_with_size_hint() ... */

    /* size_hint more than big enough */
    size_t size_hint = contents.len * 2;
    ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
    aws_byte_buf_clean_up(&buf);

    /* size_hint not big enough for null-terminator */
    size_hint = contents.len;
    ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
    aws_byte_buf_clean_up(&buf);

    /* size_hint 0 */
    size_hint = 0;
    ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
    aws_byte_buf_clean_up(&buf);

    /* size_hint 1 */
    size_hint = 1;
    ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
    aws_byte_buf_clean_up(&buf);

    /* best-effort removal of the scratch file */
    remove(filename);
    return AWS_OP_SUCCESS;
}

/* Read an actual "special file" (if it exists on this machine).
 * If the path does not exist, the check is skipped and AWS_OP_SUCCESS is returned.
 * Otherwise the file is read with aws_byte_buf_init_from_file() and the result must be
 * null-terminated; every file except /dev/null must also yield at least one byte. */
static int s_read_special_file(struct aws_allocator *allocator, const char *filename) {
/* aws_path_exists() takes an aws_string, so wrap the C string temporarily */
struct aws_string *filename_str = aws_string_new_from_c_str(allocator, filename);
bool exists = aws_path_exists(filename_str);
aws_string_destroy(filename_str);
if (!exists) {
return AWS_OP_SUCCESS;
}

struct aws_byte_buf buf;
ASSERT_SUCCESS(aws_byte_buf_init_from_file(&buf, allocator, filename));
/* Capacity must exceed len before buffer[len] is inspected for the terminator */
ASSERT_TRUE(buf.capacity > buf.len, "Buffer should end with null-terminator");
ASSERT_UINT_EQUALS(0, buf.buffer[buf.len], "Buffer should end with null-terminator");

/* /dev/null is the one special file here that is expected to be empty */
if (strcmp("/dev/null", filename) == 0) {
ASSERT_UINT_EQUALS(0, buf.len, "expected /dev/null to be empty");
} else {
ASSERT_TRUE(buf.len > 0, "expected special file to have data");
}

aws_byte_buf_clean_up(&buf);
return AWS_OP_SUCCESS;
}

/* Test entry point: exercises the init-from-file functions against a small text file,
 * an empty file, a large binary file, and several "special files" when present. */
static int s_test_byte_buf_init_from_file(struct aws_allocator *allocator, void *ctx) {
(void)ctx;

/* simple text file */
ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_c_str("asdf")));

/* empty file */
ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_c_str("")));

/* large 3MB+1byte binary file */
struct aws_byte_buf big_rando;
aws_byte_buf_init(&big_rando, allocator, (1024 * 1024 * 3) + 1);
/* NOTE(review): presumably fills big_rando up to its capacity with random bytes; confirm against aws_device_random_buffer() docs */
ASSERT_SUCCESS(aws_device_random_buffer(&big_rando));
ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_buf(&big_rando)));
aws_byte_buf_clean_up(&big_rando);

/* test some "special files" (if they exist) */
ASSERT_SUCCESS(s_read_special_file(allocator, "/proc/cpuinfo"));
ASSERT_SUCCESS(s_read_special_file(allocator, "/proc/net/tcp"));
ASSERT_SUCCESS(s_read_special_file(allocator, "/sys/devices/virtual/dmi/id/sys_vendor"));
ASSERT_SUCCESS(s_read_special_file(allocator, "/dev/null"));

return AWS_OP_SUCCESS;
}

AWS_TEST_CASE(test_byte_buf_init_from_file, s_test_byte_buf_init_from_file)

0 comments on commit 0baed28

Please sign in to comment.