Support reading "special files" that lie about their size #1064

Merged 3 commits on Oct 12, 2023
12 changes: 12 additions & 0 deletions include/aws/common/byte_buf.h
@@ -135,6 +135,18 @@ AWS_COMMON_API int aws_byte_buf_init_copy(
AWS_COMMON_API
int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename);

/**
* Same as aws_byte_buf_init_from_file(), but for reading "special files" like /proc/cpuinfo.
* These files don't accurately report their size, so size_hint is used as the initial buffer size,
* and the buffer grows until the whole file is read.
*/
AWS_COMMON_API
int aws_byte_buf_init_from_file_with_size_hint(
struct aws_byte_buf *out_buf,
struct aws_allocator *alloc,
const char *filename,
size_t size_hint);
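
(For illustration: a minimal sketch of how a caller might use this new function. The /proc path and the 256-byte hint are arbitrary example values, not part of the API.)

#include <aws/common/byte_buf.h>
#include <stdio.h>

int print_cpuinfo(struct aws_allocator *alloc) {
    struct aws_byte_buf buf;
    /* stat() reports size 0 for this file, so pass a starting guess instead */
    if (aws_byte_buf_init_from_file_with_size_hint(&buf, alloc, "/proc/cpuinfo", 256 /*size_hint*/)) {
        return AWS_OP_ERR; /* aws_last_error() holds the reason */
    }
    printf("%s\n", (const char *)buf.buffer); /* contents are null-terminated; terminator not counted in len */
    aws_byte_buf_clean_up(&buf);
    return AWS_OP_SUCCESS;
}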

/**
* Evaluates the set of properties that define the shape of all valid aws_byte_buf structures.
* It is also a cheap check, in the sense that it runs in constant time (i.e., no loops or recursion).
130 changes: 95 additions & 35 deletions source/file.c
@@ -11,6 +11,16 @@

#include <errno.h>

/* For "special files", the OS often lies about size.
* For example, on Amazon Linux 2:
* /proc/cpuinfo: size is 0, but contents are several KB of data.
* /sys/devices/virtual/dmi/id/product_name: size is 4096, but contents are "c5.2xlarge"
*
* Therefore, we may need to grow the buffer as we read until EOF.
* This is the min/max step size for growth. */
#define MIN_BUFFER_GROWTH_READING_FILES 32
#define MAX_BUFFER_GROWTH_READING_FILES 4096
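
(In effect, each pass grows the buffer by its current capacity, clamped to these bounds, so a small buffer roughly doubles until the step caps out at 4096 bytes per pass. A minimal sketch of the step calculation, mirroring the loop below and assuming the two macros above; this is not a separate API:)

static size_t s_next_capacity(size_t capacity) {
    size_t step = capacity; /* grow by ~100% of current capacity */
    step = step < MIN_BUFFER_GROWTH_READING_FILES ? MIN_BUFFER_GROWTH_READING_FILES : step;
    step = step > MAX_BUFFER_GROWTH_READING_FILES ? MAX_BUFFER_GROWTH_READING_FILES : step;
    return capacity + step; /* e.g. 1 -> 33 -> 66 -> 132 -> ... then +4096 per pass */
}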

FILE *aws_fopen(const char *file_path, const char *mode) {
if (!file_path || strlen(file_path) == 0) {
AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to open file. path is empty");
@@ -34,54 +44,91 @@ FILE *aws_fopen(const char *file_path, const char *mode) {
return file;
}

int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
static int s_byte_buf_init_from_file(
struct aws_byte_buf *out_buf,
struct aws_allocator *alloc,
const char *filename,
bool use_file_size_as_hint,
size_t size_hint) {
Comment on lines +52 to +53

Contributor:
Debatable / Code Clarity: There are two different ways to use this function, which is a bit confusing. I suggest we change this function to

Suggested change: replace

    bool use_file_size_as_hint,
    size_t size_hint) {

so the function reads

    s_byte_buf_init_from_file(
        struct aws_byte_buf *out_buf,
        struct aws_allocator *alloc,
        const char *filename,
        size_t size_hint)

and let aws_byte_buf_init_from_file calculate size_hint.

Contributor Author:
I tried writing it that way first, but it was more confusing IMHO.

You ended up with 3 functions that needed to deal with error-handling instead of just 1, and scrambled logic around instead of putting it all in 1 place.
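
(For context, a rough sketch of the shape the reviewer proposed, with error handling elided; hypothetical code, not what was merged:)

int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
    size_t size_hint = 0;
    /* open the file, call aws_file_get_length(), check len64 against SIZE_MAX,
     * log and return AWS_OP_ERR on failure, then: size_hint = (size_t)len64 + 1 */
    return s_byte_buf_init_from_file(out_buf, alloc, filename, size_hint);
}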


AWS_ZERO_STRUCT(*out_buf);
FILE *fp = aws_fopen(filename, "rb");
if (fp == NULL) {
goto error;
}

int64_t len64 = 0;
if (aws_file_get_length(fp, &len64)) {
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: Failed to get file length. file:'%s' error:%s",
filename,
aws_error_name(aws_last_error()));
goto error;
}
if (use_file_size_as_hint) {
int64_t len64 = 0;
if (aws_file_get_length(fp, &len64)) {
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: Failed to get file length. file:'%s' error:%s",
filename,
aws_error_name(aws_last_error()));
goto error;
}

if (len64 >= SIZE_MAX) {
aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: File too large to read into memory. file:'%s' error:%s",
filename,
aws_error_name(aws_last_error()));
goto error;
if (len64 >= SIZE_MAX) {
aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: File too large to read into memory. file:'%s' error:%s",
filename,
aws_error_name(aws_last_error()));
goto error;
}

/* Leave space for null terminator at end of buffer */
size_hint = (size_t)len64 + 1;
}

size_t allocation_size = (size_t)len64 + 1;
aws_byte_buf_init(out_buf, alloc, allocation_size);
aws_byte_buf_init(out_buf, alloc, size_hint);

/* Read in a loop until we hit EOF */
while (true) {
/* Expand buffer if necessary (at a reasonable rate) */
if (out_buf->len == out_buf->capacity) {
size_t additional_capacity = out_buf->capacity;
additional_capacity = aws_max_size(MIN_BUFFER_GROWTH_READING_FILES, additional_capacity);
additional_capacity = aws_min_size(MAX_BUFFER_GROWTH_READING_FILES, additional_capacity);
if (aws_byte_buf_reserve_relative(out_buf, additional_capacity)) {
AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to grow buffer for file:'%s'", filename);
goto error;
}
}

/* Ensure compatibility with null-terminated APIs, but don't consider
* the null terminator part of the length of the payload */
out_buf->len = out_buf->capacity - 1;
out_buf->buffer[out_buf->len] = 0;
size_t space_available = out_buf->capacity - out_buf->len;
size_t bytes_read = fread(out_buf->buffer + out_buf->len, 1, space_available, fp);
out_buf->len += bytes_read;

size_t read = fread(out_buf->buffer, 1, out_buf->len, fp);
if (read < out_buf->len) {
int errno_value = ferror(fp) ? errno : 0; /* Always cache errno before potential side-effect */
aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_READ_FAILURE);
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: Failed reading file:'%s' errno:%d aws-error:%s",
filename,
errno_value,
aws_error_name(aws_last_error()));
goto error;
/* If EOF, we're done! */
if (feof(fp)) {
break;
}

/* If no EOF but we read 0 bytes, there's been an error or at least we need
* to treat it like one because we can't just infinitely loop. */
if (bytes_read == 0) {
int errno_value = ferror(fp) ? errno : 0; /* Always cache errno before potential side-effect */
aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_READ_FAILURE);
AWS_LOGF_ERROR(
AWS_LS_COMMON_IO,
"static: Failed reading file:'%s' errno:%d aws-error:%s",
filename,
errno_value,
aws_error_name(aws_last_error()));
goto error;
}
}

/* A null terminator is appended, but is not included as part of the length field. */
if (out_buf->len == out_buf->capacity) {
if (aws_byte_buf_reserve_relative(out_buf, 1)) {
AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to grow buffer for file:'%s'", filename);
goto error;
}
}
out_buf->buffer[out_buf->len] = 0;

fclose(fp);
return AWS_OP_SUCCESS;
@@ -94,6 +141,19 @@ int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocat
return AWS_OP_ERR;
}

int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
return s_byte_buf_init_from_file(out_buf, alloc, filename, true /*use_file_size_as_hint*/, 0 /*size_hint*/);
}

int aws_byte_buf_init_from_file_with_size_hint(
struct aws_byte_buf *out_buf,
struct aws_allocator *alloc,
const char *filename,
size_t size_hint) {

return s_byte_buf_init_from_file(out_buf, alloc, filename, false /*use_file_size_as_hint*/, size_hint);
}

bool aws_is_any_directory_separator(char value) {
return value == '\\' || value == '/';
}
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
@@ -480,6 +480,7 @@ add_test_case(directory_move_src_non_existent_test)
add_test_case(test_home_directory_not_null)
add_test_case(test_normalize_posix_directory_separator)
add_test_case(test_normalize_windows_directory_separator)
add_test_case(test_byte_buf_init_from_file)

add_test_case(promise_test_wait_forever)
add_test_case(promise_test_wait_for_a_bit)
103 changes: 103 additions & 0 deletions tests/file_test.c
@@ -2,6 +2,7 @@
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#include <aws/common/device_random.h>
#include <aws/common/file.h>
#include <aws/common/string.h>

@@ -439,3 +440,105 @@ static int s_test_normalize_windows_directory_separator(struct aws_allocator *al
}

AWS_TEST_CASE(test_normalize_windows_directory_separator, s_test_normalize_windows_directory_separator);

static int s_check_byte_buf_from_file(const struct aws_byte_buf *buf, struct aws_byte_cursor expected_contents) {
ASSERT_TRUE(aws_byte_cursor_eq_byte_buf(&expected_contents, buf), "Contents should match");
ASSERT_TRUE(buf->capacity > buf->len, "Buffer should end with null-terminator");
ASSERT_UINT_EQUALS(0, buf->buffer[buf->len], "Buffer should end with null-terminator");
return AWS_OP_SUCCESS;
}

static int s_create_file_then_read_it(struct aws_allocator *allocator, struct aws_byte_cursor contents) {
/* create file */
const char *filename = "testy";
FILE *f = aws_fopen(filename, "wb");
ASSERT_UINT_EQUALS(contents.len, fwrite(contents.ptr, 1, contents.len, f));
ASSERT_INT_EQUALS(0, fclose(f));

struct aws_byte_buf buf;

/* check aws_byte_buf_init_from_file() */
ASSERT_SUCCESS(aws_byte_buf_init_from_file(&buf, allocator, filename));
ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
aws_byte_buf_clean_up(&buf);

/* now check aws_byte_buf_init_from_file_with_size_hint() ... */

/* size_hint more than big enough */
size_t size_hint = contents.len * 2;
ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
aws_byte_buf_clean_up(&buf);

/* size_hint not big enough for null-terminator */
size_hint = contents.len;
ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
aws_byte_buf_clean_up(&buf);

/* size_hint 0 */
size_hint = 0;
ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
aws_byte_buf_clean_up(&buf);

/* size_hint 1 */
size_hint = 1;
ASSERT_SUCCESS(aws_byte_buf_init_from_file_with_size_hint(&buf, allocator, filename, size_hint));
ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
aws_byte_buf_clean_up(&buf);

remove(filename);
return AWS_OP_SUCCESS;
}

/* Read an actual "special file" (if it exists on this machine) */
static int s_read_special_file(struct aws_allocator *allocator, const char *filename) {
struct aws_string *filename_str = aws_string_new_from_c_str(allocator, filename);
bool exists = aws_path_exists(filename_str);
aws_string_destroy(filename_str);
if (!exists) {
return AWS_OP_SUCCESS;
}

struct aws_byte_buf buf;
ASSERT_SUCCESS(aws_byte_buf_init_from_file(&buf, allocator, filename));
ASSERT_TRUE(buf.capacity > buf.len, "Buffer should end with null-terminator");
ASSERT_UINT_EQUALS(0, buf.buffer[buf.len], "Buffer should end with null-terminator");

if (strcmp("/dev/null", filename) == 0) {
ASSERT_UINT_EQUALS(0, buf.len, "expected /dev/null to be empty");
} else {
ASSERT_TRUE(buf.len > 0, "expected special file to have data");
}

aws_byte_buf_clean_up(&buf);
return AWS_OP_SUCCESS;
}

static int s_test_byte_buf_init_from_file(struct aws_allocator *allocator, void *ctx) {
(void)ctx;

/* simple text file */
ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_c_str("asdf")));

/* empty file */
ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_c_str("")));

/* large 3MB+1byte binary file */
struct aws_byte_buf big_rando;
aws_byte_buf_init(&big_rando, allocator, (1024 * 1024 * 3) + 1);
ASSERT_SUCCESS(aws_device_random_buffer(&big_rando));
ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_buf(&big_rando)));
aws_byte_buf_clean_up(&big_rando);

/* test some "special files" (if they exist) */
ASSERT_SUCCESS(s_read_special_file(allocator, "/proc/cpuinfo"));
ASSERT_SUCCESS(s_read_special_file(allocator, "/proc/net/tcp"));
ASSERT_SUCCESS(s_read_special_file(allocator, "/sys/devices/virtual/dmi/id/sys_vendor"));
ASSERT_SUCCESS(s_read_special_file(allocator, "/dev/null"));

return AWS_OP_SUCCESS;
}

AWS_TEST_CASE(test_byte_buf_init_from_file, s_test_byte_buf_init_from_file)