Skip to content

Commit

Permalink
i#6921: Fix heap allocation on latest Linux kernel (#6922)
Browse files Browse the repository at this point in the history
Fixes 3 problems hit on recent Linux kernels where our mmap hints are
being ignored:

1) os_heap_reserve_in_region()'s POINTER_MAX check fails because the end
   passed in is page-aligned (and thus below POINTER_MAX), causing an
   unnecessary bounded-region search.

2) vmm_place_vmcode() tries smaller sizes on failure but does not propagate
   the new size to the caller, resulting in memory corruption.

3) os_heap_reserve() for Linux now uses the new-ish MAP_FIXED_NOREPLACE
   to get the behavior various code expects without risk of clobbering.

Tested on a 6.7.12 kernel where raw2trace failed up front reliably
without these fixes but now succeeds.

Further tested the size-propagation fix with the other fixes disabled: we
now get a graceful out-of-memory failure instead of memory corruption and a
strange crash:

```
$ clients/bin64/drmemtrace_launcher -indir drmemtrace.*.dir
<Full size vmm heap allocation failed>
<Application <path>/clients/bin64/drmemtrace_launcher (3686844).  Out of memory.  Program aborted.  Source I, type 0x0000000000000001, code 0x0000000000000001.>
```

Fixes #6921
  • Loading branch information
derekbruening authored Aug 16, 2024
1 parent 86c9689 commit d302f5e
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 23 deletions.
35 changes: 18 additions & 17 deletions core/heap.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2010-2023 Google, Inc. All rights reserved.
* Copyright (c) 2010-2024 Google, Inc. All rights reserved.
* Copyright (c) 2001-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -782,7 +782,7 @@ report_w_xor_x_fatal_error_and_exit(void)
}

static void
vmm_place_vmcode(vm_heap_t *vmh, size_t size, heap_error_code_t *error_code)
vmm_place_vmcode(vm_heap_t *vmh, /*INOUT*/ size_t *size, heap_error_code_t *error_code)
{
ptr_uint_t preferred = 0;
#ifdef X64
Expand Down Expand Up @@ -818,7 +818,7 @@ vmm_place_vmcode(vm_heap_t *vmh, size_t size, heap_error_code_t *error_code)
}
vmh->alloc_start = os_heap_reserve_in_region(
(void *)ALIGN_FORWARD(reach_base, PAGE_SIZE),
(void *)ALIGN_BACKWARD(reach_end, PAGE_SIZE), size + add_for_align,
(void *)ALIGN_BACKWARD(reach_end, PAGE_SIZE), *size + add_for_align,
error_code, true /*+x*/);
if (vmh->alloc_start != NULL) {
vmh->start_addr = (heap_pc)ALIGN_FORWARD(
Expand Down Expand Up @@ -851,19 +851,19 @@ vmm_place_vmcode(vm_heap_t *vmh, size_t size, heap_error_code_t *error_code)
DYNAMO_OPTION(vmm_block_size));
preferred = ALIGN_FORWARD(preferred, OS_ALLOC_GRANULARITY);
/* overflow check: w/ vm_base shouldn't happen so debug-only check */
ASSERT(!POINTER_OVERFLOW_ON_ADD(preferred, size));
ASSERT(!POINTER_OVERFLOW_ON_ADD(preferred, *size));
/* let's assume a single chunk is sufficient to reserve */
#ifdef X64
if ((byte *)preferred < heap_allowable_region_start ||
(byte *)preferred + size > heap_allowable_region_end) {
(byte *)preferred + *size > heap_allowable_region_end) {
*error_code = HEAP_ERROR_NOT_AT_PREFERRED;
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init preferred=" PFX " too far from " PFX "-" PFX "\n",
preferred, heap_allowable_region_start, heap_allowable_region_end);
} else {
#endif
vmh->alloc_start =
os_heap_reserve((void *)preferred, size, error_code, true /*+x*/);
os_heap_reserve((void *)preferred, *size, error_code, true /*+x*/);
vmh->start_addr = vmh->alloc_start;
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init preferred=" PFX " got start_addr=" PFX "\n",
Expand All @@ -877,29 +877,30 @@ vmm_place_vmcode(vm_heap_t *vmh, size_t size, heap_error_code_t *error_code)
* syslog or assert here
*/
/* need extra size to ensure alignment */
vmh->alloc_size = size + DYNAMO_OPTION(vmm_block_size);
vmh->alloc_size = *size + DYNAMO_OPTION(vmm_block_size);
#ifdef X64
/* PR 215395, make sure allocation satisfies heap reachability contraints */
vmh->alloc_start = os_heap_reserve_in_region(
(void *)ALIGN_FORWARD(heap_allowable_region_start, PAGE_SIZE),
(void *)ALIGN_BACKWARD(heap_allowable_region_end, PAGE_SIZE),
size + DYNAMO_OPTION(vmm_block_size), error_code, true /*+x*/);
*size + DYNAMO_OPTION(vmm_block_size), error_code, true /*+x*/);
#else
vmh->alloc_start = (heap_pc)os_heap_reserve(
NULL, size + DYNAMO_OPTION(vmm_block_size), error_code, true /*+x*/);
NULL, *size + DYNAMO_OPTION(vmm_block_size), error_code, true /*+x*/);
#endif
vmh->start_addr =
(heap_pc)ALIGN_FORWARD(vmh->alloc_start, DYNAMO_OPTION(vmm_block_size));
LOG(GLOBAL, LOG_HEAP, 1,
"vmm_heap_unit_init unable to allocate at preferred=" PFX
" letting OS place sz=%dM addr=" PFX "\n",
preferred, size / (1024 * 1024), vmh->start_addr);
preferred, *size / (1024 * 1024), vmh->start_addr);
if (vmh->alloc_start == NULL && DYNAMO_OPTION(vm_allow_smaller)) {
/* Just a little smaller might fit */
size_t sub = (size_t)ALIGN_FORWARD(size / 16, 1024 * 1024);
SYSLOG_INTERNAL_WARNING_ONCE("Full size vmm heap allocation failed");
if (size > sub)
size -= sub;
/* Don't go too small. */
if (*size > 2 * sub)
*size -= sub;
else
break;
} else
Expand All @@ -913,7 +914,7 @@ vmm_place_vmcode(vm_heap_t *vmh, size_t size, heap_error_code_t *error_code)
* on top. TODO i#3566: We need a different strategy on Windows.
*/
/* Ensure os_map_file ignores vmcode: */
ASSERT(!is_vmm_reserved_address(vmh->start_addr, size, NULL, NULL));
ASSERT(!is_vmm_reserved_address(vmh->start_addr, *size, NULL, NULL));
size_t map_size = vmh->alloc_size;
byte *map_base =
os_map_file(heapmgt->dual_map_file, &map_size, 0, vmh->alloc_start,
Expand All @@ -926,9 +927,9 @@ vmm_place_vmcode(vm_heap_t *vmh, size_t size, heap_error_code_t *error_code)
/* ensure future out-of-block heap allocations are reachable from this allocation */
if (vmh->start_addr != NULL) {
ASSERT(vmh->start_addr >= heap_allowable_region_start &&
!POINTER_OVERFLOW_ON_ADD(vmh->start_addr, size) &&
vmh->start_addr + size <= heap_allowable_region_end);
request_region_be_heap_reachable(vmh->start_addr, size);
!POINTER_OVERFLOW_ON_ADD(vmh->start_addr, *size) &&
vmh->start_addr + *size <= heap_allowable_region_end);
request_region_be_heap_reachable(vmh->start_addr, *size);
}
#endif
ASSERT(ALIGNED(vmh->start_addr, DYNAMO_OPTION(vmm_block_size)));
Expand Down Expand Up @@ -977,7 +978,7 @@ vmm_heap_unit_init(vm_heap_t *vmh, size_t size, bool is_vmcode, const char *name
ASSERT_NOT_REACHED();
}
}
vmm_place_vmcode(vmh, size, &error_code);
vmm_place_vmcode(vmh, &size, &error_code);
if (DYNAMO_OPTION(satisfy_w_xor_x)) {
size_t map_size = vmh->alloc_size;
heapmgt->vmcode_writable_alloc =
Expand Down
32 changes: 26 additions & 6 deletions core/unix/os.c
Original file line number Diff line number Diff line change
Expand Up @@ -3480,11 +3480,30 @@ os_heap_reserve(void *preferred, size_t size, heap_error_code_t *error_code,
if (executable)
os_flags |= MAP_JIT;
#endif
#if defined(LINUX) && !defined(ANDROID)
if (preferred != NULL) {
/* We fail if we don't get the preferred address, so we use the 4.17+
* fixed-but-no-clobber flag to ensure the kernel actually tries for our hint.
*/
os_flags |= MAP_FIXED_NOREPLACE;
}
#endif

/* FIXME: note that this memory is in fact still committed - see man mmap */
/* FIXME: case 2347 on Linux or -vm_reserve should be set to false */
/* FIXME: Need to actually get a mmap-ing with |MAP_NORESERVE */
/* We could try for |MAP_NORESERVE but usually overcommit is set on the
* system and pages aren't actually committed until we touch them.
*/
p = mmap_syscall(preferred, size, prot, os_flags, -1, 0);
#if defined(LINUX) && !defined(ANDROID)
if (preferred != NULL && p == (void *)(-EINVAL)) {
/* We're probably on an old pre-4.17 kernel.
* We could have a global var but we live w/ doing this every time.
*/
SYSLOG_INTERNAL_WARNING_ONCE(
"Got EINVAL on mmap: removing MAP_FIXED_NOREPLACE\n");
os_flags &= ~MAP_FIXED_NOREPLACE;
p = mmap_syscall(preferred, size, prot, os_flags, -1, 0);
}
#endif
if (!mmap_syscall_succeeded(p)) {
*error_code = -(heap_error_code_t)(ptr_int_t)p;
LOG(GLOBAL, LOG_HEAP, 4, "os_heap_reserve %d bytes failed " PFX "\n", size, p);
Expand All @@ -3497,8 +3516,8 @@ os_heap_reserve(void *preferred, size_t size, heap_error_code_t *error_code,
os_heap_free(p, size, &dummy);
ASSERT(dummy == HEAP_ERROR_SUCCESS);
LOG(GLOBAL, LOG_HEAP, 4,
"os_heap_reserve %d bytes at " PFX " not preferred " PFX "\n", size,
preferred, p);
"os_heap_reserve %d bytes at " PFX " not preferred " PFX "\n", size, p,
preferred);
return NULL;
} else {
*error_code = HEAP_ERROR_SUCCESS;
Expand Down Expand Up @@ -3564,7 +3583,8 @@ os_heap_reserve_in_region(void *start, void *end, size_t size,
end);

/* if no restriction on location use regular os_heap_reserve() */
if (start == (void *)PTR_UINT_0 && end == (void *)POINTER_MAX)
if (start == (void *)PTR_UINT_0 &&
end == (void *)ALIGN_BACKWARD(POINTER_MAX, PAGE_SIZE))
return os_heap_reserve(NULL, size, error_code, executable);

/* loop to handle races */
Expand Down

0 comments on commit d302f5e

Please sign in to comment.