Skip to content

Commit

Permalink
chore: Introduce fiber stack allocator (#2730)
Browse files Browse the repository at this point in the history
1. Use clib malloc for allocating fiber stacks but reduce the fiber stack size.
   clib malloc uses default 4K OS pages when reserving memory from the OS.
   The reason for not using mi_malloc, because we use 2MB large OS pages with mimalloc.
   However, allocating stacks is one of the cases, when using smaller 4KB memory pages is actually more
   RSS efficient because memory pages become hot at better granularity.

2. Add "memory_fiberstack_vms_bytes" metric exposing fiber stack vm usage.
3. Fix macos dependencies & update ci versions.

Signed-off-by: Roman Gershman <[email protected]>
  • Loading branch information
romange authored Mar 18, 2024
1 parent e9548a2 commit f7292de
Show file tree
Hide file tree
Showing 17 changed files with 78 additions and 39 deletions.
4 changes: 2 additions & 2 deletions .github/actions/lint-test-chart/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ runs:
fetch-depth: 0

- name: Set up Helm
uses: azure/setup-helm@v3
uses: azure/setup-helm@v4

- uses: actions/setup-python@v5
with:
Expand Down Expand Up @@ -43,7 +43,7 @@ runs:
${{github.event_name == 'workflow_dispatch' && '--all'}} ;
- name: Create kind cluster
uses: helm/kind-action@v1.8.0
uses: helm/kind-action@v1

- name: Getting cluster ready
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,5 +176,5 @@ jobs:
lint-test-chart:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: ./.github/actions/lint-test-chart
9 changes: 7 additions & 2 deletions .github/workflows/daily-builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,13 @@ jobs:
- name: Install dependencies
run: |
brew update && brew install ninja boost openssl automake gcc zstd bison c-ares \
autoconf libtool automake flatbuffers
brew uninstall --formula node kotlin harfbuzz sbt selenium-server imagemagick \
gradle maven openjdk postgresql r ant [email protected] mongosh \
node@18 php composer
brew update && brew install --force ninja boost openssl automake gcc zstd bison c-ares \
autoconf libtool automake
# brew info icu4c
mkdir -p $GITHUB_WORKSPACE/build
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docker-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
fetch-depth: 0

- name: Install helm
uses: azure/setup-helm@v3
uses: azure/setup-helm@v4

- name: Setup Go
uses: actions/setup-go@v4
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/regression-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
container:
image: ghcr.io/romange/${{ matrix.container }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: true

Expand Down Expand Up @@ -57,5 +57,5 @@ jobs:
lint-test-chart:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: ./.github/actions/lint-test-chart
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
name: Build aarch64 on ubuntu20.04
needs: create-release
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: true
- name: Cache build deps
Expand Down Expand Up @@ -111,7 +111,7 @@ jobs:
container:
image: ghcr.io/romange/${{ matrix.container }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: true
- name: Configure
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/reusable-container-workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
tag_main: true
steps:
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 1
submodules: true
Expand Down
18 changes: 9 additions & 9 deletions contrib/charts/dragonfly/ci/priorityclassname-values.golden.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
---
# Source: dragonfly/templates/extra-manifests.yaml
apiVersion: scheduling.k8s.io/v1
description: This priority class should be used only for tests.
globalDefault: false
kind: PriorityClass
metadata:
name: high-priority
value: 1000000
---
# Source: dragonfly/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
Expand Down Expand Up @@ -92,12 +101,3 @@ spec:
resources:
limits: {}
requests: {}
---
# Source: dragonfly/templates/extra-manifests.yaml
apiVersion: scheduling.k8s.io/v1
description: This priority class should be used only for tests.
globalDefault: false
kind: PriorityClass
metadata:
name: high-priority
value: 1000000
11 changes: 7 additions & 4 deletions src/core/mi_memory_resource.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@
//
#include "core/mi_memory_resource.h"

#include <sys/mman.h>

#include "base/logging.h"

namespace dfly {

void* MiMemoryResource::do_allocate(std::size_t size, std::size_t align) {
using namespace std;

void* MiMemoryResource::do_allocate(size_t size, size_t align) {
DCHECK(align);

void* res = mi_heap_malloc_aligned(heap_, size, align);

if (!res)
throw std::bad_alloc{};

throw bad_alloc{};

// It seems that mimalloc has a bug with larger allocations that causes
// mi_heap_contains_block to lie. See https://github.com/microsoft/mimalloc/issues/587
Expand All @@ -28,7 +31,7 @@ void* MiMemoryResource::do_allocate(std::size_t size, std::size_t align) {
return res;
}

void MiMemoryResource::do_deallocate(void* ptr, std::size_t size, std::size_t align) {
void MiMemoryResource::do_deallocate(void* ptr, size_t size, size_t align) {
DCHECK(size > 33554400 || mi_heap_contains_block(heap_, ptr));

size_t usable = mi_usable_size(ptr);
Expand Down
1 change: 1 addition & 0 deletions src/core/mi_memory_resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

namespace dfly {

// Per thread memory resource that uses mimalloc.
class MiMemoryResource : public PMR_NS::memory_resource {
public:
explicit MiMemoryResource(mi_heap_t* heap) : heap_(heap) {
Expand Down
2 changes: 1 addition & 1 deletion src/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ cxx_link(dragonfly base dragonfly_lib)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_BUILD_TYPE STREQUAL "Release")
# Add core2 only to this file, thus avoiding instructions in this object file that
# can cause SIGILL.
set_source_files_properties(dfly_main.cc PROPERTIES COMPILE_FLAGS -march=core2)
set_source_files_properties(dfly_main.cc PROPERTIES COMPILE_FLAGS "-march=core2")
endif()

set_property(SOURCE dfly_main.cc APPEND PROPERTY COMPILE_DEFINITIONS
Expand Down
34 changes: 26 additions & 8 deletions src/server/dfly_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ namespace dfly {

namespace {

// Default stack size for fibers. We decrease it by 16 bytes because some allocators
// need additional 8-16 bytes for their internal structures, thus over reserving additional
// memory pages if using round sizes.
constexpr size_t kFiberDefaultStackSize = 32_KB - 16;

using util::http::TlsClient;

enum class TermColor { kDefault, kRed, kGreen, kYellow };
Expand Down Expand Up @@ -151,6 +156,12 @@ string NormalizePaths(std::string_view path) {
return string(path);
}

template <typename... Args> unique_ptr<Listener> MakeListener(Args&&... args) {
auto res = make_unique<Listener>(forward<Args>(args)...);
res->SetConnFiberStackSize(kFiberDefaultStackSize);
return res;
}

bool RunEngine(ProactorPool* pool, AcceptServer* acceptor) {
uint64_t maxmemory = GetMaxMemoryFlag();
if (maxmemory > 0 && maxmemory < pool->size() * 256_MB) {
Expand All @@ -165,12 +176,14 @@ bool RunEngine(ProactorPool* pool, AcceptServer* acceptor) {
Listener* main_listener = nullptr;

std::vector<facade::Listener*> listeners;

// If we ever add a new listener, plz don't change this,
// we depend on tcp listener to be at the front since we later
// need to pass it to the AclFamily::Init
if (!tcp_disabled) {
main_listener = new Listener{Protocol::REDIS, &service, Listener::Role::MAIN};
listeners.push_back(main_listener);
auto listener = MakeListener(Protocol::REDIS, &service, Listener::Role::MAIN);
main_listener = listener.get();
listeners.push_back(listener.release());
}

Service::InitOpts opts;
Expand Down Expand Up @@ -218,7 +231,7 @@ bool RunEngine(ProactorPool* pool, AcceptServer* acceptor) {
}
unlink(unix_sock.c_str());

auto uds_listener = std::make_unique<Listener>(Protocol::REDIS, &service);
auto uds_listener = MakeListener(Protocol::REDIS, &service);
error_code ec =
acceptor->AddUDSListener(unix_sock.c_str(), unix_socket_perm, uds_listener.get());
if (ec) {
Expand Down Expand Up @@ -248,8 +261,8 @@ bool RunEngine(ProactorPool* pool, AcceptServer* acceptor) {
const char* interface_addr = admin_bind.empty() ? nullptr : admin_bind.c_str();
const std::string printable_addr =
absl::StrCat("admin socket ", interface_addr ? interface_addr : "any", ":", admin_port);
auto admin_listener =
std::make_unique<Listener>(Protocol::REDIS, &service, Listener::Role::PRIVILEGED);
auto admin_listener = MakeListener(Protocol::REDIS, &service, Listener::Role::PRIVILEGED);

error_code ec = acceptor->AddListener(interface_addr, admin_port, admin_listener.get());

if (ec) {
Expand All @@ -260,7 +273,7 @@ bool RunEngine(ProactorPool* pool, AcceptServer* acceptor) {
}
}

if (!tcp_disabled) {
if (main_listener) {
error_code ec = acceptor->AddListener(bind_addr, port, main_listener);

if (ec) {
Expand All @@ -274,7 +287,8 @@ bool RunEngine(ProactorPool* pool, AcceptServer* acceptor) {
}

if (mc_port > 0 && !tcp_disabled) {
acceptor->AddListener(mc_port, new Listener{Protocol::MEMCACHE, &service});
auto listener = MakeListener(Protocol::MEMCACHE, &service);
acceptor->AddListener(mc_port, listener.release());
}

service.Init(acceptor, listeners, opts);
Expand Down Expand Up @@ -690,10 +704,14 @@ Usage: dragonfly [FLAGS]
LOG(INFO) << "Max memory limit is: " << hr_limit;
}

// Initialize mi_malloc options
// export MIMALLOC_VERBOSE=1 to see the options before the override.
mi_option_enable(mi_option_show_errors);
mi_option_set(mi_option_max_warnings, 0);
mi_option_set(mi_option_decommit_delay, 1);

fb2::SetDefaultStackResource(&fb2::std_malloc_resource, kFiberDefaultStackSize);

unique_ptr<util::ProactorPool> pool;

#ifdef __linux__
Expand All @@ -716,7 +734,7 @@ Usage: dragonfly [FLAGS]

pool->Run();

AcceptServer acceptor(pool.get());
AcceptServer acceptor(pool.get(), &fb2::std_malloc_resource, true);
acceptor.set_back_log(absl::GetFlag(FLAGS_tcp_backlog));

int res = dfly::RunEngine(pool.get(), &acceptor) ? 0 : -1;
Expand Down
6 changes: 5 additions & 1 deletion src/server/memory_cmd.cc
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
// Copyright 2022, DragonflyDB authors. All rights reserved.
// Copyright 2024, DragonflyDB authors. All rights reserved.
// See LICENSE for licensing terms.
//

#include "server/memory_cmd.h"

#include <absl/strings/str_cat.h>

#ifdef __linux__
#include <malloc.h>
#endif

#include <mimalloc.h>

#include "base/io_buf.h"
Expand Down
1 change: 0 additions & 1 deletion src/server/replica.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <chrono>

#include "absl/strings/match.h"
#include "util/fibers/fiber2.h"

extern "C" {
#include "redis/rdb.h"
Expand Down
13 changes: 10 additions & 3 deletions src/server/server_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1023,8 +1023,11 @@ void PrintPrometheusMetrics(const Metrics& m, StringResponse* resp) {
&resp->body());
AppendMetricWithoutLabels("memory_used_peak_bytes", "", used_mem_peak.load(memory_order_relaxed),
MetricType::GAUGE, &resp->body());
AppendMetricWithoutLabels("comitted_memory", "", GetMallocCurrentCommitted(), MetricType::GAUGE,
AppendMetricWithoutLabels("memory_fiberstack_vms_bytes", "", m.worker_fiber_stack_size,
MetricType::GAUGE, &resp->body());
AppendMetricWithoutLabels("fibers_count", "", m.worker_fiber_count, MetricType::GAUGE,
&resp->body());

AppendMetricWithoutLabels("memory_max_bytes", "", max_memory_limit, MetricType::GAUGE,
&resp->body());

Expand Down Expand Up @@ -1773,6 +1776,8 @@ Metrics ServerFamily::GetMetrics() const {
result.fiber_switch_delay_usec += fb2::FiberSwitchDelayUsec();
result.fiber_longrun_cnt += fb2::FiberLongRunCnt();
result.fiber_longrun_usec += fb2::FiberLongRunSumUsec();
result.worker_fiber_stack_size += fb2::WorkerFibersStackSize();
result.worker_fiber_count += fb2::WorkerFibersCount();

result.coordinator_stats.Add(ss->stats);

Expand Down Expand Up @@ -1888,13 +1893,15 @@ void ServerFamily::Info(CmdArgList args, ConnectionContext* cntx) {
append("used_memory_peak", ump);
append("used_memory_peak_human", HumanReadableNumBytes(ump));

// Virtual memory size, upper bound estimation on the RSS memory used by the fiber stacks.
append("fibers_stack_vms", m.worker_fiber_stack_size);
append("fibers_count", m.worker_fiber_count);

size_t rss = rss_mem_current.load(memory_order_relaxed);
append("used_memory_rss", rss);
append("used_memory_rss_human", HumanReadableNumBytes(rss));
append("used_memory_peak_rss", rss_mem_peak.load(memory_order_relaxed));

append("comitted_memory", GetMallocCurrentCommitted());

append("maxmemory", max_memory_limit);
append("maxmemory_human", HumanReadableNumBytes(max_memory_limit));

Expand Down
2 changes: 2 additions & 0 deletions src/server/server_family.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ struct Metrics {

// Max length of the all the tx shard-queues.
uint32_t tx_queue_len = 0;
uint32_t worker_fiber_count = 0;
size_t worker_fiber_stack_size = 0;

// command call frequencies (count, aggregated latency in usec).
std::map<std::string, std::pair<uint64_t, uint64_t>> cmd_stats_map;
Expand Down

0 comments on commit f7292de

Please sign in to comment.