-
Notifications
You must be signed in to change notification settings - Fork 968
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Huge values breakdown in cluster migration #4144
Changes from all commits
11e5e38
e23b2c6
f9375ed
1c2dd5f
042b2a1
81010bd
9078320
c813b8a
d70b177
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
// Copyright 2024, DragonflyDB authors. All rights reserved. | ||
// See LICENSE for licensing terms. | ||
// | ||
|
||
#include "server/journal/cmd_serializer.h"

#include <absl/strings/str_format.h>

#include "server/container_utils.h"
#include "server/journal/serializer.h"
#include "server/rdb_save.h"
|
||
namespace dfly { | ||
|
||
namespace { | ||
using namespace std; | ||
|
||
class CommandAggregator { | ||
public: | ||
using WriteCmdCallback = std::function<void(absl::Span<const string_view>)>; | ||
|
||
CommandAggregator(string_view key, WriteCmdCallback cb) : key_(key), cb_(cb) { | ||
} | ||
|
||
~CommandAggregator() { | ||
CommitPending(); | ||
} | ||
|
||
enum class CommitMode { kAuto, kNoCommit }; | ||
void AddArg(string arg, CommitMode commit_mode = CommitMode::kAuto) { | ||
agg_bytes_ += arg.size(); | ||
members_.push_back(std::move(arg)); | ||
|
||
if (commit_mode != CommitMode::kNoCommit && agg_bytes_ >= serialization_max_chunk_size) { | ||
CommitPending(); | ||
} | ||
} | ||
|
||
private: | ||
void CommitPending() { | ||
if (members_.empty()) { | ||
return; | ||
} | ||
|
||
args_.clear(); | ||
args_.reserve(members_.size() + 1); | ||
args_.push_back(key_); | ||
for (string_view member : members_) { | ||
args_.push_back(member); | ||
} | ||
cb_(args_); | ||
members_.clear(); | ||
} | ||
|
||
string_view key_; | ||
WriteCmdCallback cb_; | ||
vector<string> members_; | ||
absl::InlinedVector<string_view, 5> args_; | ||
size_t agg_bytes_ = 0; | ||
}; | ||
|
||
} // namespace | ||
|
||
CmdSerializer::CmdSerializer(FlushSerialized cb) : cb_(std::move(cb)) { | ||
} | ||
|
||
// Serializes one DB entry. Values larger than serialization_max_chunk_size whose type can be
// rebuilt element by element (set, zset, hash, list) are emitted as a series of commands;
// everything else is emitted as a single RESTORE.
void CmdSerializer::SerializeEntry(string_view key, const PrimeValue& pk, const PrimeValue& pv,
                                   uint64_t expire_ms) {
  // A limit of 0 disables splitting altogether.
  const bool exceeds_chunk_limit =
      serialization_max_chunk_size > 0 && pv.MallocUsed() > serialization_max_chunk_size;

  bool was_split = false;
  if (exceeds_chunk_limit) {
    was_split = true;
    switch (pv.ObjType()) {
      case OBJ_SET:
        SerializeSet(key, pv);
        break;
      case OBJ_ZSET:
        SerializeZSet(key, pv);
        break;
      case OBJ_HASH:
        SerializeHash(key, pv);
        break;
      case OBJ_LIST:
        SerializeList(key, pv);
        break;
      case OBJ_STRING:
      case OBJ_STREAM:
      case OBJ_JSON:
      case OBJ_SBF:
      default:
        // Breaking these types into several commands is not supported, so they go out as a
        // RESTORE regardless of size.
        was_split = false;
        break;
    }
  }

  if (!was_split) {
    // RESTORE carries both stickiness and expiry as part of the command itself.
    SerializeRestore(key, pk, pv, expire_ms);
    return;
  }

  // The element-wise commands carry neither, so send them separately.
  SerializeStickIfNeeded(key, pk);
  SerializeExpireIfNeeded(key, expire_ms);
}
|
||
// Wraps `cmd` and `args` in a journal COMMAND entry, writes the entry into a string and passes
// that string to the flush callback.
void CmdSerializer::SerializeCommand(string_view cmd, absl::Span<const string_view> args) {
  // In-memory destination for the serialized entry.
  io::StringSink sink;

  journal::Entry cmd_entry(0,                     // txid
                           journal::Op::COMMAND,  // single command
                           0,                     // db index
                           1,                     // shard count
                           0,                     // slot-id, but it is ignored at this level
                           journal::Entry::Payload(cmd, ArgSlice(args)));

  JournalWriter journal_writer{&sink};
  journal_writer.Write(cmd_entry);

  // Hand the accumulated string over to the consumer.
  cb_(std::move(sink).str());
}
|
||
// Emits a STICK command for `key` when the key object `pk` is marked sticky; otherwise emits
// nothing.
void CmdSerializer::SerializeStickIfNeeded(string_view key, const PrimeValue& pk) {
  if (pk.IsSticky()) {
    SerializeCommand("STICK", {key});
  }
}
|
||
// Emits the expiry of `key` as a separate command. Used for entries that were split into
// several commands and therefore did not get their expiry from RESTORE.
//
// `expire_ms` == 0 means the key has no expiry and nothing is emitted. Otherwise the value is
// treated as an absolute timestamp in milliseconds, matching the RESTORE path, which sends the
// same value with ABSTTL. Hence PEXPIREAT rather than the relative PEXPIRE.
void CmdSerializer::SerializeExpireIfNeeded(string_view key, uint64_t expire_ms) {
  if (expire_ms == 0) {
    return;
  }

  // Named local so the view placed in the argument list has an obvious owner.
  const string expire_at = absl::StrCat(expire_ms);
  SerializeCommand("PEXPIREAT", {key, expire_at});
}
|
||
// Emits the set stored in `pv` as one or more SADD commands on `key`.
void CmdSerializer::SerializeSet(string_view key, const PrimeValue& pv) {
  auto emit_sadd = [this](absl::Span<const string_view> args) { SerializeCommand("SADD", args); };
  CommandAggregator batch(key, emit_sadd);

  // Returning true keeps the iteration going until every member has been visited.
  auto add_member = [&batch](container_utils::ContainerEntry member) {
    batch.AddArg(member.ToString());
    return true;
  };
  container_utils::IterateSet(pv, add_member);
}
|
||
// Emits the sorted set stored in `pv` as one or more ZADD commands on `key`, each carrying
// <score> <member> pairs.
void CmdSerializer::SerializeZSet(string_view key, const PrimeValue& pv) {
  CommandAggregator aggregator(
      key, [&](absl::Span<const string_view> args) { SerializeCommand("ZADD", args); });

  container_utils::IterateSortedSet(
      pv.GetRobjWrapper(),
      [&](container_utils::ContainerEntry ce, double score) {
        // absl::StrCat(double) keeps only six significant digits, which would silently alter
        // scores on the receiving side. 17 significant digits are enough to reproduce any
        // IEEE-754 double exactly.
        // kNoCommit: never flush between a score and its member.
        aggregator.AddArg(absl::StrFormat("%.17g", score),
                          CommandAggregator::CommitMode::kNoCommit);
        aggregator.AddArg(ce.ToString());
        return true;  // Keep iterating.
      },
      /*start=*/0, /*end=*/-1, /*reverse=*/false, /*use_score=*/true);
}
|
||
// Emits the hash stored in `pv` as one or more HSET commands on `key`, each carrying
// <field> <value> pairs.
void CmdSerializer::SerializeHash(string_view key, const PrimeValue& pv) {
  auto emit_hset = [this](absl::Span<const string_view> args) { SerializeCommand("HSET", args); };
  CommandAggregator batch(key, emit_hset);

  auto add_pair = [&batch](container_utils::ContainerEntry field,
                           container_utils::ContainerEntry value) {
    // The field is added without a flush so that it always travels with its value.
    batch.AddArg(field.ToString(), CommandAggregator::CommitMode::kNoCommit);
    batch.AddArg(value.ToString());
    return true;
  };
  container_utils::IterateMap(pv, add_pair);
}
|
||
// Emits the list stored in `pv` as one or more RPUSH commands on `key`, in iteration order.
void CmdSerializer::SerializeList(string_view key, const PrimeValue& pv) {
  auto emit_rpush = [this](absl::Span<const string_view> args) {
    SerializeCommand("RPUSH", args);
  };
  CommandAggregator batch(key, emit_rpush);

  auto add_element = [&batch](container_utils::ContainerEntry element) {
    batch.AddArg(element.ToString());
    return true;
  };
  container_utils::IterateList(pv, add_element);
}
|
||
// Emits the whole entry as a single command:
//   RESTORE <key> <expire_ms> <dump of pv> ABSTTL [STICK]
void CmdSerializer::SerializeRestore(string_view key, const PrimeValue& pk, const PrimeValue& pv,
                                     uint64_t expire_ms) {
  // `args` stores views only, so the two buffers below must stay alive until the command has
  // been serialized.
  string ttl_arg = absl::StrCat(expire_ms);

  io::StringSink dump_sink;
  SerializerBase::DumpObject(pv, &dump_sink);

  absl::InlinedVector<string_view, 5> args{
      key,
      ttl_arg,
      dump_sink.str(),
      "ABSTTL",  // Means expire string is since epoch
  };

  if (pk.IsSticky()) {
    args.push_back("STICK");
  }

  SerializeCommand("RESTORE", args);
}
|
||
} // namespace dfly |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// Copyright 2024, DragonflyDB authors. All rights reserved. | ||
// See LICENSE for licensing terms. | ||
// | ||
|
||
#pragma once | ||
|
||
#include <absl/types/span.h> | ||
|
||
#include <string> | ||
#include <string_view> | ||
|
||
#include "server/table.h" | ||
|
||
namespace dfly { | ||
|
||
// CmdSerializer serializes DB entries (key+value) into command(s) in RESP format string. | ||
// Small entries are serialized as RESTORE commands, while bigger ones (see | ||
// serialization_max_chunk_size) are split into multiple commands (like rpush, hset, etc). | ||
// Expiration and stickiness are also serialized into commands. | ||
class CmdSerializer { | ||
public: | ||
using FlushSerialized = std::function<void(std::string)>; | ||
|
||
explicit CmdSerializer(FlushSerialized cb); | ||
|
||
void SerializeEntry(std::string_view key, const PrimeValue& pk, const PrimeValue& pv, | ||
uint64_t expire_ms); | ||
|
||
private: | ||
void SerializeCommand(std::string_view cmd, absl::Span<const std::string_view> args); | ||
void SerializeStickIfNeeded(std::string_view key, const PrimeValue& pk); | ||
void SerializeExpireIfNeeded(std::string_view key, uint64_t expire_ms); | ||
|
||
void SerializeSet(std::string_view key, const PrimeValue& pv); | ||
void SerializeZSet(std::string_view key, const PrimeValue& pv); | ||
void SerializeHash(std::string_view key, const PrimeValue& pv); | ||
void SerializeList(std::string_view key, const PrimeValue& pv); | ||
void SerializeRestore(std::string_view key, const PrimeValue& pk, const PrimeValue& pv, | ||
uint64_t expire_ms); | ||
|
||
FlushSerialized cb_; | ||
}; | ||
|
||
} // namespace dfly |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
#include "base/flags.h" | ||
#include "base/logging.h" | ||
#include "server/cluster/cluster_defs.h" | ||
#include "server/journal/cmd_serializer.h" | ||
#include "util/fibers/synchronization.h" | ||
|
||
using namespace facade; | ||
|
@@ -317,37 +318,8 @@ void RestoreStreamer::OnDbChange(DbIndex db_index, const DbSlice::ChangeReq& req | |
|
||
void RestoreStreamer::WriteEntry(string_view key, const PrimeValue& pk, const PrimeValue& pv, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. RestoreStreamer::WriteBucket has a fiber guard; doesn't this mean that the buffer just grows and grows before it is sent to the socket? So you break up the values when serializing, but we don't really reduce RSS usage. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Very nice catch! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can add a check for used_memory_peak_rss in the pytest and compare it before and after migration. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, so I'll split this into a separate test that actually uses huge values; otherwise RSS will be noisy with small values, as it is now. |
||
uint64_t expire_ms) { | ||
absl::InlinedVector<string_view, 5> args; | ||
args.push_back(key); | ||
|
||
string expire_str = absl::StrCat(expire_ms); | ||
args.push_back(expire_str); | ||
|
||
io::StringSink restore_cmd_sink; | ||
{ // to destroy extra copy | ||
io::StringSink value_dump_sink; | ||
SerializerBase::DumpObject(pv, &value_dump_sink); | ||
args.push_back(value_dump_sink.str()); | ||
|
||
args.push_back("ABSTTL"); // Means expire string is since epoch | ||
|
||
if (pk.IsSticky()) { | ||
args.push_back("STICK"); | ||
} | ||
|
||
journal::Entry entry(0, // txid | ||
journal::Op::COMMAND, // single command | ||
0, // db index | ||
1, // shard count | ||
0, // slot-id, but it is ignored at this level | ||
journal::Entry::Payload("RESTORE", ArgSlice(args))); | ||
|
||
JournalWriter writer{&restore_cmd_sink}; | ||
writer.Write(entry); | ||
} | ||
// TODO: From DumpObject to till Write we tripple copy the PrimeValue. It's very inefficient and | ||
// will burn CPU for large values. | ||
Write(restore_cmd_sink.str()); | ||
CmdSerializer serializer([&](std::string s) { Write(s); }); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the write function There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Somewhere there will be a cast from |
||
serializer.SerializeEntry(key, pk, pv, expire_ms); | ||
} | ||
|
||
} // namespace dfly |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if we can make this sink and the other one members of this class, and make the CmdSerializer a member of RestoreStreamer, and thereby reduce the number of allocations we have.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The challenge is that
JournalStreamer::Write
is more complicated than just a simple sink :(