Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for big values in SeederV2 #4222

Merged
merged 15 commits into from
Dec 5, 2024
7 changes: 7 additions & 0 deletions tests/dragonfly/replication_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from . import dfly_args
from .proxy import Proxy
from .seeder import StaticSeeder
from .seeder import SeederBase
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from .seeder import StaticSeeder
from .seeder import SeederBase
from .seeder import StaticSeeder, SeederBase


ADMIN_PORT = 1211

Expand Down Expand Up @@ -132,6 +133,12 @@ async def check():
# Check data after stable state stream
await check()

if big_value:
info = await c_master.info()
preemptions = info["big_value_preemptions"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this computed? I couldn't find it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this computed? I couldn't find it.

It's a new metric I introduced in my other PR. I will remove this for now and we will add it after it's merged.

logging.info(f"Preemptions {preemptions}")
assert preemptions > 0


async def check_replica_finished_exec(c_replica: aioredis.Redis, m_offset):
role = await c_replica.role()
Expand Down
8 changes: 8 additions & 0 deletions tests/dragonfly/seeder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class SeederBase:
UID_COUNTER = 1 # multiple generators should not conflict on keys
CACHED_SCRIPTS = {}
DEFAULT_TYPES = ["STRING", "LIST", "SET", "HASH", "ZSET", "JSON"]
BIG_VALUE_TYPES = ["LIST", "SET", "HASH", "ZSET"]

def __init__(self, types: typing.Optional[typing.List[str]] = None):
self.uid = SeederBase.UID_COUNTER
Expand Down Expand Up @@ -137,6 +138,8 @@ def __init__(
data_size=100,
collection_size=None,
types: typing.Optional[typing.List[str]] = None,
huge_value_percentage=5,
huge_value_size=16384,
):
SeederBase.__init__(self, types)
self.key_target = key_target
Expand All @@ -146,6 +149,9 @@ def __init__(
else:
self.collection_size = collection_size

self.huge_value_percentage = huge_value_percentage
self.huge_value_size = huge_value_size

self.units = [
Seeder.Unit(
prefix=f"k-s{self.uid}u{i}-",
Expand All @@ -166,6 +172,8 @@ async def run(self, client: aioredis.Redis, target_ops=None, target_deviation=No
target_deviation if target_deviation is not None else -1,
self.data_size,
self.collection_size,
self.huge_value_percentage,
self.huge_value_size,
]

sha = await client.script_load(Seeder._load_script("generate"))
Expand Down
6 changes: 5 additions & 1 deletion tests/dragonfly/seeder/script-generate.lua
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ local total_ops = tonumber(ARGV[6])
local min_dev = tonumber(ARGV[7])
local data_size = tonumber(ARGV[8])
local collection_size = tonumber(ARGV[9])
-- Percentage chance (0-100) that a generated value is a huge value
local huge_value_percentage = tonumber(ARGV[10])
local huge_value_size = tonumber(ARGV[11])

-- collect all keys belonging to this script
-- assumes exclusive ownership
local keys = LU_collect_keys(prefix, type)

LG_funcs.init(data_size, collection_size)
LG_funcs.init(data_size, collection_size, huge_value_percentage, huge_value_size)
local addfunc = LG_funcs['add_' .. string.lower(type)]
local modfunc = LG_funcs['mod_' .. string.lower(type)]

Expand Down Expand Up @@ -85,6 +88,7 @@ while true do
if counter % 10 == 0 then
-- calculate intensity (not normalized probabilities)
-- please see attached plots in PR to understand convergence
-- https://github.com/dragonflydb/dragonfly/pull/2556
chakaz marked this conversation as resolved.
Show resolved Hide resolved

-- the add intensity is monotonically decreasing with keycount growing,
-- the delete intensity is monotonically increasing with keycount growing,
Expand Down
95 changes: 83 additions & 12 deletions tests/dragonfly/seeder/script-genlib.lua
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
local LG_funcs = {}

function LG_funcs.init(dsize, csize)
function LG_funcs.init(dsize, csize, large_val_perc, large_val_sz)
LG_funcs.dsize = dsize
LG_funcs.csize = csize
LG_funcs.esize = math.ceil(dsize / csize)
LG_funcs.huge_value_percentage = large_val_perc
LG_funcs.huge_value_size = large_val_sz
end

local function huge_entry()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to expose this as a metric, such that once the seeder finishes it will report how many big values it created. However, since this code is a script I don't see a "smart way".

Maybe the seeder can create a key in dragonfly (set big_values number_of_big_values) which the test can then poll?

@chakaz any ideas/thoughts?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can simply iterate over all db keys in this lua script. That shouldn't be too hard, nor slow.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(we can use SCAN, TYPE and MEMORY USAGE in the script to get all the info we seek)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about this and we don't really need scan. In fact I baked this metric in the lua script which we return -- works perfectly

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you want huge_entry() to depend on the key? Such that some keys are huge, while others aren't, based on (say) their hash?
The reason I say this is because the seeder uses many operations to generate the values, like many lpush, hset, etc. If we do 100 operations per key (just throwing numbers here), doing 5% huge will make them all roughly of the same size...

local perc = LG_funcs.huge_value_percentage / 100
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this now isn't percent, right? fraction or ratio would be more accurate

-- [0, 1)
local rand = math.random()
local huge_entry = (perc > rand)
return huge_entry
end

-- strings
Expand All @@ -27,12 +37,18 @@ end
-- lists
-- store list of random blobs of default container/element sizes

function LG_funcs.add_list(key)
local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
function LG_funcs.add_list(key, huge_value)
local elements
if huge_entry() then
elements = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize)
else
elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
end

redis.apcall('LPUSH', key, unpack(elements))
end

function LG_funcs.mod_list(key)
function LG_funcs.mod_list(key, huge_value)
-- equally likely pops and pushes, we rely on the list size being large enough
-- to "highly likely" not get emptied out by consecutive pops
local action = math.random(1, 4)
Expand All @@ -41,9 +57,23 @@ function LG_funcs.mod_list(key)
elseif action == 2 then
redis.apcall('LPOP', key)
elseif action == 3 then
redis.apcall('LPUSH', key, dragonfly.randstr(LG_funcs.esize))
local str
if huge_entry() then
str = dragonfly.randstr(LG_funcs.huge_value_size)
else
str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('LPUSH', key, str)
else
redis.apcall('RPUSH', key, dragonfly.randstr(LG_funcs.esize))
local str
if huge_entry() then
str = dragonfly.randstr(LG_funcs.huge_value_size)
else
str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('RPUSH', key, str)
end
end

Expand All @@ -62,7 +92,15 @@ function LG_funcs.add_set(key, keys)
end
redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2])
else
local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
local elements
if huge_entry() then
-- Hard coded 10 here, meaning up to 10 huge entries per set
Copy link
Contributor Author

@kostasrim kostasrim Nov 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

//TODO so I don't forget to fix it. Replace 10 with LG_funcs.csize

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please fix :)

-- TODO make this configurable
-- The huge element size is stored by LG_funcs.init as LG_funcs.huge_value_size;
-- LG_funcs.large_val_sz is never assigned and would evaluate to nil here.
elements = dragonfly.randstr(LG_funcs.huge_value_size, 10)
else
elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
end

redis.apcall('SADD', key, unpack(elements))
end
end
Expand All @@ -72,7 +110,14 @@ function LG_funcs.mod_set(key)
if math.random() < 0.5 then
redis.apcall('SPOP', key)
else
redis.apcall('SADD', key, dragonfly.randstr(LG_funcs.esize))
local rand_str
if huge_entry() then
rand_str = dragonfly.randstr(LG_funcs.huge_value_size)
else
rand_str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('SADD', key, rand_str)
end
end

Expand All @@ -82,7 +127,13 @@ end
-- where `value` is a random string for even indices and a number for odd indices

function LG_funcs.add_hash(key)
local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
local blobs
if huge_entry() then
blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize / 2)
else
blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
end

local htable = {}
for i = 1, LG_funcs.csize, 2 do
htable[i * 2 - 1] = tostring(i)
Expand All @@ -100,15 +151,28 @@ function LG_funcs.mod_hash(key)
if idx % 2 == 1 then
redis.apcall('HINCRBY', key, tostring(idx), 1)
else
redis.apcall('HSET', key, tostring(idx), dragonfly.randstr(LG_funcs.esize))
-- Pick the replacement field value: a huge blob when huge_entry() says so,
-- a regular element-sized blob otherwise.
-- The huge size is stored by LG_funcs.init as LG_funcs.huge_value_size;
-- LG_funcs.large_val_sz is never assigned and would evaluate to nil.
local str
if huge_entry() then
    str = dragonfly.randstr(LG_funcs.huge_value_size)
else
    str = dragonfly.randstr(LG_funcs.esize)
end

redis.apcall('HSET', key, tostring(idx), str)
end
end

-- sorted sets

function LG_funcs.add_zset(key, keys)
-- TODO: We don't support ZDIFFSTORE
local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-- Generate csize member blobs: huge ones when huge_entry() says so,
-- otherwise regular blobs of the per-element size.
local blobs
if huge_entry() then
    blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize)
else
    -- Element size is esize (dsize / csize), matching the other add_* helpers;
    -- csize is the number of elements, not their size.
    blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
end

local ztable = {}
for i = 1, LG_funcs.csize do
ztable[i * 2 - 1] = tostring(i)
Expand All @@ -120,7 +184,14 @@ end
function LG_funcs.mod_zset(key, dbsize)
local action = math.random(1, 4)
if action <= 2 then
redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), dragonfly.randstr(LG_funcs.esize))
-- Pick the member to add: a huge blob when huge_entry() says so,
-- a regular element-sized blob otherwise.
-- The huge size is stored by LG_funcs.init as LG_funcs.huge_value_size;
-- LG_funcs.large_val_sz is never assigned and would evaluate to nil.
local str
if huge_entry() then
    str = dragonfly.randstr(LG_funcs.huge_value_size)
else
    str = dragonfly.randstr(LG_funcs.esize)
end

-- Score is drawn uniformly from [0, 2 * csize].
redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), str)
elseif action == 3 then
redis.apcall('ZPOPMAX', key)
else
Expand Down
Loading