Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for big values in SeederV2 #4222

Merged
merged 15 commits into from
Dec 5, 2024
16 changes: 14 additions & 2 deletions tests/dragonfly/seeder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class SeederBase:
UID_COUNTER = 1 # multiple generators should not conflict on keys
CACHED_SCRIPTS = {}
DEFAULT_TYPES = ["STRING", "LIST", "SET", "HASH", "ZSET", "JSON"]
BIG_VALUE_TYPES = ["LIST", "SET", "HASH", "ZSET"]

def __init__(self, types: typing.Optional[typing.List[str]] = None):
self.uid = SeederBase.UID_COUNTER
Expand Down Expand Up @@ -137,6 +138,8 @@ def __init__(
data_size=100,
collection_size=None,
types: typing.Optional[typing.List[str]] = None,
huge_value_percentage=3,
huge_value_size=8192,
):
SeederBase.__init__(self, types)
self.key_target = key_target
Expand All @@ -146,6 +149,9 @@ def __init__(
else:
self.collection_size = collection_size

self.huge_value_percentage = huge_value_percentage
self.huge_value_size = huge_value_size

self.units = [
Seeder.Unit(
prefix=f"k-s{self.uid}u{i}-",
Expand All @@ -166,6 +172,8 @@ async def run(self, client: aioredis.Redis, target_ops=None, target_deviation=No
target_deviation if target_deviation is not None else -1,
self.data_size,
self.collection_size,
self.huge_value_percentage,
self.huge_value_size,
]

sha = await client.script_load(Seeder._load_script("generate"))
Expand Down Expand Up @@ -196,8 +204,12 @@ async def _run_unit(client: aioredis.Redis, sha: str, unit: Unit, using_stopkey,
unit.stop_key if using_stopkey else "",
] + args

unit.counter = await client.evalsha(sha, 0, *args)
result = await client.evalsha(sha, 0, *args)
result = result.split()
unit.counter = int(result[0])
huge_keys = int(result[1])
huge_entries = int(result[2])

logging.debug(
f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}"
f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}, huge keys {huge_keys} with total huge entries {huge_entries}"
)
33 changes: 29 additions & 4 deletions tests/dragonfly/seeder/script-generate.lua
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,45 @@ local total_ops = tonumber(ARGV[6])
local min_dev = tonumber(ARGV[7])
local data_size = tonumber(ARGV[8])
local collection_size = tonumber(ARGV[9])
-- Chance, expressed as a percentage (0-100), that a newly added key is created as a huge value
local huge_value_percentage = tonumber(ARGV[10])
local huge_value_size = tonumber(ARGV[11])

-- collect all keys belonging to this script
-- assumes exclusive ownership
local keys = LU_collect_keys(prefix, type)

LG_funcs.init(data_size, collection_size)
LG_funcs.init(data_size, collection_size, huge_value_percentage, huge_value_size)
local addfunc = LG_funcs['add_' .. string.lower(type)]
local modfunc = LG_funcs['mod_' .. string.lower(type)]
local huge_entries = LG_funcs["get_huge_entries"]

local huge_keys = 0

local function huge_entry()
chakaz marked this conversation as resolved.
Show resolved Hide resolved
local ratio = LG_funcs.huge_value_percentage / 100
-- math.random() with no arguments returns a float in [0, 1)
local rand = math.random()
local huge_entry = (ratio > rand)
return huge_entry
end

local function action_add()
local key = prefix .. tostring(key_counter)
local op_type = string.lower(type)
local is_huge = false
if op_type ~= "string" and op_type ~= "json" then
is_huge = huge_entry()
end
Comment on lines +51 to +53
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment explaining that only string and json are handled here, because other types are handled below (and where)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't handle JSON or string here; we just roll a die to decide whether the key should be a huge value or not. There are no huge values for strings or JSON, so that's why we skip the roll for those types.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, my bad, can you please add that as a comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

of course!


key_counter = key_counter + 1
if is_huge then
huge_keys = huge_keys + 1
end

addfunc(key, keys)
table.insert(keys, key)
keys[key] = is_huge
addfunc(key, keys)
end

local function action_mod()
Expand Down Expand Up @@ -84,7 +108,8 @@ while true do
-- update probability only every 10 iterations
if counter % 10 == 0 then
-- calculate intensity (not normalized probabilities)
-- please see attached plots in PR to undertand convergence
-- please see attached plots in PR to understand convergence
-- https://github.com/dragonflydb/dragonfly/pull/2556
chakaz marked this conversation as resolved.
Show resolved Hide resolved

-- the add intensity is monotonically decreasing with keycount growing,
-- the delete intensity is monotonically increasing with keycount growing,
Expand Down Expand Up @@ -121,4 +146,4 @@ if stop_key ~= '' then
redis.call('DEL', stop_key)
end

return key_counter
return tostring(key_counter) .. " " .. tostring(huge_keys) .. " " .. tostring(huge_entries())
80 changes: 62 additions & 18 deletions tests/dragonfly/seeder/script-genlib.lua
Original file line number Diff line number Diff line change
@@ -1,9 +1,35 @@
local LG_funcs = {}

function LG_funcs.init(dsize, csize)
function LG_funcs.init(dsize, csize, large_val_perc, large_val_sz)
LG_funcs.dsize = dsize
LG_funcs.csize = csize
LG_funcs.esize = math.ceil(dsize / csize)
LG_funcs.huge_value_percentage = large_val_perc
LG_funcs.huge_value_size = large_val_sz
end

local huge_entries = 0

-- Produce the payload for a single element of a collection.
-- huge_entry: when truthy, LG_funcs.huge_value_size is passed to
--             dragonfly.randstr as the size; otherwise LG_funcs.esize is used.
-- Returns the value produced by dragonfly.randstr for a single size argument
-- (presumably one random string of that length -- confirm against the
-- dragonfly Lua API).
local function randstr(huge_entry)
local str
if huge_entry then
str = dragonfly.randstr(LG_funcs.huge_value_size)
-- count this call in the file-local tally exposed by LG_funcs.get_huge_entries
huge_entries = huge_entries + 1
else
str = dragonfly.randstr(LG_funcs.esize)
end
return str
end

-- Produce the payloads for a whole collection in one call.
-- huge_entry: when truthy, LG_funcs.huge_value_size is used as the per-element
--             size; otherwise LG_funcs.esize is used. LG_funcs.csize is passed
--             as the second argument in both cases.
-- Returns the value produced by the two-argument form of dragonfly.randstr;
-- callers hand it to unpack(), so it is expected to be an array-like table
-- (presumably csize strings -- confirm against the dragonfly Lua API).
local function randstr_sequence(huge_entry)
local strs
if huge_entry then
strs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize)
-- NOTE: the tally grows by one per call, not by the number of strings generated
huge_entries = huge_entries + 1
else
strs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
end
return strs
end

-- strings
Expand All @@ -27,23 +53,24 @@ end
-- lists
-- store list of random blobs of default container/element sizes

function LG_funcs.add_list(key)
local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
redis.apcall('LPUSH', key, unpack(elements))
function LG_funcs.add_list(key, keys)
chakaz marked this conversation as resolved.
Show resolved Hide resolved
local is_huge = keys[key]
redis.apcall('LPUSH', key, unpack(randstr_sequence(is_huge)))
end

function LG_funcs.mod_list(key)
function LG_funcs.mod_list(key, keys)
-- equally likely pops and pushes, we rely on the list size being large enough
-- to "highly likely" not get emptied out by consecutive pops
local is_huge = keys[key]
local action = math.random(1, 4)
if action == 1 then
redis.apcall('RPOP', key)
elseif action == 2 then
redis.apcall('LPOP', key)
elseif action == 3 then
redis.apcall('LPUSH', key, dragonfly.randstr(LG_funcs.esize))
redis.apcall('LPUSH', key, randstr(is_huge))
else
redis.apcall('RPUSH', key, dragonfly.randstr(LG_funcs.esize))
redis.apcall('RPUSH', key, randstr(is_huge))
end
end

Expand All @@ -62,17 +89,18 @@ function LG_funcs.add_set(key, keys)
end
redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2])
else
local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
redis.apcall('SADD', key, unpack(elements))
local is_huge = keys[key]
redis.apcall('SADD', key, unpack(randstr_sequence(is_huge)))
end
end

function LG_funcs.mod_set(key)
function LG_funcs.mod_set(key, keys)
-- equally likely pops and additions
if math.random() < 0.5 then
redis.apcall('SPOP', key)
else
redis.apcall('SADD', key, dragonfly.randstr(LG_funcs.esize))
local is_huge = keys[key]
redis.apcall('SADD', key, randstr(is_huge))
end
end

Expand All @@ -81,8 +109,16 @@ end
-- store {to_string(i): value for i in [1, csize]},
-- where `value` is a random string for even indices and a number for odd indices

function LG_funcs.add_hash(key)
local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
function LG_funcs.add_hash(key, keys)
local blobs
local is_huge = keys[key]
if is_huge then
blobs = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize / 2)
huge_entries = huge_entries + 1
else
blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
end

local htable = {}
for i = 1, LG_funcs.csize, 2 do
htable[i * 2 - 1] = tostring(i)
Expand All @@ -95,20 +131,23 @@ function LG_funcs.add_hash(key)
redis.apcall('HSET', key, unpack(htable))
end

function LG_funcs.mod_hash(key)
function LG_funcs.mod_hash(key, keys)
local idx = math.random(LG_funcs.csize)
if idx % 2 == 1 then
redis.apcall('HINCRBY', key, tostring(idx), 1)
else
redis.apcall('HSET', key, tostring(idx), dragonfly.randstr(LG_funcs.esize))
local is_huge = keys[key]
redis.apcall('HSET', key, tostring(idx), randstr(is_huge))
end
end

-- sorted sets

function LG_funcs.add_zset(key, keys)
-- TODO: We don't support ZDIFFSTORE
local blobs = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
local is_huge = keys[key]
local blobs = randstr_sequence(is_huge)

local ztable = {}
for i = 1, LG_funcs.csize do
ztable[i * 2 - 1] = tostring(i)
Expand All @@ -117,10 +156,11 @@ function LG_funcs.add_zset(key, keys)
redis.apcall('ZADD', key, unpack(ztable))
end

function LG_funcs.mod_zset(key, dbsize)
function LG_funcs.mod_zset(key, keys)
local action = math.random(1, 4)
if action <= 2 then
redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), dragonfly.randstr(LG_funcs.esize))
local is_huge = keys[key]
redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), randstr(is_huge))
elseif action == 3 then
redis.apcall('ZPOPMAX', key)
else
Expand Down Expand Up @@ -153,3 +193,7 @@ function LG_funcs.mod_json(key, dbsize)
redis.apcall('JSON.NUMINCRBY', key, '$.counters[' .. math.random(LG_funcs.csize ) .. ']', 1)
end
end

-- Accessor for the file-local `huge_entries` counter, which is incremented
-- each time a huge payload is generated by the helpers in this file.
-- Returns the current counter value (a number).
function LG_funcs.get_huge_entries()
return huge_entries
end
10 changes: 9 additions & 1 deletion tests/dragonfly/seeder_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,15 @@ async def check_list():

await async_client.flushall()

s = Seeder(units=1, key_target=10, data_size=10_000, collection_size=1, types=["LIST"])
s = Seeder(
units=1,
key_target=10,
data_size=10_000,
collection_size=1,
types=["LIST"],
huge_value_percentage=0,
huge_value_size=0,
Comment on lines +38 to +39
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because otherwise we get really big containers, which causes the memory to grow really fast. That's why I'd rather have two specific parameters: one for the size of each element in the container and one for the total number of elements per container.

)
await s.run(async_client)
await check_list()

Expand Down
Loading