dragonflydb · kostasrim · Dec 5, 2024 · Nov 28, 2024 · Nov 29, 2024 · Dec 2, 2024
diff --git a/tests/dragonfly/seeder/__init__.py b/tests/dragonfly/seeder/__init__.py
@@ -19,6 +19,7 @@ class SeederBase:
     UID_COUNTER = 1  # multiple generators should not conflict on keys
     CACHED_SCRIPTS = {}
     DEFAULT_TYPES = ["STRING", "LIST", "SET", "HASH", "ZSET", "JSON"]
+    BIG_VALUE_TYPES = ["LIST", "SET", "HASH", "ZSET"]
 
     def __init__(self, types: typing.Optional[typing.List[str]] = None):
         self.uid = SeederBase.UID_COUNTER
@@ -137,6 +138,8 @@ def __init__(
         data_size=100,
         collection_size=None,
         types: typing.Optional[typing.List[str]] = None,
+        huge_value_percentage=3,
+        huge_value_size=8192,
     ):
         SeederBase.__init__(self, types)
         self.key_target = key_target
@@ -146,6 +149,9 @@ def __init__(
         else:
             self.collection_size = collection_size
 
+        self.huge_value_percentage = huge_value_percentage
+        self.huge_value_size = huge_value_size
+
         self.units = [
             Seeder.Unit(
                 prefix=f"k-s{self.uid}u{i}-",
@@ -166,6 +172,8 @@ async def run(self, client: aioredis.Redis, target_ops=None, target_deviation=No
             target_deviation if target_deviation is not None else -1,
             self.data_size,
             self.collection_size,
+            self.huge_value_percentage,
+            self.huge_value_size,
         ]
 
         sha = await client.script_load(Seeder._load_script("generate"))
@@ -196,8 +204,12 @@ async def _run_unit(client: aioredis.Redis, sha: str, unit: Unit, using_stopkey,
             unit.stop_key if using_stopkey else "",
         ] + args
 
-        unit.counter = await client.evalsha(sha, 0, *args)
+        result = await client.evalsha(sha, 0, *args)
+        result = result.split()
+        unit.counter = int(result[0])
+        huge_keys = int(result[1])
+        huge_entries = int(result[2])
 
         logging.debug(
-            f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}"
+            f"running unit {unit.prefix}/{unit.type} took {time.time() - s}, target {args[4+0]}, huge keys {huge_keys} with total huge entries {huge_entries}"
         )
diff --git a/tests/dragonfly/seeder/script-generate.lua b/tests/dragonfly/seeder/script-generate.lua
@@ -18,21 +18,45 @@ local total_ops = tonumber(ARGV[6])
 local min_dev = tonumber(ARGV[7])
 local data_size = tonumber(ARGV[8])
 local collection_size = tonumber(ARGV[9])
+-- Chance, in percent (0-100), that a newly added collection key is filled with huge values
+local huge_value_percentage = tonumber(ARGV[10])
+local huge_value_size = tonumber(ARGV[11])  -- forwarded to LG_funcs.init as the huge value size
 
 -- collect all keys belonging to this script
 -- assumes exclusive ownership
 local keys = LU_collect_keys(prefix, type)
 
-LG_funcs.init(data_size, collection_size)
+LG_funcs.init(data_size, collection_size, huge_value_percentage, huge_value_size)
 local addfunc = LG_funcs['add_' .. string.lower(type)]
 local modfunc = LG_funcs['mod_' .. string.lower(type)]
+local huge_entries = LG_funcs["get_huge_entries"]  -- getter function; invoked when the result string is built
+
+local huge_keys = 0  -- incremented by action_add for every key it flags as huge
+
+local function huge_entry()
+    -- math.random() without arguments yields a float in [0, 1), so a
+    -- percentage of 0 never reports huge and 100 always does
+    local threshold = LG_funcs.huge_value_percentage / 100
+    local roll = math.random()
+    return roll < threshold
+end
 
 local function action_add()
     local key = prefix .. tostring(key_counter)
+    local op_type = string.lower(type)
+    local is_huge = false
+    if op_type ~= "string" and op_type ~= "json" then  -- only collection types get a huge roll
+        is_huge = huge_entry()
+    end
+
     key_counter = key_counter + 1
+    if is_huge then
+        huge_keys = huge_keys + 1
+    end
 
+    keys[key] = is_huge  -- flag first so addfunc can read it; the key joins the array part only once created
     addfunc(key, keys)
     table.insert(keys, key)
 end
 
 local function action_mod()
@@ -84,7 +108,8 @@ while true do
     -- update probability only every 10 iterations
     if counter % 10 == 0 then
         -- calculate intensity (not normalized probabilities)
-        -- please see attached plots in PR to undertand convergence
+        -- please see attached plots in PR to understand convergence
+        -- https://github.com/dragonflydb/dragonfly/pull/2556
 
         -- the add intensity is monotonically decreasing with keycount growing,
         -- the delete intensity is monotonically increasing with keycount growing,
@@ -121,4 +146,4 @@ if stop_key ~= '' then
     redis.call('DEL', stop_key)
 end
 
-return key_counter
+return tostring(key_counter) .. " " .. tostring(huge_keys) .. " " .. tostring(huge_entries())  -- space-separated: key counter, huge key count, huge entry count
diff --git a/tests/dragonfly/seeder/script-genlib.lua b/tests/dragonfly/seeder/script-genlib.lua
@@ -1,9 +1,35 @@
 local LG_funcs = {}
 
-function LG_funcs.init(dsize, csize)
+function LG_funcs.init(dsize, csize, large_val_perc, large_val_sz)
     LG_funcs.dsize = dsize
     LG_funcs.csize = csize
     LG_funcs.esize = math.ceil(dsize / csize)
+    LG_funcs.huge_value_percentage = large_val_perc  -- percent; the generate script divides it by 100
+    LG_funcs.huge_value_size = large_val_sz  -- string length used for huge values in place of esize
+end
+
+local huge_entries = 0  -- huge generations counted so far, exposed via get_huge_entries()
+
+-- One random string from dragonfly.randstr: huge_value_size long (and counted
+-- in huge_entries) when huge_entry is truthy, esize long otherwise.
+local function randstr(huge_entry)
+    if not huge_entry then
+        return dragonfly.randstr(LG_funcs.esize)
+    end
+    local blob = dragonfly.randstr(LG_funcs.huge_value_size)
+    huge_entries = huge_entries + 1
+    return blob
+end
+
+-- Result of dragonfly.randstr(len, csize), with len = huge_value_size when huge_entry
+-- is truthy and esize otherwise. A huge batch bumps huge_entries once, not per string.
+local function randstr_sequence(huge_entry)
+    if not huge_entry then
+        return dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
+    end
+    local batch = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize)
+    huge_entries = huge_entries + 1
+    return batch
 end
 
 -- strings
@@ -27,23 +53,24 @@ end
 -- lists
 -- store list of random blobs of default container/element sizes
 
-function LG_funcs.add_list(key)
-    local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-    redis.apcall('LPUSH', key, unpack(elements))
+function LG_funcs.add_list(key, keys)
+    local is_huge = keys[key]  -- huge flag stored in the keys table under the key's own name
+    redis.apcall('LPUSH', key, unpack(randstr_sequence(is_huge)))
 end
 
-function LG_funcs.mod_list(key)
+function LG_funcs.mod_list(key, keys)
     -- equally likely pops and pushes, we rely on the list size being large enough
     -- to "highly likely" not get emptied out by consequitve pops
+    local is_huge = keys[key]  -- huge flag from the keys table; selects huge_value_size vs esize for pushes
     local action = math.random(1, 4)
     if action == 1 then
         redis.apcall('RPOP', key)
     elseif action == 2 then
         redis.apcall('LPOP', key)
     elseif action == 3 then
-        redis.apcall('LPUSH', key, dragonfly.randstr(LG_funcs.esize))
+        redis.apcall('LPUSH', key, randstr(is_huge))
     else
-        redis.apcall('RPUSH', key, dragonfly.randstr(LG_funcs.esize))
+        redis.apcall('RPUSH', key, randstr(is_huge))
     end
 end
 
@@ -62,17 +89,18 @@ function LG_funcs.add_set(key, keys)
         end
         redis.apcall('SDIFFSTORE', key, keys[i1], keys[i2])
     else
-        local elements = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
-        redis.apcall('SADD', key, unpack(elements))
+        local is_huge = keys[key]
+        redis.apcall('SADD', key, unpack(randstr_sequence(is_huge)))
     end
 end
 
-function LG_funcs.mod_set(key)
+function LG_funcs.mod_set(key, keys)
      -- equally likely pops and additions
     if math.random() < 0.5 then
         redis.apcall('SPOP', key)
     else
-        redis.apcall('SADD', key, dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]  -- huge flag from the keys table; only needed on the add branch
+        redis.apcall('SADD', key, randstr(is_huge))
     end
 end
 
@@ -81,8 +109,16 @@ end
 -- store  {to_string(i): value for i in [1, csize]},
 -- where `value` is a random string for even indices and a number for odd indices
 
-function LG_funcs.add_hash(key)
-    local blobs  = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
+function LG_funcs.add_hash(key, keys)
+    local blobs
+    local is_huge = keys[key]
+    if is_huge then
+        blobs  = dragonfly.randstr(LG_funcs.huge_value_size, LG_funcs.csize / 2)
+        huge_entries = huge_entries + 1
+    else
+        blobs  = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize / 2)
+    end
+
     local htable = {}
     for i = 1,  LG_funcs.csize, 2 do
         htable[i * 2 - 1] = tostring(i)
@@ -95,20 +131,23 @@ function LG_funcs.add_hash(key)
     redis.apcall('HSET', key, unpack(htable))
 end
 
-function LG_funcs.mod_hash(key)
+function LG_funcs.mod_hash(key, keys)
     local idx = math.random(LG_funcs.csize)
     if idx % 2 == 1 then
         redis.apcall('HINCRBY', key, tostring(idx), 1)
     else
-        redis.apcall('HSET', key, tostring(idx), dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]  -- huge flag from the keys table; selects huge_value_size vs esize
+        redis.apcall('HSET', key, tostring(idx), randstr(is_huge))
     end
 end
 
 -- sorted sets
 
 function LG_funcs.add_zset(key, keys)
     -- TODO: We don't support ZDIFFSTORE
-    local blobs  = dragonfly.randstr(LG_funcs.esize, LG_funcs.csize)
+    local is_huge = keys[key]
+    local blobs = randstr_sequence(is_huge)
+
     local ztable = {}
     for i = 1,  LG_funcs.csize do
         ztable[i * 2 - 1] = tostring(i)
@@ -117,10 +156,11 @@ function LG_funcs.add_zset(key, keys)
     redis.apcall('ZADD', key, unpack(ztable))
 end
 
-function LG_funcs.mod_zset(key, dbsize)
+function LG_funcs.mod_zset(key, keys)
     local action = math.random(1, 4)
     if action <= 2 then
-        redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), dragonfly.randstr(LG_funcs.esize))
+        local is_huge = keys[key]
+        redis.apcall('ZADD', key, math.random(0, LG_funcs.csize * 2), randstr(is_huge))
     elseif action == 3 then
         redis.apcall('ZPOPMAX', key)
     else
@@ -153,3 +193,7 @@ function LG_funcs.mod_json(key, dbsize)
         redis.apcall('JSON.NUMINCRBY', key, '$.counters[' .. math.random(LG_funcs.csize ) .. ']', 1)
     end
 end
+
+function LG_funcs.get_huge_entries()  -- current value of the library's huge_entries counter
+    return huge_entries
+end
diff --git a/tests/dragonfly/seeder_test.py b/tests/dragonfly/seeder_test.py
@@ -29,7 +29,15 @@ async def check_list():
 
     await async_client.flushall()
 
-    s = Seeder(units=1, key_target=10, data_size=10_000, collection_size=1, types=["LIST"])
+    s = Seeder(
+        units=1,
+        key_target=10,
+        data_size=10_000,
+        collection_size=1,
+        types=["LIST"],
+        huge_value_percentage=0,
+        huge_value_size=0,
+    )
     await s.run(async_client)
     await check_list()