Change rocksdb configs (pytorch#2581)
Summary:
Pull Request resolved: pytorch#2581

Add an option to set the RocksDB block cache size

Enable the RocksDB async_io option for reads
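
In effect, the new cache_size argument toggles between a bounded LRU block cache and RocksDB's no-cache mode; the benchmark exposes it as --block-cache-size-mb (default 0, which keeps the previous no-cache behavior). Below is a minimal sketch of how the knob maps onto stock RocksDB APIs; MakeTableOptions is an illustrative helper, not a function in this codebase:

#include <rocksdb/cache.h>
#include <rocksdb/table.h>

// Illustrative helper only: shows how a byte-sized cache budget maps onto
// BlockBasedTableOptions, mirroring the constructor change further below.
rocksdb::BlockBasedTableOptions MakeTableOptions(int64_t cache_size_bytes) {
  rocksdb::BlockBasedTableOptions table_options;
  if (cache_size_bytes > 0) {
    // Bounded LRU block cache; index and filter blocks are cached too,
    // so they share the same budget as data blocks.
    table_options.block_cache = rocksdb::NewLRUCache(cache_size_bytes);
    table_options.cache_index_and_filter_blocks = true;
  } else {
    // Previous behavior: no RocksDB block cache at all.
    table_options.no_block_cache = true;
  }
  return table_options;
}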

Reviewed By: sryap

Differential Revision: D57219710

fbshipit-source-id: 5e58792e2bd3a6b4269a071602984d162522c7a9
pranjalssh authored and facebook-github-bot committed May 21, 2024
1 parent 1fee2b0 commit 5d35f5c
Showing 4 changed files with 26 additions and 5 deletions.
First changed file: fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py (8 additions, 0 deletions)
@@ -111,6 +111,7 @@ def benchmark_read_write(
warmup_iters: int,
num_shards: int,
num_threads: int,
+ block_cache_size_mb: int,
) -> None:
idx_dtype = torch.int64
data_dtype = torch.float32
@@ -136,6 +137,7 @@ def benchmark_read_write(
-0.01, # ssd_uniform_init_lower
0.01, # ssd_uniform_init_upper
32, # row_storage_bitwidth
+ block_cache_size_mb * (2**20), # block cache size
)

total_indices = (warmup_iters + iters) * batch_size * bag_size
@@ -179,6 +181,7 @@ def benchmark_read_write(
) # Check P556577690 and https://fburl.com/t9lf4d7v
@click.option("--num-shards", default=8)
@click.option("--num-threads", default=8)
@click.option("--block-cache-size-mb", default=0)
def ssd_read_write(
ssd_prefix: str,
num_embeddings: int,
@@ -189,6 +192,7 @@ def ssd_read_write(
warmup_iters: int,
num_shards: int,
num_threads: int,
+ block_cache_size_mb: int,
) -> None:
benchmark_read_write(
ssd_prefix,
@@ -200,6 +204,7 @@ def ssd_read_write(
warmup_iters,
num_shards,
num_threads,
+ block_cache_size_mb,
)


@@ -227,6 +232,7 @@ def ssd_read_write(
@click.option("--requests_data_file", type=str, default=None)
@click.option("--tables", type=str, default=None)
@click.option("--ssd-prefix", type=str, default="/tmp/ssd_benchmark")
@click.option("--block-cache-size-mb", default=0)
def ssd_training( # noqa C901
alpha: float,
bag_size: int,
@@ -250,6 +256,7 @@ def ssd_training( # noqa C901
requests_data_file: Optional[str],
tables: Optional[str],
ssd_prefix: Optional[str],
+ block_cache_size_mb: int,
) -> None:
np.random.seed(42)
torch.manual_seed(42)
@@ -349,6 +356,7 @@ def gen_split_tbe_generator(
ssd_storage_directory=tempdir,
ssd_cache_location=EmbeddingLocation.MANAGED,
ssd_shards=8,
+ ssd_block_cache_size=block_cache_size_mb * (2**20),
**common_args,
),
}
Second changed file (Python SSD embedding module):
@@ -92,6 +92,7 @@ def __init__(
ssd_cache_location: EmbeddingLocation = EmbeddingLocation.MANAGED,
ssd_uniform_init_lower: float = -0.01,
ssd_uniform_init_upper: float = 0.01,
+ ssd_block_cache_size: int = 0,
# General Optimizer args
stochastic_rounding: bool = True,
gradient_clipping: bool = False,
@@ -233,6 +234,7 @@ def __init__(
ssd_uniform_init_lower,
ssd_uniform_init_upper,
32, # row_storage_bitwidth
+ ssd_block_cache_size,
)
# pyre-fixme[20]: Argument `self` expected.
(low_priority, high_priority) = torch.cuda.Stream.priority_range()
Third changed file (C++ TorchScript wrapper):
@@ -112,7 +112,8 @@ class EmbeddingRocksDBWrapper : public torch::jit::CustomClassHolder {
int64_t max_write_buffer_num,
double uniform_init_lower,
double uniform_init_upper,
- int64_t row_storage_bitwidth = 32)
+ int64_t row_storage_bitwidth = 32,
+ int64_t cache_size = 0)
: impl_(std::make_shared<ssd::EmbeddingRocksDB>(
path,
num_shards,
@@ -128,7 +129,8 @@ class EmbeddingRocksDBWrapper : public torch::jit::CustomClassHolder {
max_write_buffer_num,
uniform_init_lower,
uniform_init_upper,
- row_storage_bitwidth)) {}
+ row_storage_bitwidth,
+ cache_size)) {}

void
set_cuda(Tensor indices, Tensor weights, Tensor count, int64_t timestep) {
@@ -177,6 +179,7 @@ static auto embedding_rocks_db_wrapper =
int64_t,
double,
double,
+ int64_t,
int64_t>())
.def("set_cuda", &EmbeddingRocksDBWrapper::set_cuda)
.def("get_cuda", &EmbeddingRocksDBWrapper::get_cuda)
Fourth changed file (C++ EmbeddingRocksDB header):
@@ -171,7 +171,8 @@ class EmbeddingRocksDB : public std::enable_shared_from_this<EmbeddingRocksDB> {
int64_t max_write_buffer_num,
float uniform_init_lower,
float uniform_init_upper,
- int64_t row_storage_bitwidth = 32) {
+ int64_t row_storage_bitwidth = 32,
+ int64_t cache_size = 0) {
// TODO: lots of tunables. NNI or something for this?
rocksdb::Options options;
options.create_if_missing = true;
@@ -219,8 +220,14 @@ class EmbeddingRocksDB : public std::enable_shared_from_this<EmbeddingRocksDB> {
options.stats_dump_period_sec = 600;

rocksdb::BlockBasedTableOptions table_options;
- // Don't use block cache since we have a "user-mode" UVM/HBM row cache.
- table_options.no_block_cache = true;

+ if (cache_size > 0) {
+ table_options.block_cache = rocksdb::NewLRUCache(cache_size);
+ table_options.cache_index_and_filter_blocks = true;
+ } else {
+ table_options.no_block_cache = true;
+ }

table_options.index_type = rocksdb::BlockBasedTableOptions::kHashSearch;
table_options.data_block_index_type =
rocksdb::BlockBasedTableOptions::kDataBlockBinaryAndHash;
@@ -271,6 +278,7 @@ class EmbeddingRocksDB : public std::enable_shared_from_this<EmbeddingRocksDB> {
}
executor_ = std::make_unique<folly::CPUThreadPoolExecutor>(num_shards);
ro_.verify_checksums = false;
+ ro_.async_io = true;
wo_.disableWAL = true;
wo_.sync = false;
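
Finally, a condensed sketch of the read/write option state the constructor ends with. The async_io flag is a newer RocksDB ReadOptions field (7.x era) that lets reads such as MultiGet overlap their storage I/O; the helper functions below are illustrative, not part of this codebase:

#include <rocksdb/options.h>

// Illustrative helpers, not functions in this codebase.
rocksdb::ReadOptions MakeReadOptions() {
  rocksdb::ReadOptions ro;
  ro.verify_checksums = false; // skip per-read checksum verification
  ro.async_io = true;          // overlap storage reads where supported
  return ro;
}

rocksdb::WriteOptions MakeWriteOptions() {
  rocksdb::WriteOptions wo;
  wo.disableWAL = true; // skip the write-ahead log
  wo.sync = false;      // no fsync per write
  return wo;
}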
