Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EQSIM benchmark code and fixes #213

Open
wants to merge 35 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
f8d5f8d
Update getting_started.rst (#184)
houjun Feb 23, 2024
38d5f77
Removing gres option for ctest (#182)
hyunjuooh Feb 23, 2024
0f326da
enable cache by default (#187)
jeanbez Mar 18, 2024
cc8c2a0
Benchmark code for EQSIM data
houjun Mar 18, 2024
ab4d5b6
Committing clang-format changes
github-actions[bot] Mar 18, 2024
512e714
Minor adjustments
houjun Mar 18, 2024
8a5835f
Committing clang-format changes
github-actions[bot] Mar 18, 2024
1f661bc
Updates
houjun Mar 21, 2024
a7903e1
Committing clang-format changes
github-actions[bot] Mar 21, 2024
b0f1c8f
Change vpicio to use local server partitioning, add some debug prints
houjun Mar 22, 2024
788dd2e
Committing clang-format changes
github-actions[bot] Mar 22, 2024
dcc56a1
Add metadata query to benchmark code
houjun Mar 26, 2024
cf3f7a5
Committing clang-format changes
github-actions[bot] Mar 26, 2024
8d44125
Add ZFP compression for read and write
houjun Mar 28, 2024
b8840e0
Committing clang-format changes
github-actions[bot] Mar 28, 2024
3bcdf53
Add a option to use more ranks to read data so total data of each ran…
houjun Mar 29, 2024
3e97d5d
Committing clang-format changes
github-actions[bot] Mar 29, 2024
0fe3f6f
Add a data query code for EQSIM data
houjun Mar 29, 2024
a502ac9
Committing clang-format changes
github-actions[bot] Mar 29, 2024
b72e981
Minor adjustments for the HDF5 read code
houjun Mar 30, 2024
9a3ba68
Committing clang-format changes
github-actions[bot] Mar 30, 2024
7a069b3
Fix an issue with periodic data flush, minor changes to benchmark code
houjun Apr 1, 2024
b3f618f
Committing clang-format changes
github-actions[bot] Apr 1, 2024
4c9d2bb
fix an issue with 3d read segfault
houjun Oct 10, 2024
9068c09
Committing clang-format changes
github-actions[bot] Oct 10, 2024
460f6e2
Merge branch 'develop' into eqsim
houjun Oct 23, 2024
226e3ba
Fix compile issue
houjun Oct 23, 2024
09198bc
Update .gitlab-ci.yml
houjun Oct 23, 2024
8fe16eb
Update sleep time
houjun Oct 23, 2024
a06cc24
Replace function
houjun Oct 24, 2024
3370ae1
Replace function
houjun Oct 24, 2024
88a856b
Minor updates and doc changes
houjun Nov 4, 2024
5da6241
Committing clang-format changes
github-actions[bot] Nov 4, 2024
7b5c9e9
Update
houjun Nov 4, 2024
d9660e0
Merge branch 'develop' into eqsim
jeanbez Nov 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ perlmutter-no-cache-build:
SUPERCOMPUTER: "perlmutter"
MERCURY_DIR: "/global/cfs/cdirs/${PDC_PROJECT}/pdc-perlmutter/mercury/install"
script:
- module load libfabric/1.15.2.0
- module load libfabric
- module list
- mkdir -p ${PDC_BUILD_PATH}/perlmutter/no-cache
- cd ${PDC_BUILD_PATH}/perlmutter/no-cache
Expand Down Expand Up @@ -51,7 +51,7 @@ perlmutter-cache-build:
SUPERCOMPUTER: "perlmutter"
MERCURY_DIR: "/global/cfs/cdirs/${PDC_PROJECT}/pdc-perlmutter/mercury/install"
script:
- module load libfabric/1.15.2.0
- module load libfabric
- module list
- mkdir -p ${PDC_BUILD_PATH}/perlmutter/cache
- cd ${PDC_BUILD_PATH}/perlmutter/cache
Expand Down Expand Up @@ -374,7 +374,7 @@ perlmutter-metrics-build:
SUPERCOMPUTER: "perlmutter"
MERCURY_DIR: "/global/cfs/cdirs/${PDC_PROJECT}/pdc-perlmutter/mercury/install"
script:
- module load libfabric/1.15.2.0
- module load libfabric
- module list
- mkdir -p ${PDC_BUILD_PATH}/perlmutter/metrics
- cd ${PDC_BUILD_PATH}/perlmutter/metrics
Expand Down
14 changes: 11 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,10 @@ endif()
option(PDC_SERVER_CACHE "Enable Server Caching." ON)
if(PDC_SERVER_CACHE)
set(PDC_SERVER_CACHE 1)
set(PDC_SERVER_CACHE_MAX_GB "3" CACHE STRING "Max GB for server cache")
set(PDC_SERVER_CACHE_FLUSH_TIME "30" CACHE STRING "Flush time for server cache")
set(PDC_SERVER_CACHE_MAX_GB "32" CACHE STRING "Max GB for server cache")
set(PDC_SERVER_IDLE_CACHE_FLUSH_TIME "3" CACHE STRING "Idle time to initiate flush from server cache")

add_compile_definitions(PDC_SERVER_CACHE_MAX_GB=${PDC_SERVER_CACHE_MAX_GB} PDC_SERVER_CACHE_FLUSH_TIME=${PDC_SERVER_CACHE_FLUSH_TIME})
add_compile_definitions(PDC_SERVER_CACHE_MAX_GB=${PDC_SERVER_CACHE_MAX_GB} PDC_SERVER_IDLE_CACHE_FLUSH_TIME=${PDC_SERVER_IDLE_CACHE_FLUSH_TIME})
endif()


Expand Down Expand Up @@ -487,6 +487,14 @@ if(PDC_ENABLE_SQLITE3)
set(ENABLE_SQLITE3 1)
endif()

#-----------------------------------------------------------------------------
# ZFP option
#-----------------------------------------------------------------------------
option(PDC_ENABLE_ZFP "Enable ZFP." OFF)
if(PDC_ENABLE_ZFP)
set(ENABLE_ZFP 1)
endif()

# Check availability of symbols
#-----------------------------------------------------------------------------
check_symbol_exists(malloc_usable_size "malloc.h" HAVE_MALLOC_USABLE_SIZE)
Expand Down
32 changes: 18 additions & 14 deletions src/api/pdc_region/pdc_region_transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1787,25 +1787,29 @@ release_region_buffer(char *buf, uint64_t *obj_dims, int local_ndim, uint64_t *l
if (local_ndim == 2) {
if (access_type == PDC_READ) {
ptr = new_buf;
for (i = 0; i < local_size[0]; ++i) {
memcpy(buf + ((local_offset[0] + i) * obj_dims[1] + local_offset[1]) * unit, ptr,
local_size[1] * unit);
ptr += local_size[1] * unit;
}
// Tang
houjun marked this conversation as resolved.
Show resolved Hide resolved
memcpy(buf, ptr, local_size[0] * local_size[1] * unit);
/* for (i = 0; i < local_size[0]; ++i) { */
/* memcpy(buf + ((local_offset[0] + i) * obj_dims[1] + local_offset[1]) * unit, ptr, */
/* local_size[1] * unit); */
/* ptr += local_size[1] * unit; */
/* } */
}
}
else if (local_ndim == 3) {
if (access_type == PDC_READ) {
ptr = new_buf;
for (i = 0; i < local_size[0]; ++i) {
for (j = 0; j < local_size[1]; ++j) {
memcpy(buf + ((local_offset[0] + i) * obj_dims[1] * obj_dims[2] +
(local_offset[1] + j) * obj_dims[2] + local_offset[2]) *
unit,
ptr, local_size[2] * unit);
ptr += local_size[2] * unit;
}
}
// Tang
houjun marked this conversation as resolved.
Show resolved Hide resolved
memcpy(buf, ptr, local_size[0] * local_size[1] * local_size[2] * unit);
/* for (i = 0; i < local_size[0]; ++i) { */
/* for (j = 0; j < local_size[1]; ++j) { */
/* memcpy(buf + ((local_offset[0] + i) * obj_dims[1] * obj_dims[2] + */
/* (local_offset[1] + j) * obj_dims[2] + local_offset[2]) * */
/* unit, */
/* ptr, local_size[2] * unit); */
/* ptr += local_size[2] * unit; */
/* } */
/* } */
}
}
if (bulk_buf_ref) {
Expand Down
1 change: 1 addition & 0 deletions src/commons/utils/pdc_timing.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "pdc_timing.h"
#include "assert.h"
#include "mpi.h"

#ifdef PDC_TIMING
static double pdc_base_time;
Expand Down
10 changes: 10 additions & 0 deletions src/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ if(PDC_ENABLE_SQLITE3)
find_package(SQLite3 3.31.0 REQUIRED)
endif()

if(PDC_ENABLE_ZFP)
add_definitions(-DENABLE_ZFP=1)
find_package(ZFP REQUIRED)
# find_path(ZFP_INCLUDE_DIR include/zfp.h)
endif()


include_directories(
${PDC_COMMON_INCLUDE_DIRS}
${PDC_INCLUDES_BUILD_TIME}
Expand All @@ -40,6 +47,7 @@ include_directories(
${MERCURY_INCLUDE_DIR}
${FASTBIT_INCLUDE_DIR}
${ROCKSDB_INCLUDE_DIR}
${ZFP_INCLUDE_DIRS}
)

add_definitions( -DIS_PDC_SERVER=1 )
Expand Down Expand Up @@ -70,6 +78,8 @@ add_library(pdc_server_lib
if(PDC_ENABLE_FASTBIT)
message(STATUS "Enabled fastbit")
target_link_libraries(pdc_server_lib ${MERCURY_LIBRARY} ${PDC_COMMONS_LIBRARIES} -lm -ldl ${PDC_EXT_LIB_DEPENDENCIES} ${FASTBIT_LIBRARY}/libfastbit.so)
elseif(PDC_ENABLE_ZFP)
target_link_libraries(pdc_server_lib ${MERCURY_LIBRARY} ${PDC_COMMONS_LIBRARIES} -lm -ldl ${PDC_EXT_LIB_DEPENDENCIES} zfp::zfp)
elseif(PDC_ENABLE_ROCKSDB)
if(PDC_ENABLE_SQLITE3)
target_link_libraries(pdc_server_lib ${MERCURY_LIBRARY} ${PDC_COMMONS_LIBRARIES} -lm -ldl ${PDC_EXT_LIB_DEPENDENCIES} ${ROCKSDB_LIBRARY} SQLite::SQLite3)
Expand Down
27 changes: 15 additions & 12 deletions src/server/pdc_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ char ** all_addr_strings_g = NULL;
int is_hash_table_init_g = 0;
int lustre_stripe_size_mb_g = 16;
int lustre_total_ost_g = 0;
int pdc_disable_checkpoint_g = 0;

hg_id_t get_remote_metadata_register_id_g;
hg_id_t buf_map_server_register_id_g;
Expand Down Expand Up @@ -719,15 +720,9 @@ PDC_Server_set_close(void)
#ifdef PDC_TIMING
start = MPI_Wtime();
#endif
char *tmp_env_char = getenv("PDC_DISABLE_CHECKPOINT");
if (tmp_env_char != NULL && strcmp(tmp_env_char, "TRUE") == 0) {
if (pdc_server_rank_g == 0) {
printf("==PDC_SERVER[0]: checkpoint disabled!\n");
}
}
else {
if (pdc_disable_checkpoint_g == 0)
PDC_Server_checkpoint();
}

#ifdef PDC_TIMING
pdc_server_timings->PDCserver_checkpoint += MPI_Wtime() - start;
#endif
Expand Down Expand Up @@ -1204,7 +1199,8 @@ PDC_Server_recv_shm_cb(const struct hg_cb_info *callback_info)
hg_return_t
PDC_Server_checkpoint_cb()
{
PDC_Server_checkpoint();
if (pdc_disable_checkpoint_g == 0)
PDC_Server_checkpoint();

return HG_SUCCESS;
}
Expand Down Expand Up @@ -1862,7 +1858,7 @@ PDC_Server_loop(hg_context_t *hg_context)
#ifdef PDC_ENABLE_CHECKPOINT
checkpoint_interval++;
// Avoid calling clock() every operation
if (checkpoint_interval % PDC_CHECKPOINT_CHK_OP_INTERVAL == 0) {
if (pdc_disable_checkpoint_g == 0 && checkpoint_interval % PDC_CHECKPOINT_CHK_OP_INTERVAL == 0) {
cur_time = clock();
double elapsed_time = ((double)(cur_time - last_checkpoint_time)) / CLOCKS_PER_SEC;
/* fprintf(stderr, "PDC_SERVER: loop elapsed time %.2f\n", elapsed_time); */
Expand Down Expand Up @@ -2117,7 +2113,7 @@ PDC_Server_get_env()
data_sieving_g = atoi(tmp_env_char);
}
else {
data_sieving_g = 0;
data_sieving_g = 1;
}

// Get number of OST per file
Expand Down Expand Up @@ -2158,7 +2154,7 @@ PDC_Server_get_env()

tmp_env_char = getenv("PDC_GEN_HIST");
if (tmp_env_char != NULL)
gen_hist_g = 1;
gen_hist_g = atoi(tmp_env_char);

tmp_env_char = getenv("PDC_GEN_FASTBIT_IDX");
if (tmp_env_char != NULL)
Expand All @@ -2184,6 +2180,13 @@ PDC_Server_get_env()
printf("==PDC_SERVER[%d]: using SQLite3 for kvtag\n", pdc_server_rank_g);
}

tmp_env_char = getenv("PDC_DISABLE_CHECKPOINT");
if (tmp_env_char != NULL && strcmp(tmp_env_char, "TRUE") == 0) {
pdc_disable_checkpoint_g = 1;
if (pdc_server_rank_g == 0)
printf("==PDC_SERVER[0]: checkpoint disabled!\n");
}

if (pdc_server_rank_g == 0) {
printf("==PDC_SERVER[%d]: using [%s] as tmp dir, %d OSTs, %d OSTs per data file, %d%% to BB\n",
pdc_server_rank_g, pdc_server_tmp_dir_g, lustre_total_ost_g, pdc_nost_per_file_g,
Expand Down
Loading
Loading