Skip to content

Commit

Permalink
Merge branch 'release-2.3.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
alazzaro committed Jun 26, 2022
2 parents aa14760 + 6899a2f commit c134996
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 18 deletions.
4 changes: 2 additions & 2 deletions VERSION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
MAJOR = 2
MINOR = 3
PATCH = 0-rc3
PATCH = 0
# A specific DATE (YYYY-MM-DD) fixes an official release, otherwise
# it is considered Development version.
DATE = 2022-06-08
DATE = 2022-06-26


2 changes: 1 addition & 1 deletion src/acc/libsmm_acc/notebooks/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
bokeh==1.0.4
matplotlib==3.0.2
numpy==1.21.0
numpy==1.22.0
pandas==0.23.4
pandas-profiling==1.4.1
seaborn==0.9.0
2 changes: 1 addition & 1 deletion src/acc/libsmm_acc/tune/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
numpy==1.21.0
numpy==1.22.0
6 changes: 3 additions & 3 deletions src/acc/opencl/acc_opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ int c_dbcsr_acc_init(void) {
char buffer[ACC_OPENCL_BUFFERSIZE];
const char *const env_devmatch = getenv("ACC_OPENCL_DEVMATCH"), *const env_devtype = getenv("ACC_OPENCL_DEVTYPE");
const char *const env_priority = getenv("ACC_OPENCL_PRIORITY"), *const env_xhints = getenv("ACC_OPENCL_XHINTS");
const char *const env_nullify = getenv("ACC_OPENCL_NULLIFY"), *const env_dump_acc = getenv("ACC_OPENCL_DUMP");
const char *const env_devcopy = getenv("ACC_OPENCL_DEVCOPY"), *const env_dump_acc = getenv("ACC_OPENCL_DUMP");
const char *const env_verbose = getenv("ACC_OPENCL_VERBOSE"), *const env_flush = getenv("ACC_OPENCL_FLUSH");
const char *const env_device = getenv("ACC_OPENCL_DEVICE"), *const env_timer = getenv("ACC_OPENCL_TIMER");
const char *const env_share = getenv("ACC_OPENCL_SHARE"), *const env_async = getenv("ACC_OPENCL_ASYNC");
Expand All @@ -233,7 +233,7 @@ int c_dbcsr_acc_init(void) {
# endif
c_dbcsr_acc_opencl_config.verbosity = (NULL == env_verbose ? 0 : atoi(env_verbose));
c_dbcsr_acc_opencl_config.priority = (NULL == env_priority ? /*default*/ 3 : atoi(env_priority));
c_dbcsr_acc_opencl_config.nullify = (NULL == env_nullify ? /*default*/ 0 : atoi(env_nullify));
c_dbcsr_acc_opencl_config.devcopy = (NULL == env_devcopy ? /*default*/ 0 : atoi(env_devcopy));
c_dbcsr_acc_opencl_config.xhints = (NULL == env_xhints ? /*default*/ 1 : atoi(env_xhints));
c_dbcsr_acc_opencl_config.share = (NULL == env_share ? /*default*/ 0 : atoi(env_share));
c_dbcsr_acc_opencl_config.async = (NULL == env_async ? /*default*/ 3 : atoi(env_async));
Expand Down Expand Up @@ -557,7 +557,7 @@ int c_dbcsr_acc_finalize(void) {
for (i = 0; i < ACC_OPENCL_DEVICES_MAXCOUNT; ++i) {
const cl_device_id device_id = c_dbcsr_acc_opencl_config.devices[i];
if (NULL != device_id) {
# if defined(CL_VERSION_1_2)
# if defined(CL_VERSION_1_2) && defined(_DEBUG)
ACC_OPENCL_CHECK(clReleaseDevice(device_id), "release device", result);
# endif
/* c_dbcsr_acc_opencl_create_context scans for non-NULL devices */
Expand Down
4 changes: 2 additions & 2 deletions src/acc/opencl/acc_opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ typedef struct c_dbcsr_acc_opencl_config_t {
cl_int nthreads;
/** How to apply/use stream priorities. */
cl_int priority;
/** How to zero device-side buffers. */
cl_int nullify;
/** How to zero/copy device-side buffers. */
cl_int devcopy;
/** Execution-hints (command stream). */
cl_int xhints;
/** Share streams across threads. */
Expand Down
44 changes: 38 additions & 6 deletions src/acc/opencl/acc_opencl_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,9 @@ int c_dbcsr_acc_dev_mem_allocate(void** dev_mem, size_t nbytes) {
# endif
clReleaseMemObject(buffer);
# if defined(CL_VERSION_2_0)
/*if (NULL != ptr)*/ clSVMFree(context, ptr);
if (0 != c_dbcsr_acc_opencl_config.device[tid].svm_interop /*&& (NULL != ptr)*/) {
clSVMFree(context, ptr);
}
# endif
result = EXIT_FAILURE;
}
Expand Down Expand Up @@ -271,8 +273,10 @@ int c_dbcsr_acc_dev_mem_deallocate(void* dev_mem) {
}
# endif
# if defined(CL_VERSION_2_0)
assert(NULL != c_dbcsr_acc_opencl_config.device[tid].context);
clSVMFree(c_dbcsr_acc_opencl_config.device[tid].context, ptr); /*if (NULL != ptr)*/
if (0 != c_dbcsr_acc_opencl_config.device[tid].svm_interop /*&& (NULL != ptr)*/) {
assert(NULL != c_dbcsr_acc_opencl_config.device[tid].context);
clSVMFree(c_dbcsr_acc_opencl_config.device[tid].context, ptr);
}
# endif
}
# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE)
Expand Down Expand Up @@ -363,8 +367,36 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
# endif
assert((NULL != devmem_src || 0 == nbytes) && (NULL != devmem_dst || 0 == nbytes) && NULL != stream);
if (NULL != devmem_src && NULL != devmem_dst && 0 != nbytes) {
result = clEnqueueCopyBuffer(*ACC_OPENCL_STREAM(stream), *ACC_OPENCL_MEM(devmem_src), *ACC_OPENCL_MEM(devmem_dst),
0 /*src_offset*/, 0 /*dst_offset*/, nbytes, 0, NULL, NULL);
const cl_mem *const src = ACC_OPENCL_MEM(devmem_src), *const dst = ACC_OPENCL_MEM(devmem_dst);
assert(NULL != *src && NULL != *dst);
if (*src != *dst) {
const cl_command_queue queue = *ACC_OPENCL_STREAM(stream);
if (0 == (2 & c_dbcsr_acc_opencl_config.devcopy)) {
result = clEnqueueCopyBuffer(queue, *src, *dst, 0 /*src_offset*/, 0 /*dst_offset*/, nbytes, 0, NULL, NULL);
}
else {
static volatile int lock; /* creating cl_kernel and clSetKernelArg must be synchronized */
static cl_kernel kernel = NULL;
LIBXSMM_ATOMIC_ACQUIRE(&lock, LIBXSMM_SYNC_NPAUSE, LIBXSMM_ATOMIC_RELAXED);
if (NULL == kernel) { /* generate kernel */
const char source[] = "kernel void memcpy_d2d(global uchar *restrict src, global uchar *restrict dst) {\n"
" const size_t i = get_global_id(0);\n"
" dst[i] = src[i];\n"
"}\n";
result = c_dbcsr_acc_opencl_kernel(source, "memcpy_d2d" /*kernel_name*/, NULL /*build_params*/, NULL /*build_options*/,
NULL /*try_build_options*/, NULL /*try_ok*/, NULL /*extnames*/, 0 /*num_exts*/, &kernel);
}
if (EXIT_SUCCESS == result) {
assert(NULL != kernel);
ACC_OPENCL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), src), "set src argument of memcpy_d2d kernel", result);
ACC_OPENCL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), dst), "set dst argument of memcpy_d2d kernel", result);
ACC_OPENCL_CHECK(clEnqueueNDRangeKernel(
queue, kernel, 1 /*work_dim*/, NULL /*offset*/, &nbytes, NULL /*local_work_size*/, 0, NULL, NULL),
"launch memcpy_d2d kernel", result);
}
LIBXSMM_ATOMIC_RELEASE(&lock, LIBXSMM_ATOMIC_RELAXED);
}
}
}
# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE)
c_dbcsr_timestop(&routine_handle);
Expand All @@ -385,7 +417,7 @@ int c_dbcsr_acc_memset_zero(void* dev_mem, size_t offset, size_t nbytes, void* s
if (0 != nbytes) {
const cl_command_queue queue = *ACC_OPENCL_STREAM(stream);
const cl_mem* const buffer = ACC_OPENCL_MEM(dev_mem);
if (0 == c_dbcsr_acc_opencl_config.nullify) {
if (0 == (1 & c_dbcsr_acc_opencl_config.devcopy)) {
static const cl_uchar pattern = 0; /* fill with zeros */
result = clEnqueueFillBuffer(queue, *buffer, &pattern, sizeof(pattern), offset, nbytes, 0, NULL, NULL);
}
Expand Down
6 changes: 3 additions & 3 deletions src/acc/opencl/smm/opencl_libsmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -891,8 +891,8 @@ int libsmm_acc_transpose(const int* dev_trs_stack, int offset, int stack_size, v
clSetKernelArg(config->kernel, 1, sizeof(int), &offset), "set offset argument of transpose kernel", result);
ACC_OPENCL_CHECK(clSetKernelArg(config->kernel, 2, sizeof(cl_mem), ACC_OPENCL_MEM(dev_data)),
"set matrix-data argument of transpose kernel", result);
ACC_OPENCL_CHECK(
clEnqueueNDRangeKernel(queue, config->kernel, 1 /*work_dim*/, NULL, &work_size, &config->wgsize, 0, NULL, perf_event),
ACC_OPENCL_CHECK(clEnqueueNDRangeKernel(queue, config->kernel, 1 /*work_dim*/, NULL /*offset*/, &work_size, &config->wgsize,
0, NULL, perf_event),
"launch transpose kernel", result);
/* eventually update performance counters inside of locked region */
# if !defined(OPENCL_LIBSMM_VALIDATE_TRANS)
Expand Down Expand Up @@ -1635,7 +1635,7 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
ACC_OPENCL_CHECK(
clSetKernelArg(config->kernel[kernel_idx], 5, sizeof(int), &bs), "set minibatch argument of SMM-kernel", result);
}
ACC_OPENCL_CHECK(clEnqueueNDRangeKernel(queue, config->kernel[kernel_idx], 1 /*work_dim*/, NULL, &work_size,
ACC_OPENCL_CHECK(clEnqueueNDRangeKernel(queue, config->kernel[kernel_idx], 1 /*work_dim*/, NULL /*offset*/, &work_size,
config->wgsize + kernel_idx, 0, NULL, perf_event),
"launch SMM-kernel", result);
/* eventually update performance counters inside of locked region */
Expand Down

0 comments on commit c134996

Please sign in to comment.