From 77f431e7b6c0c55cb2aa6d3539ee40fbde3593e6 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Fri, 27 Dec 2019 13:54:40 +0100 Subject: [PATCH] Matrix Mult bench vs Intel MKL-DNN (#70) * Add Intel Deep Learning library MKL-DNN / DNNL as a submodule for benching * Use an older MKL-DNN (1.0.4) that did not entangle everything with cpu_engine.cpp * Add Intel MKL-DNN AVX2 and AVX512 benchmarks --- .gitmodules | 3 + .../matmul_gemm_blas/mkldnn_gemm_jit_avx2.nim | 59 +++++++++++++ .../mkldnn_gemm_jit_avx512.nim | 59 +++++++++++++ benchmarks/vendor/mkl-dnn | 1 + benchmarks/vendor/mkldnn.nim | 84 ++++++++++++++++++ benchmarks/vendor/mkldnn_config.h | 85 +++++++++++++++++++ benchmarks/vendor/mkldnn_version.h | 32 +++++++ 7 files changed, 323 insertions(+) create mode 100644 benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx2.nim create mode 100644 benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx512.nim create mode 160000 benchmarks/vendor/mkl-dnn create mode 100644 benchmarks/vendor/mkldnn.nim create mode 100644 benchmarks/vendor/mkldnn_config.h create mode 100644 benchmarks/vendor/mkldnn_version.h diff --git a/.gitmodules b/.gitmodules index 0f0b71d..363c3a8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "benchmarks/vendor/tasking-2.0"] path = benchmarks/vendor/tasking-2.0 url = https://github.com/aprell/tasking-2.0 +[submodule "benchmarks/vendor/mkl-dnn"] + path = benchmarks/vendor/mkl-dnn + url = https://github.com/intel/mkl-dnn diff --git a/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx2.nim b/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx2.nim new file mode 100644 index 0000000..d6d4a33 --- /dev/null +++ b/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx2.nim @@ -0,0 +1,59 @@ +# Weave +# Copyright (c) 2019 Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). 
+# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# GEMM (GEneralized Matrix Multiplication) using MKL-DNN / DNNL +# Intel Deep Neural Network Library. + +import + ./gemm_bench_common, ./gemm_bench_config, + ../vendor/mkldnn + + +proc benchMKLDNN(a, b: seq[float32], ashape, bshape: MatrixShape, nb_samples: int): seq[float32] = + let req_ops = gemm_required_ops(ashape, bshape) + let out_shape = gemm_out_shape(ashape, bshape) + let out_size = out_shape.M * out_shape.N + + result = newSeq[float32](out_size) + var # MKL-DNN wants pointers as inputs + trans = 'N' + m = int32 M + n = int32 N + k = int32 K + alpha = 1'f32 + lda = int32 K + ldb = int32 N + beta = 0'f32 + ldc = int32 N + + bench("Intel MKL-DNN / DNNL JIT AVX benchmark", req_ops): + # Initialisation, not measured apart from the "Collected n samples in ... seconds" + zeroMem(result[0].addr, out_size * sizeof(float32)) # We zero memory between computation + do: + # Main work + discard mkldnn_jit_avx_gemm_f32( + trans.addr, trans.addr, + m.addr, n.addr, k.addr, + alpha.addr, a[0].unsafeaddr, lda.addr, + b[0].unsafeAddr, ldb.addr, + beta.addr, result[0].addr, ldc.addr, + bias = nil + ) + +# Bench +when isMainModule: + import std/[random, sequtils] + + randomize(42) # For reproducibility + # warmup() + reportConfig("Intel MKL-DNN JIT AVX", float32, (M, K), (K, N)) + + block: + let a = newSeqWith(M*K, float32 rand(-0.1..0.1)) + let b = newSeqWith(K*N, float32 rand(-0.1..0.1)) + + let mkl = benchMKLDNN(a, b, (M,K), (K,N), NbSamples) diff --git a/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx512.nim b/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx512.nim new file mode 100644 index 0000000..5cd003c --- /dev/null +++ b/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx512.nim @@ -0,0 +1,59 @@ +# Weave +# Copyright (c) 2019 Mamy André-Ratsimbazafy +# 
Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# GEMM (GEneralized Matrix Multiplication) using MKL-DNN / DNNL +# Intel Deep Neural Network Library. + +import + ./gemm_bench_common, ./gemm_bench_config, + ../vendor/mkldnn + + +proc benchMKLDNN(a, b: seq[float32], ashape, bshape: MatrixShape, nb_samples: int): seq[float32] = + let req_ops = gemm_required_ops(ashape, bshape) + let out_shape = gemm_out_shape(ashape, bshape) + let out_size = out_shape.M * out_shape.N + + result = newSeq[float32](out_size) + var # MKL-DNN wants pointers as inputs + trans = 'N' + m = int32 M + n = int32 N + k = int32 K + alpha = 1'f32 + lda = int32 K + ldb = int32 N + beta = 0'f32 + ldc = int32 N + + bench("Intel MKL-DNN / DNNL JIT AVX512 benchmark", req_ops): + # Initialisation, not measured apart from the "Collected n samples in ... 
seconds" + zeroMem(result[0].addr, out_size * sizeof(float32)) # We zero memory between computation + do: + # Main work + discard mkldnn_jit_avx512_common_gemm_f32( + trans.addr, trans.addr, + m.addr, n.addr, k.addr, + alpha.addr, a[0].unsafeaddr, lda.addr, + b[0].unsafeAddr, ldb.addr, + beta.addr, result[0].addr, ldc.addr, + bias = nil + ) + +# Bench +when isMainModule: + import std/[random, sequtils] + + randomize(42) # For reproducibility + # warmup() + reportConfig("Intel MKL-DNN JIT AVX512", float32, (M, K), (K, N)) + + block: + let a = newSeqWith(M*K, float32 rand(-0.1..0.1)) + let b = newSeqWith(K*N, float32 rand(-0.1..0.1)) + + let mkl = benchMKLDNN(a, b, (M,K), (K,N), NbSamples) diff --git a/benchmarks/vendor/mkl-dnn b/benchmarks/vendor/mkl-dnn new file mode 160000 index 0000000..a0a87d6 --- /dev/null +++ b/benchmarks/vendor/mkl-dnn @@ -0,0 +1 @@ +Subproject commit a0a87d662edeef38d01db4ac5dd25f59a1f0881f diff --git a/benchmarks/vendor/mkldnn.nim b/benchmarks/vendor/mkldnn.nim new file mode 100644 index 0000000..ff30ab6 --- /dev/null +++ b/benchmarks/vendor/mkldnn.nim @@ -0,0 +1,84 @@ +# Weave +# Copyright (c) 2019 Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. 
+ +import os, strutils +const cSourcesPath = currentSourcePath.rsplit(DirSep, 1)[0] & '/' + +{.passC:"-fopenmp".} +{.passL:"-fopenmp".} + +{.passC:"-I" & cSourcesPath.} +{.passC:"-I" & cSourcesPath & "mkl-dnn/include".} +{.passC:"-I" & cSourcesPath & "mkl-dnn/src/common".} +{.passC:"-I" & cSourcesPath & "mkl-dnn/src/cpu".} +{.passC:"-I" & cSourcesPath & "mkl-dnn/src/cpu/gemm/f32".} +# {.passC:"-std=c++11".} + +{.compile: cSourcesPath & "mkl-dnn/src/common/utils.cpp".} +{.compile: cSourcesPath & "mkl-dnn/src/cpu/jit_utils/jit_utils.cpp".} +{.compile: cSourcesPath & "mkl-dnn/src/cpu/jit_utils/jitprofiling/jitprofiling.c".} +{.compile: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/gemm_utils_f32.cpp".} +{.compile: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.cpp".} +{.compile: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/jit_avx_gemm_f32.cpp".} +{.compile: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.cpp".} + + +type MkldnnStatus {.importc: "mkldnn_status_t".} = enum + # The operation was successful + MkldnnSuccess = 0, + # The operation failed due to an out-of-memory condition + MkldnnOutOfMemory = 1, + # The operation failed and should be retried + MkldnnTryAgain = 2, + # The operation failed because of incorrect function arguments + MkldnnInvalidArguments = 3, + # The operation failed because a primitive was not ready for execution + MkldnnNotReady = 4, + # The operation failed because requested functionality is not implemented + MkldnnUnimplemented = 5, + # Primitive iterator passed over last primitive descriptor + MkldnnIteratorEnds = 6, + # Primitive or engine failed on execution + MkldnnRuntimeError = 7, + # Queried element is not required for given primitive + MkldnnNotRequired = 8 + +proc mkldnn_ref_gemm*[T]( + transa: ptr char, transb: ptr char, + M, N, K: ptr int32, + alpha, A: ptr T, lda: ptr int32, + B: ptr T, ldb: ptr int32, + beta, C: ptr T, ldc: ptr int32, + bias: ptr T +): MkldnnStatus {. 
+ importcpp:"mkldnn::impl::cpu::ref_gemm<'*6>(@)", + header: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.hpp" +.} + +proc mkldnn_jit_avx_gemm_f32*( + transa: ptr char, transb: ptr char, + M, N, K: ptr int32, + alpha, A: ptr float32, lda: ptr int32, + B: ptr float32, ldb: ptr int32, + beta, C: ptr float32, ldc: ptr int32, + bias: ptr float32 +): MkldnnStatus {. + importcpp:"mkldnn::impl::cpu::jit_avx_gemm_f32(@)", + header: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/jit_avx_gemm_f32.hpp" +.} + +proc mkldnn_jit_avx512_common_gemm_f32*( + transa: ptr char, transb: ptr char, + M, N, K: ptr int32, + alpha, A: ptr float32, lda: ptr int32, + B: ptr float32, ldb: ptr int32, + beta, C: ptr float32, ldc: ptr int32, + bias: ptr float32 +): MkldnnStatus {. + importcpp:"mkldnn::impl::cpu::jit_avx512_common_gemm_f32(@)", + header: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.hpp" +.} diff --git a/benchmarks/vendor/mkldnn_config.h b/benchmarks/vendor/mkldnn_config.h new file mode 100644 index 0000000..bd40ba5 --- /dev/null +++ b/benchmarks/vendor/mkldnn_config.h @@ -0,0 +1,85 @@ +/******************************************************************************* +* Copyright 2019 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#ifndef MKLDNN_CONFIG_H +#define MKLDNN_CONFIG_H + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +// All symbols shall be internal unless marked as MKLDNN_API +#if defined _WIN32 || defined __CYGWIN__ +# define MKLDNN_HELPER_DLL_IMPORT __declspec(dllimport) +# define MKLDNN_HELPER_DLL_EXPORT __declspec(dllexport) +#else +# if __GNUC__ >= 4 +# define MKLDNN_HELPER_DLL_IMPORT __attribute__((visibility("default"))) +# define MKLDNN_HELPER_DLL_EXPORT __attribute__((visibility("default"))) +# else +# define MKLDNN_HELPER_DLL_IMPORT +# define MKLDNN_HELPER_DLL_EXPORT +# endif +#endif + +#ifdef MKLDNN_DLL +# ifdef MKLDNN_DLL_EXPORTS +# define MKLDNN_API MKLDNN_HELPER_DLL_EXPORT +# else +# define MKLDNN_API MKLDNN_HELPER_DLL_IMPORT +# endif +#else +# define MKLDNN_API +#endif + +#if defined (__GNUC__) +# define MKLDNN_DEPRECATED __attribute__((deprecated)) +#elif defined(_MSC_VER) +# define MKLDNN_DEPRECATED __declspec(deprecated) +#else +# define MKLDNN_DEPRECATED +#endif +#endif // DOXYGEN_SHOULD_SKIP_THIS + +// No runtime (disabled) +#define MKLDNN_RUNTIME_NONE 0u +// Sequential runtime (CPU only) +#define MKLDNN_RUNTIME_SEQ 1u +// OpenMP runtime (CPU only) +#define MKLDNN_RUNTIME_OMP 2u +// TBB runtime (CPU only) +#define MKLDNN_RUNTIME_TBB 4u +// OpenCL runtime +#define MKLDNN_RUNTIME_OCL 256u + +// MKL-DNN CPU engine runtime +#define MKLDNN_CPU_RUNTIME MKLDNN_RUNTIME_OMP + +// MKL-DNN GPU engine runtime +#define MKLDNN_GPU_RUNTIME MKLDNN_RUNTIME_NONE + +#if defined(MKLDNN_CPU_RUNTIME) && defined(MKLDNN_GPU_RUNTIME) +# if (MKLDNN_CPU_RUNTIME == MKLDNN_RUNTIME_NONE) \ + || (MKLDNN_CPU_RUNTIME == MKLDNN_RUNTIME_OCL) +# error "Unexpected MKLDNN_CPU_RUNTIME" +# endif +# if (MKLDNN_GPU_RUNTIME != MKLDNN_RUNTIME_NONE) \ + && (MKLDNN_GPU_RUNTIME != MKLDNN_RUNTIME_OCL) +# error "Unexpected MKLDNN_GPU_RUNTIME" +# endif +#else +# error "BOTH MKLDNN_CPU_RUNTIME and MKLDNN_GPU_RUNTIME must be defined" 
+#endif + +#endif diff --git a/benchmarks/vendor/mkldnn_version.h b/benchmarks/vendor/mkldnn_version.h new file mode 100644 index 0000000..28a3234 --- /dev/null +++ b/benchmarks/vendor/mkldnn_version.h @@ -0,0 +1,32 @@ +/******************************************************************************* +* Copyright 2019 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef MKLDNN_VERSION_H +#define MKLDNN_VERSION_H + +/// Major version +#define MKLDNN_VERSION_MAJOR 1 + +/// Minor version +#define MKLDNN_VERSION_MINOR 0 + +/// Patch version +#define MKLDNN_VERSION_PATCH 4 + +/// Git commit hash +#define MKLDNN_VERSION_HASH "a0a87d662edeef38d01db4ac5dd25f59a1f0881f" + +#endif