Matrix Mult bench vs Intel MKL-DNN (#70)

* Add Intel Deep Learning library MKL-DNN / DNNL as a submodule for benching * Use an older MKL-DNN (1.0.4) that did not entangle everything with cpu_engine.cpp * Add Intel MKL-DNN AVX2 and AVX512 benchmarks
mratsim · Dec 27, 2019 · 77f431e · 77f431e
1 parent 3239bc1
commit 77f431e
Show file tree

Hide file tree

Showing 7 changed files with 323 additions and 0 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -13,3 +13,6 @@
 [submodule "benchmarks/vendor/tasking-2.0"]
 	path = benchmarks/vendor/tasking-2.0
 	url = https://github.com/aprell/tasking-2.0
+[submodule "benchmarks/vendor/mkl-dnn"]
+	path = benchmarks/vendor/mkl-dnn
+	url = https://github.com/intel/mkl-dnn
diff --git a/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx2.nim b/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx2.nim
@@ -0,0 +1,59 @@
+# Weave
+# Copyright (c) 2019 Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+# GEMM (GEneralized Matrix Multiplication) using MKL-DNN / DNNL
+# Intel Deep Neural Network Library.
+
+import
+  ./gemm_bench_common, ./gemm_bench_config,
+  ../vendor/mkldnn
+
+
+proc benchMKLDNN(a, b: seq[float32], ashape, bshape: MatrixShape, nb_samples: int): seq[float32] =
+  ## Benchmarks the MKL-DNN / DNNL JIT AVX float32 GEMM kernel on `a` and `b`
+  ## and returns the output buffer (`out_shape.M * out_shape.N` elements).
+  ##
+  ## `ashape` / `bshape` are the shapes of `a` / `b`; the caller in this file
+  ## passes them as `(M, K)` and `(K, N)`.
+  ## `nb_samples` is not referenced directly in this proc; presumably the
+  ## `bench` template reads it from this scope -- TODO confirm.
+  let req_ops = gemm_required_ops(ashape, bshape)
+  let out_shape = gemm_out_shape(ashape, bshape)
+  let out_size = out_shape.M * out_shape.N
+
+  # Raw pointers are handed to C++ below, so make sure each input buffer
+  # holds exactly as many elements as its shape announces.
+  doAssert a.len == ashape[0] * ashape[1], "`a` does not match `ashape`"
+  doAssert b.len == bshape[0] * bshape[1], "`b` does not match `bshape`"
+
+  result = newSeq[float32](out_size)
+  # The dimensions are derived from the shapes received as arguments (not
+  # from the global benchmark configuration) so that they always agree with
+  # the sizes of `a`, `b` and `result`.
+  var # MKL-DNN wants pointers as inputs
+    trans = 'N'
+    m = int32(out_shape.M)
+    n = int32(out_shape.N)
+    k = int32(ashape[1]) # shared dimension: second component of `ashape`
+    alpha = 1'f32
+    lda = k
+    ldb = n
+    beta = 0'f32
+    ldc = n
+
+  bench("Intel MKL-DNN / DNNL JIT AVX benchmark", req_ops):
+    # Initialisation, not measured apart from the "Collected n samples in ... seconds"
+    zeroMem(result[0].addr, out_size * sizeof(float32)) # We zero memory between computation
+  do:
+    # Main work. The returned status is deliberately ignored here.
+    discard mkldnn_jit_avx_gemm_f32(
+      trans.addr, trans.addr,
+      m.addr, n.addr, k.addr,
+      alpha.addr, a[0].unsafeAddr, lda.addr,
+                  b[0].unsafeAddr, ldb.addr,
+      beta.addr,  result[0].addr, ldc.addr,
+                  bias = nil
+    )
+
+# Bench
+when isMainModule:
+  # Entry point: runs the JIT AVX GEMM benchmark on random inputs.
+  import std/[random, sequtils]
+
+  randomize(42) # For reproducibility
+  # warmup()
+  reportConfig("Intel MKL-DNN JIT AVX", float32, (M, K), (K, N))
+
+  block:
+    let a = newSeqWith(M*K, float32 rand(-0.1..0.1))
+    let b = newSeqWith(K*N, float32 rand(-0.1..0.1))
+
+    # The product itself is not inspected, only the timings matter here.
+    discard benchMKLDNN(a, b, (M,K), (K,N), NbSamples)
diff --git a/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx512.nim b/benchmarks/matmul_gemm_blas/mkldnn_gemm_jit_avx512.nim
@@ -0,0 +1,59 @@
+# Weave
+# Copyright (c) 2019 Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+# GEMM (GEneralized Matrix Multiplication) using MKL-DNN / DNNL
+# Intel Deep Neural Network Library.
+
+import
+  ./gemm_bench_common, ./gemm_bench_config,
+  ../vendor/mkldnn
+
+
+proc benchMKLDNN(a, b: seq[float32], ashape, bshape: MatrixShape, nb_samples: int): seq[float32] =
+  ## Benchmarks the MKL-DNN / DNNL JIT AVX512 float32 GEMM kernel on `a` and
+  ## `b` and returns the output buffer (`out_shape.M * out_shape.N` elements).
+  ##
+  ## `ashape` / `bshape` are the shapes of `a` / `b`; the caller in this file
+  ## passes them as `(M, K)` and `(K, N)`.
+  ## `nb_samples` is not referenced directly in this proc; presumably the
+  ## `bench` template reads it from this scope -- TODO confirm.
+  let req_ops = gemm_required_ops(ashape, bshape)
+  let out_shape = gemm_out_shape(ashape, bshape)
+  let out_size = out_shape.M * out_shape.N
+
+  # Raw pointers are handed to C++ below, so make sure each input buffer
+  # holds exactly as many elements as its shape announces.
+  doAssert a.len == ashape[0] * ashape[1], "`a` does not match `ashape`"
+  doAssert b.len == bshape[0] * bshape[1], "`b` does not match `bshape`"
+
+  result = newSeq[float32](out_size)
+  # The dimensions are derived from the shapes received as arguments (not
+  # from the global benchmark configuration) so that they always agree with
+  # the sizes of `a`, `b` and `result`.
+  var # MKL-DNN wants pointers as inputs
+    trans = 'N'
+    m = int32(out_shape.M)
+    n = int32(out_shape.N)
+    k = int32(ashape[1]) # shared dimension: second component of `ashape`
+    alpha = 1'f32
+    lda = k
+    ldb = n
+    beta = 0'f32
+    ldc = n
+
+  bench("Intel MKL-DNN / DNNL JIT AVX512 benchmark", req_ops):
+    # Initialisation, not measured apart from the "Collected n samples in ... seconds"
+    zeroMem(result[0].addr, out_size * sizeof(float32)) # We zero memory between computation
+  do:
+    # Main work. The returned status is deliberately ignored here.
+    discard mkldnn_jit_avx512_common_gemm_f32(
+      trans.addr, trans.addr,
+      m.addr, n.addr, k.addr,
+      alpha.addr, a[0].unsafeAddr, lda.addr,
+                  b[0].unsafeAddr, ldb.addr,
+      beta.addr,  result[0].addr, ldc.addr,
+                  bias = nil
+    )
+
+# Bench
+when isMainModule:
+  # Entry point: runs the JIT AVX512 GEMM benchmark on random inputs.
+  import std/[random, sequtils]
+
+  randomize(42) # For reproducibility
+  # warmup()
+  reportConfig("Intel MKL-DNN JIT AVX512", float32, (M, K), (K, N))
+
+  block:
+    let a = newSeqWith(M*K, float32 rand(-0.1..0.1))
+    let b = newSeqWith(K*N, float32 rand(-0.1..0.1))
+
+    # The product itself is not inspected, only the timings matter here.
+    discard benchMKLDNN(a, b, (M,K), (K,N), NbSamples)
diff --git a/benchmarks/vendor/mkl-dnn b/benchmarks/vendor/mkl-dnn
diff --git a/benchmarks/vendor/mkldnn.nim b/benchmarks/vendor/mkldnn.nim
@@ -0,0 +1,84 @@
+# Weave
+# Copyright (c) 2019 Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+import os, strutils
+# Directory holding this file, with a trailing '/'. Every path below
+# (include directories and vendored C/C++ sources) is built from it.
+const cSourcesPath = currentSourcePath.rsplit(DirSep, 1)[0] & '/'
+
+# Pass the OpenMP flag to both the C/C++ compiler and the linker.
+{.passC:"-fopenmp".}
+{.passL:"-fopenmp".}
+
+# Include search paths: this directory first (mkldnn_config.h and
+# mkldnn_version.h live next to this file), then the headers of the
+# vendored mkl-dnn checkout.
+{.passC:"-I" & cSourcesPath.}
+{.passC:"-I" & cSourcesPath & "mkl-dnn/include".}
+{.passC:"-I" & cSourcesPath & "mkl-dnn/src/common".}
+{.passC:"-I" & cSourcesPath & "mkl-dnn/src/cpu".}
+{.passC:"-I" & cSourcesPath & "mkl-dnn/src/cpu/gemm/f32".}
+# {.passC:"-std=c++11".}
+
+# mkl-dnn translation units compiled and linked together with the Nim
+# program; the procs bound further down are defined in the gemm/f32 sources.
+{.compile: cSourcesPath & "mkl-dnn/src/common/utils.cpp".}
+{.compile: cSourcesPath & "mkl-dnn/src/cpu/jit_utils/jit_utils.cpp".}
+{.compile: cSourcesPath & "mkl-dnn/src/cpu/jit_utils/jitprofiling/jitprofiling.c".}
+{.compile: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/gemm_utils_f32.cpp".}
+{.compile: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.cpp".}
+{.compile: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/jit_avx_gemm_f32.cpp".}
+{.compile: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.cpp".}
+
+
+# Status codes returned by the MKL-DNN procs bound in this module
+# (maps to the C type `mkldnn_status_t`).
+# The type is exported so that importing modules can name it and compare
+# a returned status against `MkldnnSuccess`.
+type MkldnnStatus* {.importc: "mkldnn_status_t".} = enum
+    # The operation was successful
+    MkldnnSuccess = 0,
+    # The operation failed due to an out-of-memory condition
+    MkldnnOutOfMemory = 1,
+    # The operation failed and should be retried
+    MkldnnTryAgain = 2,
+    # The operation failed because of incorrect function arguments
+    MkldnnInvalidArguments = 3,
+    # The operation failed because a primitive was not ready for execution
+    MkldnnNotReady = 4,
+    # The operation failed because requested functionality is not implemented
+    MkldnnUnimplemented = 5,
+    # Primitive iterator passed over last primitive descriptor
+    MkldnnIteratorEnds = 6,
+    # Primitive or engine failed on execution
+    MkldnnRuntimeError = 7,
+    # Queried element is not required for given primitive
+    MkldnnNotRequired = 8
+
+# Generic binding to the C++ template `mkldnn::impl::cpu::ref_gemm`,
+# declared in ref_gemm_f32.hpp.
+# `'*6` in the importcpp pattern instantiates the template with the type
+# the 6th parameter (`alpha`) points to, i.e. `T`.
+# Every argument, scalars included, is passed by pointer.
+# NOTE(review): parameter names follow the BLAS GEMM convention
+# (C = alpha * op(A) * op(B) + beta * C) -- confirm against the header.
+proc mkldnn_ref_gemm*[T](
+  transa: ptr char, transb: ptr char,
+  M, N, K: ptr int32,
+  alpha, A: ptr T, lda: ptr int32,
+         B: ptr T, ldb: ptr int32,
+  beta,  C: ptr T, ldc: ptr int32,
+      bias: ptr T
+): MkldnnStatus {.
+  importcpp:"mkldnn::impl::cpu::ref_gemm<'*6>(@)",
+  header: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.hpp"
+.}
+
+# Binding to the C++ function `mkldnn::impl::cpu::jit_avx_gemm_f32`,
+# declared in jit_avx_gemm_f32.hpp (float32 only).
+# Every argument, scalars included, is passed by pointer; the benchmarks
+# in this repository pass `bias = nil`.
+# NOTE(review): parameter names follow the BLAS GEMM convention
+# (C = alpha * op(A) * op(B) + beta * C) -- confirm against the header.
+proc mkldnn_jit_avx_gemm_f32*(
+  transa: ptr char, transb: ptr char,
+  M, N, K: ptr int32,
+  alpha, A: ptr float32, lda: ptr int32,
+         B: ptr float32, ldb: ptr int32,
+  beta,  C: ptr float32, ldc: ptr int32,
+      bias: ptr float32
+): MkldnnStatus {.
+  importcpp:"mkldnn::impl::cpu::jit_avx_gemm_f32(@)",
+  header: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/jit_avx_gemm_f32.hpp"
+.}
+
+# Binding to the C++ function
+# `mkldnn::impl::cpu::jit_avx512_common_gemm_f32`, declared in
+# jit_avx512_common_gemm_f32.hpp (float32 only).
+# Every argument, scalars included, is passed by pointer; the benchmarks
+# in this repository pass `bias = nil`.
+# NOTE(review): parameter names follow the BLAS GEMM convention
+# (C = alpha * op(A) * op(B) + beta * C) -- confirm against the header.
+proc mkldnn_jit_avx512_common_gemm_f32*(
+  transa: ptr char, transb: ptr char,
+  M, N, K: ptr int32,
+  alpha, A: ptr float32, lda: ptr int32,
+         B: ptr float32, ldb: ptr int32,
+  beta,  C: ptr float32, ldc: ptr int32,
+      bias: ptr float32
+): MkldnnStatus {.
+  importcpp:"mkldnn::impl::cpu::jit_avx512_common_gemm_f32(@)",
+  header: cSourcesPath & "mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.hpp"
+.}
diff --git a/benchmarks/vendor/mkldnn_config.h b/benchmarks/vendor/mkldnn_config.h
@@ -0,0 +1,85 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef MKLDNN_CONFIG_H
+#define MKLDNN_CONFIG_H
+
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
+// All symbols shall be internal unless marked as MKLDNN_API
+#if defined _WIN32 || defined __CYGWIN__
+#   define MKLDNN_HELPER_DLL_IMPORT __declspec(dllimport)
+#   define MKLDNN_HELPER_DLL_EXPORT __declspec(dllexport)
+#else // not Windows / Cygwin
+#   if __GNUC__ >= 4
+#       define MKLDNN_HELPER_DLL_IMPORT __attribute__((visibility("default")))
+#       define MKLDNN_HELPER_DLL_EXPORT __attribute__((visibility("default")))
+#   else
+#       define MKLDNN_HELPER_DLL_IMPORT
+#       define MKLDNN_HELPER_DLL_EXPORT
+#   endif // __GNUC__ >= 4
+#endif // _WIN32 || __CYGWIN__
+
+#ifdef MKLDNN_DLL
+#   ifdef MKLDNN_DLL_EXPORTS
+#       define MKLDNN_API MKLDNN_HELPER_DLL_EXPORT
+#   else
+#       define MKLDNN_API MKLDNN_HELPER_DLL_IMPORT
+#   endif // MKLDNN_DLL_EXPORTS
+#else // MKLDNN_DLL not defined: MKLDNN_API expands to nothing
+#   define MKLDNN_API
+#endif // MKLDNN_DLL
+
+#if defined (__GNUC__)
+#   define MKLDNN_DEPRECATED __attribute__((deprecated))
+#elif defined(_MSC_VER)
+#   define MKLDNN_DEPRECATED __declspec(deprecated)
+#else
+#   define MKLDNN_DEPRECATED
+#endif // __GNUC__ / _MSC_VER
+#endif // DOXYGEN_SHOULD_SKIP_THIS
+
+// No runtime (disabled)
+#define MKLDNN_RUNTIME_NONE 0u
+// Sequential runtime (CPU only)
+#define MKLDNN_RUNTIME_SEQ 1u
+// OpenMP runtime (CPU only)
+#define MKLDNN_RUNTIME_OMP 2u
+// TBB runtime (CPU only)
+#define MKLDNN_RUNTIME_TBB 4u
+// OpenCL runtime
+#define MKLDNN_RUNTIME_OCL 256u
+
+// MKL-DNN CPU engine runtime
+#define MKLDNN_CPU_RUNTIME MKLDNN_RUNTIME_OMP
+
+// MKL-DNN GPU engine runtime
+#define MKLDNN_GPU_RUNTIME MKLDNN_RUNTIME_NONE
+
+#if defined(MKLDNN_CPU_RUNTIME) && defined(MKLDNN_GPU_RUNTIME)
+#    if (MKLDNN_CPU_RUNTIME == MKLDNN_RUNTIME_NONE) \
+            || (MKLDNN_CPU_RUNTIME == MKLDNN_RUNTIME_OCL)
+#        error "Unexpected MKLDNN_CPU_RUNTIME"
+#    endif // CPU runtime sanity check
+#    if (MKLDNN_GPU_RUNTIME != MKLDNN_RUNTIME_NONE) \
+            && (MKLDNN_GPU_RUNTIME != MKLDNN_RUNTIME_OCL)
+#        error "Unexpected MKLDNN_GPU_RUNTIME"
+#    endif // GPU runtime sanity check
+#else
+#    error "BOTH MKLDNN_CPU_RUNTIME and MKLDNN_GPU_RUNTIME must be defined"
+#endif // MKLDNN_CPU_RUNTIME && MKLDNN_GPU_RUNTIME
+
+#endif // MKLDNN_CONFIG_H
diff --git a/benchmarks/vendor/mkldnn_version.h b/benchmarks/vendor/mkldnn_version.h
@@ -0,0 +1,32 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef MKLDNN_VERSION_H
+#define MKLDNN_VERSION_H
+
+/// Major version
+#define MKLDNN_VERSION_MAJOR 1
+
+/// Minor version
+#define MKLDNN_VERSION_MINOR 0
+
+/// Patch version
+#define MKLDNN_VERSION_PATCH 4
+
+/// Git commit hash
+#define MKLDNN_VERSION_HASH  "a0a87d662edeef38d01db4ac5dd25f59a1f0881f"
+
+#endif // MKLDNN_VERSION_H