From 92a179c354058b5e2a3235e72541a2f13530fb8c Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Tue, 21 Jul 2015 16:58:29 -0500 Subject: [PATCH] Adding benchmark data wrt 14.301 Refactored the csv directory layout --- .../clSPARSE-0.6.0.0/W9100/Csr2Coo.csv | 19 ----------- .../clSPARSE-0.6.0.0/W9100/Csr2Dense.csv | 16 --------- .../clSPARSE-0.6.0.0/W9100/SpM-dV.csv | 16 --------- .../cuSPARSE-0.6.0.0/Tesla_K40/Csr2Coo.csv | 19 ----------- .../cuSPARSE-0.6.0.0/Tesla_K40/Csr2Dense.csv | 17 ---------- .../cuSPARSE-0.6.0.0/Tesla_K40/SpM-dV.csv | 16 --------- .../clSPARSE-0.6.0.0/W9100/Csr2Coo.csv | 17 ++++++++++ .../clSPARSE-0.6.0.0/W9100/Csr2Dense.csv | 15 +++++++++ .../clSPARSE-0.6.0.0/W9100/README.md | 26 +++++++++++++++ .../clSPARSE-0.6.0.0/W9100/SpM-dV.csv | 17 ++++++++++ .../clSPARSE-0.6.0.0/W9100/clsparse-bench.sh | 33 +++++++++++++++++++ .../cuSPARSE-7.0/Tesla_K40/Csr2Coo.csv | 17 ++++++++++ .../cuSPARSE-7.0/Tesla_K40/Csr2Dense.csv | 15 +++++++++ .../cuSPARSE-7.0/Tesla_K40/README.md | 26 +++++++++++++++ .../cuSPARSE-7.0/Tesla_K40/SpM-dV.csv | 17 ++++++++++ .../cuSPARSE-7.0/Tesla_K40/cusparse-bench.sh | 32 ++++++++++++++++++ 16 files changed, 215 insertions(+), 103 deletions(-) delete mode 100644 doc/performance/clSPARSE-0.6.0.0/W9100/Csr2Coo.csv delete mode 100644 doc/performance/clSPARSE-0.6.0.0/W9100/Csr2Dense.csv delete mode 100644 doc/performance/clSPARSE-0.6.0.0/W9100/SpM-dV.csv delete mode 100644 doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/Csr2Coo.csv delete mode 100644 doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/Csr2Dense.csv delete mode 100644 doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/SpM-dV.csv create mode 100644 docs/performance/clSPARSE-0.6.0.0/W9100/Csr2Coo.csv create mode 100644 docs/performance/clSPARSE-0.6.0.0/W9100/Csr2Dense.csv create mode 100644 docs/performance/clSPARSE-0.6.0.0/W9100/README.md create mode 100644 docs/performance/clSPARSE-0.6.0.0/W9100/SpM-dV.csv create mode 100644 docs/performance/clSPARSE-0.6.0.0/W9100/clsparse-bench.sh create 
mode 100644 docs/performance/cuSPARSE-7.0/Tesla_K40/Csr2Coo.csv create mode 100644 docs/performance/cuSPARSE-7.0/Tesla_K40/Csr2Dense.csv create mode 100644 docs/performance/cuSPARSE-7.0/Tesla_K40/README.md create mode 100644 docs/performance/cuSPARSE-7.0/Tesla_K40/SpM-dV.csv create mode 100644 docs/performance/cuSPARSE-7.0/Tesla_K40/cusparse-bench.sh diff --git a/doc/performance/clSPARSE-0.6.0.0/W9100/Csr2Coo.csv b/doc/performance/clSPARSE-0.6.0.0/W9100/Csr2Coo.csv deleted file mode 100644 index 988095f..0000000 --- a/doc/performance/clSPARSE-0.6.0.0/W9100/Csr2Coo.csv +++ /dev/null @@ -1,19 +0,0 @@ -Csr2Coo,Bell-Garland matrices,, -,,, -,,, -,,GiElements/s, -cant,,5.34468, -consph,,5.79985, -cop20k_A,,4.58041, -mac_econ_fwd500,,3.3711, -mc2depi,,4.21598, -pdb1HYS,,5.66851, -pwtk,,6.80393, -rail4284,,6.15234, -rma10,,4.31987, -scircuit,,2.47316, -shipsec1,,5.0256, -webbase_1M,,3.6331, -,,, -,,, -AMD System:,Ubuntu 14.04.2 LTS,,(timmy-linux.amd.com) diff --git a/doc/performance/clSPARSE-0.6.0.0/W9100/Csr2Dense.csv b/doc/performance/clSPARSE-0.6.0.0/W9100/Csr2Dense.csv deleted file mode 100644 index a37270f..0000000 --- a/doc/performance/clSPARSE-0.6.0.0/W9100/Csr2Dense.csv +++ /dev/null @@ -1,16 +0,0 @@ -Csr2Dense,Small-MTX,matrices, -,,, -,,, -,,GiElements/s, -Dubcova1,,0.0278669, -hydr1c_A_11,,0.0211048, -hydr1c_A_72,,0.021233, -hydr1c_A_76,,0.0210692, -Maragal_6,,0.0689761, -Na5,,0.223689, -psse1,,0.0103225, -Reuters911,,0.0461601, -Si10H16,,0.0849898, -tomography,,0.195625, -,,, -AMD System:,Ubuntu 14.04.2 LTS,,(timmy-linux.amd.com) diff --git a/doc/performance/clSPARSE-0.6.0.0/W9100/SpM-dV.csv b/doc/performance/clSPARSE-0.6.0.0/W9100/SpM-dV.csv deleted file mode 100644 index 2ff97e2..0000000 --- a/doc/performance/clSPARSE-0.6.0.0/W9100/SpM-dV.csv +++ /dev/null @@ -1,16 +0,0 @@ -SpM-dV,Bell-Garland matrices,,,,, -,,320 GB/s,,,, -,Linux,1642.5 (VM),,,,1642.5 == 14.50.2 -,,w9100,,,, -cant,,96.0199,,,, -consph,,118.862,,,, -cop20k_A,,86.6051,,,, 
-mac_econ_fwd500,,63.2257,,,, -mc2depi,,114.147,,,, -pdb1HYS,,104.649,,,, -pwtk,,139.115,,,, -rail4284,,98.4723,,,, -rma10,,76.0268,,,, -scircuit,,46.9494,,,, -shipsec1,,100.119,,,, -webbase_1M,,117.437,,,, diff --git a/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/Csr2Coo.csv b/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/Csr2Coo.csv deleted file mode 100644 index d32712f..0000000 --- a/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/Csr2Coo.csv +++ /dev/null @@ -1,19 +0,0 @@ -Csr2Coo,Bell-Garland matrices,, -,,, -,,, -,,,GiElements/s -cant,,,12.5817 -consph,,,13.2232 -cop20k_A,,,6.12102 -mac_econ_fwd500,,,1.8865 -mc2depi,,,1.25825 -pdb1HYS,,,16.5828 -pwtk,,,12.4101 -rail4284,,,14.6147 -rma10,,,10.8501 -scircuit,,,1.71464 -shipsec1,,,13.0017 -webbase_1M,,,0.989057 -,,, -,,, -Nvidia System:,openSUSE 13.2,,(Durres.amd.com) diff --git a/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/Csr2Dense.csv b/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/Csr2Dense.csv deleted file mode 100644 index 6cb7fd6..0000000 --- a/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/Csr2Dense.csv +++ /dev/null @@ -1,17 +0,0 @@ -Csr2Dense,Small-MTX,matrices, -,,, -,,,GiElements/s -,,, -Dubcova1,,,0.0107514 -hydr1c_A_11,,,0.0121738 -hydr1c_A_72,,,0.0121759 -hydr1c_A_76,,,0.0121681 -Maragal_6,,,0.026069 -Na5,,,0.155912 -psse1,,,0.00519411 -Reuters911,,,0.0217721 -Si10H16,,,0.0308366 -tomography,,,0.566176 -,,, -,,, -Nvidia System:,openSUSE 13.2,,(Durres.amd.com) diff --git a/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/SpM-dV.csv b/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/SpM-dV.csv deleted file mode 100644 index edd5b8a..0000000 --- a/doc/performance/cuSPARSE-0.6.0.0/Tesla_K40/SpM-dV.csv +++ /dev/null @@ -1,16 +0,0 @@ -SpM-dV,Bell-Garland matrices,,,,, -,,,280 GB/s,,, -,Linux,,346.47,,,1642.5 == 14.50.2 -,,,k40c,,, -cant,,,96.4569,,, -consph,,,102.113,,, -cop20k_A,,,72.1035,,, -mac_econ_fwd500,,,47.1653,,, -mc2depi,,,74.2639,,, -pdb1HYS,,,102.969,,, -pwtk,,,74.1772,,, -rail4284,,,34.8417,,, -rma10,,,64.8728,,, 
-scircuit,,,51.2187,,, -shipsec1,,,161.233,,, -webbase_1M,,,32.5601,,, diff --git a/docs/performance/clSPARSE-0.6.0.0/W9100/Csr2Coo.csv b/docs/performance/clSPARSE-0.6.0.0/W9100/Csr2Coo.csv new file mode 100644 index 0000000..201c7cc --- /dev/null +++ b/docs/performance/clSPARSE-0.6.0.0/W9100/Csr2Coo.csv @@ -0,0 +1,17 @@ +csr2coo,, +,, +OpenCL runtime:,1573.4 (VM), +OpenCL Device:,w9100, +,,Gi-Elements/s +cant,,5.34 +consph,,5.80 +cop20k_A,,4.58 +mac_econ_fwd500,,3.37 +mc2depi,,4.22 +pdb1HYS,,5.67 +pwtk,,6.80 +rail4284,,6.15 +rma10,,4.32 +scircuit,,2.47 +shipsec1,,5.03 +webbase_1M,,3.63 diff --git a/docs/performance/clSPARSE-0.6.0.0/W9100/Csr2Dense.csv b/docs/performance/clSPARSE-0.6.0.0/W9100/Csr2Dense.csv new file mode 100644 index 0000000..f666497 --- /dev/null +++ b/docs/performance/clSPARSE-0.6.0.0/W9100/Csr2Dense.csv @@ -0,0 +1,15 @@ +csr2dense,, +,, +OpenCL runtime:,1573.4 (VM), +OpenCL Device:,w9100, +,,Gi-Elements/s +Dubcova1,,0.028 +hydr1c_A_11,,0.021 +hydr1c_A_72,,0.021 +hydr1c_A_76,,0.021 +Maragal_6,,0.069 +Na5,,0.224 +psse1,,0.010 +Reuters911,,0.046 +Si10H16,,0.085 +tomography,,0.196 diff --git a/docs/performance/clSPARSE-0.6.0.0/W9100/README.md b/docs/performance/clSPARSE-0.6.0.0/W9100/README.md new file mode 100644 index 0000000..409585e --- /dev/null +++ b/docs/performance/clSPARSE-0.6.0.0/W9100/README.md @@ -0,0 +1,26 @@ +# Benchmarking +## Hardware +w9100 + +## Environment +Ubuntu 14.04 + +clSPARSE v0.6.0.0 + +[Catalyst FirePro](http://support.amd.com/en-us/download/workstation?os=Linux%20x86_64#catalyst-pro) 14.301 + +## Tool +[clsparse-bench](clSPARSE/src/benchmarks/clsparse-bench) + +## Methodology +For each data point, we took 20 samples. Each sample consists of 20 calls +with a wait afterward. We benchmark with respect to the API, utilizing host timers +(not pure kernel time). +Outlying samples beyond 1 standard deviation were removed. 
+ +Conversion routines benchmarked as number of Gi-Elements/s converted + +SpM-dV routine calculated as Gi-Bytes/s +```c +( sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ) ) / time_in_ns( ); +``` diff --git a/docs/performance/clSPARSE-0.6.0.0/W9100/SpM-dV.csv b/docs/performance/clSPARSE-0.6.0.0/W9100/SpM-dV.csv new file mode 100644 index 0000000..88fae20 --- /dev/null +++ b/docs/performance/clSPARSE-0.6.0.0/W9100/SpM-dV.csv @@ -0,0 +1,17 @@ +SpM-dV,, +,, +OpenCL runtime:,1573.4 (VM), +OpenCL Device:,w9100, +,,Gi-Bytes/s +cant,,99.7 +consph,,118.8 +cop20k_A,,90.1 +mac_econ_fwd500,,65.9 +mc2depi,,113.4 +pdb1HYS,,102.6 +pwtk,,144.9 +rail4284,,100.7 +rma10,,78.4 +scircuit,,56.5 +shipsec1,,100.5 +webbase_1M,,120.3 diff --git a/docs/performance/clSPARSE-0.6.0.0/W9100/clsparse-bench.sh b/docs/performance/clSPARSE-0.6.0.0/W9100/clsparse-bench.sh new file mode 100644 index 0000000..08c80d2 --- /dev/null +++ b/docs/performance/clSPARSE-0.6.0.0/W9100/clsparse-bench.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +#Executable +clsparse_bench=~/code/github/clMathLibraries/bin/clSPARSE/release/clSPARSE-build/staging/clsparse-bench + +#Data directories +mtx_cant=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/cant +mtx_consph=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/consph +mtx_cop20k_A=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/cop20k_A +mtx_mac_econ_fwd500=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/mac_econ_fwd500 +mtx_mc2depi=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/mc2depi +mtx_pdb1HYS=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/pdb1HYS +mtx_pwtk=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/pwtk +mtx_rail4284=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/rail4284 
+mtx_rma10=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/rma10 +mtx_scircuit=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/scircuit +mtx_shipsec1=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/shipsec1 +mtx_webbase_1M=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/webbase-1M + +clsparse_timing_dir=timings/spm-dv/14.301 +mkdir -p ${clsparse_timing_dir} +${clsparse_bench} -f spmdv -d ${mtx_cant} > ${clsparse_timing_dir}/cant.txt +${clsparse_bench} -f spmdv -d ${mtx_consph} > ${clsparse_timing_dir}/consph.txt +${clsparse_bench} -f spmdv -d ${mtx_cop20k_A} > ${clsparse_timing_dir}/cop20k_A.txt +${clsparse_bench} -f spmdv -d ${mtx_mac_econ_fwd500} > ${clsparse_timing_dir}/mac_econ_fwd500.txt +${clsparse_bench} -f spmdv -d ${mtx_mc2depi} > ${clsparse_timing_dir}/mc2depi.txt +${clsparse_bench} -f spmdv -d ${mtx_pdb1HYS} > ${clsparse_timing_dir}/pdb1HYS.txt +${clsparse_bench} -f spmdv -d ${mtx_pwtk} > ${clsparse_timing_dir}/pwtk.txt +${clsparse_bench} -f spmdv -d ${mtx_rail4284} > ${clsparse_timing_dir}/rail4284.txt +${clsparse_bench} -f spmdv -d ${mtx_rma10} > ${clsparse_timing_dir}/rma10.txt +${clsparse_bench} -f spmdv -d ${mtx_scircuit} > ${clsparse_timing_dir}/scircuit.txt +${clsparse_bench} -f spmdv -d ${mtx_shipsec1} > ${clsparse_timing_dir}/shipsec1.txt +${clsparse_bench} -f spmdv -d ${mtx_webbase_1M} > ${clsparse_timing_dir}/webbase_1M.txt diff --git a/docs/performance/cuSPARSE-7.0/Tesla_K40/Csr2Coo.csv b/docs/performance/cuSPARSE-7.0/Tesla_K40/Csr2Coo.csv new file mode 100644 index 0000000..af2b02f --- /dev/null +++ b/docs/performance/cuSPARSE-7.0/Tesla_K40/Csr2Coo.csv @@ -0,0 +1,17 @@ +Csr2Coo,, +,, +CUDA driver:,346.47, +OpenCL Device:,k40c, +,,Gi-Elements/s +cant,,12.58 +consph,,13.22 +cop20k_A,,6.12 +mac_econ_fwd500,,1.89 +mc2depi,,1.26 +pdb1HYS,,16.58 +pwtk,,12.41 +rail4284,,14.61 +rma10,,10.85 +scircuit,,1.71 +shipsec1,,13.00 +webbase_1M,,0.99 diff 
--git a/docs/performance/cuSPARSE-7.0/Tesla_K40/Csr2Dense.csv b/docs/performance/cuSPARSE-7.0/Tesla_K40/Csr2Dense.csv new file mode 100644 index 0000000..7853e12 --- /dev/null +++ b/docs/performance/cuSPARSE-7.0/Tesla_K40/Csr2Dense.csv @@ -0,0 +1,15 @@ +Csr2Dense,, +,, +CUDA driver:,346.47, +OpenCL Device:,k40c, +,,Gi-Elements/s +Dubcova1,,0.011 +hydr1c_A_11,,0.012 +hydr1c_A_72,,0.012 +hydr1c_A_76,,0.012 +Maragal_6,,0.026 +Na5,,0.156 +psse1,,0.005 +Reuters911,,0.022 +Si10H16,,0.031 +tomography,,0.566 diff --git a/docs/performance/cuSPARSE-7.0/Tesla_K40/README.md b/docs/performance/cuSPARSE-7.0/Tesla_K40/README.md new file mode 100644 index 0000000..27fe3b8 --- /dev/null +++ b/docs/performance/cuSPARSE-7.0/Tesla_K40/README.md @@ -0,0 +1,26 @@ +# Benchmarking +## Hardware +Tesla K40c + +## Environment +OpenSUSE 13.2 + +cuSPARSE v7.0 + +Tesla driver 346.47 + +## Tool +[cusparse-bench](clSPARSE/src/benchmarks/cusparse-bench) + +## Methodology +For each data point, we took 20 samples. Each sample consists of 20 calls +with a wait afterward. We benchmark with respect to the API, utilizing host timers +(not pure kernel time). +Outlying samples beyond 1 standard deviation were removed. 
+ +Conversion routines benchmarked as number of Gi-Elements/s converted + +SpM-dV routine calculated as Gi-Bytes/s +```c +( sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows ) ) / time_in_ns( ); +``` diff --git a/docs/performance/cuSPARSE-7.0/Tesla_K40/SpM-dV.csv b/docs/performance/cuSPARSE-7.0/Tesla_K40/SpM-dV.csv new file mode 100644 index 0000000..f911394 --- /dev/null +++ b/docs/performance/cuSPARSE-7.0/Tesla_K40/SpM-dV.csv @@ -0,0 +1,17 @@ +SpM-dV,, +,, +CUDA driver:,346.47, +OpenCL Device:,k40c, +,,Gi-Bytes/s +cant,,96.5 +consph,,102.1 +cop20k_A,,72.1 +mac_econ_fwd500,,47.2 +mc2depi,,74.3 +pdb1HYS,,103.0 +pwtk,,74.2 +rail4284,,34.8 +rma10,,64.9 +scircuit,,51.2 +shipsec1,,161.2 +webbase_1M,,32.6 diff --git a/docs/performance/cuSPARSE-7.0/Tesla_K40/cusparse-bench.sh b/docs/performance/cuSPARSE-7.0/Tesla_K40/cusparse-bench.sh new file mode 100644 index 0000000..4e2958f --- /dev/null +++ b/docs/performance/cuSPARSE-7.0/Tesla_K40/cusparse-bench.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +#Executable +cusparse_bench=~/code/github/clMathLibraries/bin/clSPARSE/release/clSPARSE-build/staging/cusparse-bench + +#Data directories +mtx_cant=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/cant +mtx_consph=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/consph +mtx_cop20k_A=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/cop20k_A +mtx_mac_econ_fwd500=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/mac_econ_fwd500 +mtx_mc2depi=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/mc2depi +mtx_pdb1HYS=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/pdb1HYS +mtx_pwtk=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/pwtk +mtx_rail4284=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/rail4284 
+mtx_rma10=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/rma10 +mtx_scircuit=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/scircuit +mtx_shipsec1=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/shipsec1 +mtx_webbase_1M=~/code/github/clMathLibraries/bin/deps/release/Externals/MTX/Bell_Garland/webbase-1M + +mkdir -p timings-cusparse-spmdv +${cusparse_bench} -f spmdv -d ${mtx_cant} > timings-cusparse-spmdv/cant.txt +${cusparse_bench} -f spmdv -d ${mtx_consph} > timings-cusparse-spmdv/consph.txt +${cusparse_bench} -f spmdv -d ${mtx_cop20k_A} > timings-cusparse-spmdv/cop20k_A.txt +${cusparse_bench} -f spmdv -d ${mtx_mac_econ_fwd500} > timings-cusparse-spmdv/mac_econ_fwd500.txt +${cusparse_bench} -f spmdv -d ${mtx_mc2depi} > timings-cusparse-spmdv/mc2depi.txt +${cusparse_bench} -f spmdv -d ${mtx_pdb1HYS} > timings-cusparse-spmdv/pdb1HYS.txt +${cusparse_bench} -f spmdv -d ${mtx_pwtk} > timings-cusparse-spmdv/pwtk.txt +${cusparse_bench} -f spmdv -d ${mtx_rail4284} > timings-cusparse-spmdv/rail4284.txt +${cusparse_bench} -f spmdv -d ${mtx_rma10} > timings-cusparse-spmdv/rma10.txt +${cusparse_bench} -f spmdv -d ${mtx_scircuit} > timings-cusparse-spmdv/scircuit.txt +${cusparse_bench} -f spmdv -d ${mtx_shipsec1} > timings-cusparse-spmdv/shipsec1.txt +${cusparse_bench} -f spmdv -d ${mtx_webbase_1M} > timings-cusparse-spmdv/webbase_1M.txt