Skip to content

Commit

Permalink
add jacobi
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Jan 16, 2022
1 parent bec9d28 commit 2e3de62
Show file tree
Hide file tree
Showing 54 changed files with 1,160 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ a.out
build
.~*
GNUmakefile
r000*/
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
23 changes: 23 additions & 0 deletions 07/01_bandwidth/07/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Benchmarks are only meaningful with optimizations on, so default to Release.
# Only fill the value in when the user has not chosen one, so that
# `cmake -DCMAKE_BUILD_TYPE=Debug ..` is still honored.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# main.cpp uses `#pragma omp parallel for`.
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark supplies BENCHMARK() / BENCHMARK_MAIN().
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point semantics plus vector instructions for the target.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
26 changes: 26 additions & 0 deletions 07/01_bandwidth/07/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include <iostream>
#include <vector>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <array>
#include <benchmark/benchmark.h>
#include <x86intrin.h>
#include <omp.h>

// Number of elements in the benchmark buffer: 2^28.
constexpr size_t n = 1<<28;

// File-level buffer the benchmarks below read and write; its n elements are
// value-initialized (0.0f) when the program starts.
std::vector<float> a(n); // 1GB

// Benchmark: add 1 to every element of the global buffer `a`.
// Each timed iteration makes one full pass over all n elements, with the
// loop iterations distributed over the OpenMP thread team.
void BM_parallel_add(benchmark::State &bm) {
    for (auto _ : bm) {
        #pragma omp parallel for
        for (size_t idx = 0; idx < n; ++idx) {
            a[idx] += 1;
        }
        // Keep the compiler from treating the updated buffer as unused.
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_parallel_add);

BENCHMARK_MAIN();
13 changes: 13 additions & 0 deletions 07/01_bandwidth/07/mtprint.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include <iostream>
#include <sstream>

/// Prints all arguments on one line, separated by single spaces and
/// terminated by a newline.
///
/// The whole line is first assembled in a local string stream and then handed
/// to std::cout in a single insertion (presumably so that lines printed from
/// several threads are not interleaved piecewise; std::cout itself is not
/// flushed here).
///
/// @param t   first value to print (anything streamable with operator<<)
/// @param ts  further values, each preceded by one space
template <class T, class ...Ts>
static void mtprint(T &&t, Ts &&...ts) {
    std::ostringstream line;
    line << std::forward<T>(t);
    ((line << ' ' << std::forward<Ts>(ts)), ...);
    // put('\n') writes the same character std::endl would, without the
    // pointless flush of an in-memory stream.
    line.put('\n');
    std::cout << line.str();
}
66 changes: 66 additions & 0 deletions 07/01_bandwidth/07/pod.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#pragma once

#include <new>
#include <utility>

/// Thin wrapper around a single T whose default constructor does nothing.
///
/// Because the default constructor is user-provided and empty, the wrapped
/// value is only default-initialized: for scalar T (int, float, ...) no
/// zeroing happens, even when the pod itself is value-initialized
/// (e.g. `pod<int>()`). Apart from that, a pod<T> converts implicitly to and
/// from T.
template <class T>
struct pod {
private:
    T m_t;

public:
    /// Intentionally empty and NOT `= default`: leaves m_t default-initialized.
    pod() {}

    // Copy/move are memberwise, so let the compiler generate them. This keeps
    // pod<T> trivially copyable when T is, and gives the move operations the
    // same noexcept guarantee as T's own.
    pod(pod &&) = default;
    pod(pod const &) = default;
    pod &operator=(pod &&) = default;
    pod &operator=(pod const &) = default;

    /// Implicit conversion from T (moving).
    pod(T &&t) : m_t(std::move(t)) {}

    /// Implicit conversion from T (copying).
    pod(T const &t) : m_t(t) {}

    /// Assigns a new wrapped value by move.
    pod &operator=(T &&t) {
        m_t = std::move(t);
        return *this;
    }

    /// Assigns a new wrapped value by copy.
    pod &operator=(T const &t) {
        m_t = t;
        return *this;
    }

    /// Implicit read-only access to the wrapped value.
    operator T const &() const {
        return m_t;
    }

    /// Implicit mutable access to the wrapped value.
    operator T &() {
        return m_t;
    }

    /// Explicit read-only access to the wrapped value.
    T const &get() const {
        return m_t;
    }

    /// Explicit mutable access to the wrapped value.
    T &get() {
        return m_t;
    }

    /// Constructs a new T from `ts...` directly in the storage of the wrapped
    /// value. The previous value is NOT destroyed first; call destroy()
    /// beforehand if T owns resources.
    /// @return *this, to allow chaining
    template <class ...Ts>
    pod &emplace(Ts &&...ts) {
        ::new (&m_t) T(std::forward<Ts>(ts)...);
        return *this;
    }

    /// Runs T's destructor on the wrapped value. The storage stays part of
    /// this object, and the member is destroyed again when the pod itself is.
    void destroy() {
        m_t.~T();
    }
};
9 changes: 9 additions & 0 deletions 07/01_bandwidth/07/ticktock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

//#include <chrono>
//#define TICK(x) auto bench_##x = std::chrono::steady_clock::now();
//#define TOCK(x) std::cout << #x ": " << std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::steady_clock::now() - bench_##x).count() << "s" << std::endl;

#include <tbb/tick_count.h>
// TICK(x): records the current tbb::tick_count in a local variable named
// bench_<x>. The macro already ends in ';'.
#define TICK(x) auto bench_##x = tbb::tick_count::now();
// TOCK(x): prints "<x>: <seconds>s" to std::cout, where <seconds> is the time
// elapsed since the matching TICK(x) in the same scope. This header does not
// include <iostream> itself, so the including file has to.
#define TOCK(x) std::cout << #x ": " << (tbb::tick_count::now() - bench_##x).seconds() << "s" << std::endl;
25 changes: 18 additions & 7 deletions 07/02_cache/01/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
#include <x86intrin.h>
#include <omp.h>

// 32KB, 256KB, 12MB
// L1: 32KB
// L2: 256KB
// L3: 12MB

constexpr size_t n = 1<<28;

Expand All @@ -23,14 +25,23 @@ void BM_fill1GB(benchmark::State &bm) {
}
BENCHMARK(BM_fill1GB);

void BM_fill32MB(benchmark::State &bm) {
void BM_fill128MB(benchmark::State &bm) {
for (auto _: bm) {
for (size_t i = 0; i < (1<<23); i++) {
for (size_t i = 0; i < (1<<25); i++) {
a[i] = 1;
}
}
}
BENCHMARK(BM_fill32MB);
BENCHMARK(BM_fill128MB);

// Benchmark: store 1 into the first 2^22 elements of the file-level buffer
// `a` on every timed iteration (16MB per pass, going by the benchmark's name;
// the declaration of `a` is outside the lines shown here).
void BM_fill16MB(benchmark::State &bm) {
    constexpr size_t count = size_t{1} << 22;
    for (auto _ : bm) {
        for (size_t idx = 0; idx < count; ++idx) {
            a[idx] = 1;
        }
    }
}
BENCHMARK(BM_fill16MB);

void BM_fill1MB(benchmark::State &bm) {
for (auto _: bm) {
Expand All @@ -50,13 +61,13 @@ void BM_fill128KB(benchmark::State &bm) {
}
BENCHMARK(BM_fill128KB);

void BM_fill4KB(benchmark::State &bm) {
void BM_fill16KB(benchmark::State &bm) {
for (auto _: bm) {
for (size_t i = 0; i < (1<<10); i++) {
for (size_t i = 0; i < (1<<12); i++) {
a[i] = 1;
}
}
}
BENCHMARK(BM_fill4KB);
BENCHMARK(BM_fill16KB);

BENCHMARK_MAIN();
23 changes: 23 additions & 0 deletions 07/04_fusion/00/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Benchmarks are only meaningful with optimizations on, so default to Release.
# Only fill the value in when the user has not chosen one, so that
# `cmake -DCMAKE_BUILD_TYPE=Debug ..` is still honored.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# main.cpp uses `#pragma omp parallel for`.
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark supplies BENCHMARK() / BENCHMARK_MAIN().
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point semantics plus vector instructions for the target.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
46 changes: 46 additions & 0 deletions 07/04_fusion/00/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <iostream>
#include <vector>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <array>
#include <benchmark/benchmark.h>
#include <x86intrin.h>
#include <omp.h>

// L1: 32KB
// L2: 256KB
// L3: 12MB

// Number of elements in the benchmark buffer: 2^28.
constexpr size_t n = 1<<28;

// File-level buffer shared by both benchmarks below; its n elements are
// value-initialized (0.0f) when the program starts.
std::vector<float> a(n);

// Benchmark (unfused baseline): computes a[i] = a[i] * 2 + 1 over the whole
// global buffer in two separate parallel passes, so every element is read and
// written twice per timed iteration.
void BM_original(benchmark::State &bm) {
    for (auto _ : bm) {
        // Pass 1: scale every element.
        #pragma omp parallel for
        for (size_t idx = 0; idx < n; ++idx) {
            a[idx] *= 2;
        }
        // Pass 2: offset every element.
        #pragma omp parallel for
        for (size_t idx = 0; idx < n; ++idx) {
            a[idx] += 1;
        }
        // Keep the compiler from treating the updated buffer as unused.
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_original);

// Benchmark (fused loops): applies the same "* 2" then "+ 1" update to every
// element of the global buffer, but inside a single parallel pass, so each
// element is visited once per timed iteration.
void BM_optimized(benchmark::State &bm) {
    for (auto _ : bm) {
        #pragma omp parallel for
        for (size_t idx = 0; idx < n; ++idx) {
            float &elem = a[idx];
            elem *= 2;
            elem += 1;
        }
        // Keep the compiler from treating the updated buffer as unused.
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_optimized);

BENCHMARK_MAIN();
13 changes: 13 additions & 0 deletions 07/04_fusion/00/mtprint.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include <iostream>
#include <sstream>

/// Prints all arguments on one line, separated by single spaces and
/// terminated by a newline.
///
/// The whole line is first assembled in a local string stream and then handed
/// to std::cout in a single insertion (presumably so that lines printed from
/// several threads are not interleaved piecewise; std::cout itself is not
/// flushed here).
///
/// @param t   first value to print (anything streamable with operator<<)
/// @param ts  further values, each preceded by one space
template <class T, class ...Ts>
static void mtprint(T &&t, Ts &&...ts) {
    std::ostringstream line;
    line << std::forward<T>(t);
    ((line << ' ' << std::forward<Ts>(ts)), ...);
    // put('\n') writes the same character std::endl would, without the
    // pointless flush of an in-memory stream.
    line.put('\n');
    std::cout << line.str();
}
66 changes: 66 additions & 0 deletions 07/04_fusion/00/pod.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#pragma once

#include <new>
#include <utility>

/// Thin wrapper around a single T whose default constructor does nothing.
///
/// Because the default constructor is user-provided and empty, the wrapped
/// value is only default-initialized: for scalar T (int, float, ...) no
/// zeroing happens, even when the pod itself is value-initialized
/// (e.g. `pod<int>()`). Apart from that, a pod<T> converts implicitly to and
/// from T.
template <class T>
struct pod {
private:
    T m_t;

public:
    /// Intentionally empty and NOT `= default`: leaves m_t default-initialized.
    pod() {}

    // Copy/move are memberwise, so let the compiler generate them. This keeps
    // pod<T> trivially copyable when T is, and gives the move operations the
    // same noexcept guarantee as T's own.
    pod(pod &&) = default;
    pod(pod const &) = default;
    pod &operator=(pod &&) = default;
    pod &operator=(pod const &) = default;

    /// Implicit conversion from T (moving).
    pod(T &&t) : m_t(std::move(t)) {}

    /// Implicit conversion from T (copying).
    pod(T const &t) : m_t(t) {}

    /// Assigns a new wrapped value by move.
    pod &operator=(T &&t) {
        m_t = std::move(t);
        return *this;
    }

    /// Assigns a new wrapped value by copy.
    pod &operator=(T const &t) {
        m_t = t;
        return *this;
    }

    /// Implicit read-only access to the wrapped value.
    operator T const &() const {
        return m_t;
    }

    /// Implicit mutable access to the wrapped value.
    operator T &() {
        return m_t;
    }

    /// Explicit read-only access to the wrapped value.
    T const &get() const {
        return m_t;
    }

    /// Explicit mutable access to the wrapped value.
    T &get() {
        return m_t;
    }

    /// Constructs a new T from `ts...` directly in the storage of the wrapped
    /// value. The previous value is NOT destroyed first; call destroy()
    /// beforehand if T owns resources.
    /// @return *this, to allow chaining
    template <class ...Ts>
    pod &emplace(Ts &&...ts) {
        ::new (&m_t) T(std::forward<Ts>(ts)...);
        return *this;
    }

    /// Runs T's destructor on the wrapped value. The storage stays part of
    /// this object, and the member is destroyed again when the pod itself is.
    void destroy() {
        m_t.~T();
    }
};
9 changes: 9 additions & 0 deletions 07/04_fusion/00/ticktock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

//#include <chrono>
//#define TICK(x) auto bench_##x = std::chrono::steady_clock::now();
//#define TOCK(x) std::cout << #x ": " << std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::steady_clock::now() - bench_##x).count() << "s" << std::endl;

#include <tbb/tick_count.h>
// TICK(x): records the current tbb::tick_count in a local variable named
// bench_<x>. The macro already ends in ';'.
#define TICK(x) auto bench_##x = tbb::tick_count::now();
// TOCK(x): prints "<x>: <seconds>s" to std::cout, where <seconds> is the time
// elapsed since the matching TICK(x) in the same scope. This header does not
// include <iostream> itself, so the including file has to.
#define TOCK(x) std::cout << #x ": " << (tbb::tick_count::now() - bench_##x).seconds() << "s" << std::endl;
23 changes: 23 additions & 0 deletions 07/04_fusion/01/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Benchmarks are only meaningful with optimizations on, so default to Release.
# Only fill the value in when the user has not chosen one, so that
# `cmake -DCMAKE_BUILD_TYPE=Debug ..` is still honored.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# OpenMP runtime for `#pragma omp` in the sources.
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark library.
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point semantics plus vector instructions for the target.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
Loading

0 comments on commit 2e3de62

Please sign in to comment.