Skip to content

Commit

Permalink
add curve
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Jan 15, 2022
1 parent e131ebc commit bec9d28
Show file tree
Hide file tree
Showing 55 changed files with 1,290 additions and 70 deletions.
Binary file modified 06/slides.pptx
Binary file not shown.
23 changes: 23 additions & 0 deletions 07/01_bandwidth/01/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Default to an optimized build (these are benchmarks), but do not override a
# build type the user passed explicitly, e.g. -DCMAKE_BUILD_TYPE=Debug.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# OpenMP provides the "#pragma omp parallel for" loops used in main.cpp.
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark provides BENCHMARK(), BENCHMARK_MAIN() and benchmark::State.
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point semantics plus AVX / host-native instruction sets.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
53 changes: 53 additions & 0 deletions 07/01_bandwidth/01/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include <iostream>
#include <vector>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <array>
#include <benchmark/benchmark.h>
#include <x86intrin.h>
#include <omp.h>

// Number of float elements in the benchmark buffer (2^26).
constexpr size_t n = 1<<26;

// Global buffer that every benchmark below writes into.
std::vector<float> a(n); // 256MB (2^26 elements, assuming 4-byte float)

// Serial baseline: stores the constant 1 into every element of the global
// buffer `a`, once per benchmark iteration.
void BM_fill(benchmark::State &bm) {
    for (auto _: bm) {
        for (size_t i = 0; i < n; i++) {
            a[i] = 1;
        }
        // Mark the buffer as observed so the stores above cannot be treated
        // as dead by the optimizer.
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_fill);

// Parallel variant of the fill benchmark: the same constant store into every
// element of `a`, with the loop iterations distributed by OpenMP.
void BM_parallel_fill(benchmark::State &bm) {
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            a[i] = 1;
        }
        // Mark the buffer as observed so the stores above cannot be treated
        // as dead by the optimizer.
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_parallel_fill);

// Serial benchmark with real per-element computation: stores sin(i) into a[i]
// for every index i of the global buffer.
void BM_sine(benchmark::State &bm) {
    for (auto _: bm) {
        for (size_t i = 0; i < n; i++) {
            // The integer index selects the double overload of std::sin; the
            // result is narrowed to float when stored.
            a[i] = std::sin(i);
        }
        // Mark the buffer as observed so the stores above cannot be treated
        // as dead by the optimizer.
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_sine);

// Parallel variant of the sine benchmark: stores sin(i) into a[i] for every
// index, with the loop iterations distributed by OpenMP.
void BM_parallel_sine(benchmark::State &bm) {
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            // The integer index selects the double overload of std::sin; the
            // result is narrowed to float when stored.
            a[i] = std::sin(i);
        }
        // Mark the buffer as observed so the stores above cannot be treated
        // as dead by the optimizer.
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_parallel_sine);

// Google Benchmark entry point: generates main(), which runs the benchmarks
// registered with BENCHMARK() above.
BENCHMARK_MAIN();
File renamed without changes.
File renamed without changes.
File renamed without changes.
23 changes: 23 additions & 0 deletions 07/01_bandwidth/02/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Default to an optimized build (these are benchmarks), but do not override a
# build type the user passed explicitly, e.g. -DCMAKE_BUILD_TYPE=Debug.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# OpenMP provides the "#pragma omp parallel for" loops used in main.cpp.
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark provides BENCHMARK(), BENCHMARK_MAIN() and benchmark::State.
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point semantics plus AVX / host-native instruction sets.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
36 changes: 36 additions & 0 deletions 07/01_bandwidth/02/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <iostream>
#include <vector>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <array>
#include <benchmark/benchmark.h>
#include <x86intrin.h>
#include <omp.h>

// Number of float elements in the benchmark buffer (2^28).
constexpr size_t n = 1<<28;

// Global buffer that every benchmark below updates in place.
std::vector<float> a(n); // 1GB (2^28 elements, assuming 4-byte float)

// Serial baseline: each benchmark iteration reads every element of the global
// buffer `a`, adds 1 and writes it back.
void BM_serial_add(benchmark::State &bm) {
    for (auto _: bm) {
        // `a` holds exactly n elements, so walking the container visits the
        // same indices 0..n-1 in the same order.
        for (float &elem : a) {
            elem += 1;
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_serial_add);

// Parallel variant of the add benchmark: the same in-place "+ 1" over all n
// elements of `a`, with the loop iterations distributed by OpenMP.
void BM_parallel_add(benchmark::State &bm) {
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t idx = 0; idx < n; ++idx) {
            a[idx] += 1;
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_parallel_add);

// Google Benchmark entry point: generates main(), which runs the benchmarks
// registered with BENCHMARK() above.
BENCHMARK_MAIN();
File renamed without changes.
File renamed without changes.
File renamed without changes.
23 changes: 23 additions & 0 deletions 07/01_bandwidth/03/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Default to an optimized build (these are benchmarks), but do not override a
# build type the user passed explicitly, e.g. -DCMAKE_BUILD_TYPE=Debug.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# OpenMP provides the "#pragma omp parallel for" loops used in main.cpp.
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark provides BENCHMARK(), BENCHMARK_MAIN() and benchmark::State.
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point semantics plus AVX / host-native instruction sets.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
40 changes: 40 additions & 0 deletions 07/01_bandwidth/03/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include <iostream>
#include <vector>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <array>
#include <benchmark/benchmark.h>
#include <x86intrin.h>
#include <omp.h>

// Number of float elements in the benchmark buffer (2^28).
constexpr size_t n = 1<<28;

// Global buffer that every benchmark below updates in place.
std::vector<float> a(n); // 1GB (2^28 elements, assuming 4-byte float)

// Per-element workload for the benchmarks: an arithmetic expression in x made
// of several multiplications and additions plus two divisions.
// Computes x * (x*x + 3.14*x - 1/(x+1)) + 42/(2.718 - x).
static float func(float x) {
    // Operations are kept in the same order as the single-expression form.
    const float inner = x * x + x * 3.14f - 1 / (x + 1);
    const float tail = 42 / (2.718f - x);
    return x * inner + tail;
}

// Serial baseline: each benchmark iteration replaces every element of the
// global buffer `a` with func() of its current value.
void BM_serial_func(benchmark::State &bm) {
    for (auto _: bm) {
        // `a` holds exactly n elements, so walking the container visits the
        // same indices 0..n-1 in the same order.
        for (float &elem : a) {
            elem = func(elem);
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_serial_func);

// Parallel variant: the same in-place func() update over all n elements of
// `a`, with the loop iterations distributed by OpenMP.
void BM_parallel_func(benchmark::State &bm) {
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t idx = 0; idx < n; ++idx) {
            const float current = a[idx];
            a[idx] = func(current);
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_parallel_func);

// Google Benchmark entry point: generates main(), which runs the benchmarks
// registered with BENCHMARK() above.
BENCHMARK_MAIN();
File renamed without changes.
File renamed without changes.
9 changes: 9 additions & 0 deletions 07/01_bandwidth/03/ticktock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

//#include <chrono>
//#define TICK(x) auto bench_##x = std::chrono::steady_clock::now();
//#define TOCK(x) std::cout << #x ": " << std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::steady_clock::now() - bench_##x).count() << "s" << std::endl;

#include <iostream>  // TOCK writes to std::cout; keep this header self-contained
#include <tbb/tick_count.h>

// TICK(x): records the current time in a local variable named bench_x.
#define TICK(x) auto bench_##x = tbb::tick_count::now();
// TOCK(x): prints "x: <seconds>s" to std::cout, where <seconds> is the time
// elapsed since the matching TICK(x) in the same scope.
#define TOCK(x) std::cout << #x ": " << (tbb::tick_count::now() - bench_##x).seconds() << "s" << std::endl;
23 changes: 23 additions & 0 deletions 07/01_bandwidth/04/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Default to an optimized build (these are benchmarks), but do not override a
# build type the user passed explicitly, e.g. -DCMAKE_BUILD_TYPE=Debug.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# OpenMP provides the "#pragma omp parallel for" loops and omp_set_num_threads().
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark provides BENCHMARK(), BENCHMARK_MAIN() and benchmark::State.
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point semantics plus AVX / host-native instruction sets.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
91 changes: 91 additions & 0 deletions 07/01_bandwidth/04/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#include <iostream>
#include <vector>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <array>
#include <benchmark/benchmark.h>
#include <x86intrin.h>
#include <omp.h>

// Number of float elements in the benchmark buffer (2^28).
constexpr size_t n = 1<<28;

// Global buffer that every benchmark below updates in place.
std::vector<float> a(n); // 1GB (2^28 elements, assuming 4-byte float)

// Per-element workload for the thread-count benchmarks: returns sqrt(x) * x.
static float funcA(float x) {
    // std::sqrt's float overload is guaranteed by <cmath>; the unqualified
    // C name sqrtf is only available in the global namespace by courtesy of
    // the implementation.
    return std::sqrt(x) * x;
}

// Applies funcA in place to every element of `a` with OpenMP asked to use
// 1 thread.
void BM_1funcA(benchmark::State &bm) {
    // The thread count is loop-invariant: request it once, outside the timed
    // iterations, so only the workload itself is measured.
    omp_set_num_threads(1);
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            a[i] = funcA(a[i]);
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_1funcA);

// Applies funcA in place to every element of `a` with OpenMP asked to use
// 2 threads.
void BM_2funcA(benchmark::State &bm) {
    // The thread count is loop-invariant: request it once, outside the timed
    // iterations, so only the workload itself is measured.
    omp_set_num_threads(2);
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            a[i] = funcA(a[i]);
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_2funcA);

// Applies funcA in place to every element of `a` with OpenMP asked to use
// 4 threads.
void BM_4funcA(benchmark::State &bm) {
    // The thread count is loop-invariant: request it once, outside the timed
    // iterations, so only the workload itself is measured.
    omp_set_num_threads(4);
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            a[i] = funcA(a[i]);
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_4funcA);

// Applies funcA in place to every element of `a` with OpenMP asked to use
// 6 threads.
void BM_6funcA(benchmark::State &bm) {
    // The thread count is loop-invariant: request it once, outside the timed
    // iterations, so only the workload itself is measured.
    omp_set_num_threads(6);
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            a[i] = funcA(a[i]);
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_6funcA);

// Applies funcA in place to every element of `a` with OpenMP asked to use
// 8 threads.
void BM_8funcA(benchmark::State &bm) {
    // The thread count is loop-invariant: request it once, outside the timed
    // iterations, so only the workload itself is measured.
    omp_set_num_threads(8);
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            a[i] = funcA(a[i]);
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_8funcA);

// Applies funcA in place to every element of `a` with OpenMP asked to use
// 10 threads.
void BM_10funcA(benchmark::State &bm) {
    // The thread count is loop-invariant: request it once, outside the timed
    // iterations, so only the workload itself is measured.
    omp_set_num_threads(10);
    for (auto _: bm) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            a[i] = funcA(a[i]);
        }
        benchmark::DoNotOptimize(a);
    }
}
BENCHMARK(BM_10funcA);

// Google Benchmark entry point: generates main(), which runs the benchmarks
// registered with BENCHMARK() above.
BENCHMARK_MAIN();
13 changes: 13 additions & 0 deletions 07/01_bandwidth/04/mtprint.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include <iostream>
#include <sstream>
#include <utility>  // std::forward

// Prints all arguments on one line, separated by single spaces and terminated
// by a newline.
//
// The line is assembled in a private string buffer and then handed to
// std::cout with a single insertion, presumably so that lines printed from
// different threads are less likely to interleave mid-line.
// std::cout itself is not flushed here.
template <class T, class ...Ts>
static void mtprint(T &&t, Ts &&...ts) {
    std::ostringstream line;
    line << std::forward<T>(t);
    // Fold over the remaining arguments, each preceded by a space.
    ((line << ' ' << std::forward<Ts>(ts)), ...);
    // A plain newline: flushing an in-memory string stream would do nothing.
    line << '\n';
    std::cout << line.str();
}
Loading

0 comments on commit bec9d28

Please sign in to comment.