Skip to content

Commit

Permalink
tmp
Browse files Browse the repository at this point in the history
  • Loading branch information
archibate committed Jan 22, 2022
1 parent 7a16bfb commit b49c0d5
Show file tree
Hide file tree
Showing 16 changed files with 836 additions and 1 deletion.
23 changes: 23 additions & 0 deletions 07/03_prefetch/07/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Build script for the `main` benchmark executable (main.cpp).
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Default to an optimized build, but only when the user has not chosen a build
# type themselves (e.g. -DCMAKE_BUILD_TYPE=Debug); an unconditional set() would
# silently override that choice.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

# main.cpp uses `#pragma omp parallel for`.
find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB REQUIRED)
#target_link_libraries(main PUBLIC TBB::tbb)

# Google Benchmark provides BENCHMARK()/BENCHMARK_MAIN() used by main.cpp.
find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point rules plus vector instruction sets.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native)
endif()
52 changes: 52 additions & 0 deletions 07/03_prefetch/07/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#include <iostream>
#include <vector>
#include <cmath>
#include <cstring>
#include <cstdlib>
#include <array>
#include <benchmark/benchmark.h>
#include <x86intrin.h>
#include <omp.h>

// L1: 32KB
// L2: 256KB
// L3: 12MB

// Number of int elements every benchmark writes: 2^27.
// The trailing size note corresponds to 2^27 elements of 4 bytes each
// (assumes sizeof(int) == 4 — TODO confirm for the target platform).
constexpr size_t n = 1<<27; // 512MB

// Global buffer of n ints that all benchmarks in this file store into.
std::vector<int> a(n);

// Benchmark: on every timed iteration, store 0 into each of the n elements of
// the global vector `a`, with the loop annotated for OpenMP work sharing.
// NOTE(review): presumably meant to be compared against BM_write1 (same loop,
// non-zero value); a compiler may lower a zero-fill loop differently from a
// non-zero fill — confirm against the generated code before drawing conclusions.
void BM_write0(benchmark::State &bm) {
for (auto _: bm) {
#pragma omp parallel for
for (size_t i = 0; i < n; i++) {
a[i] = 0;
}
// Hand `a` to the benchmark library so the stores above are not optimized away.
benchmark::DoNotOptimize(a);
}
}
// Register the function with Google Benchmark.
BENCHMARK(BM_write0);

// Benchmark: on every timed iteration, store 1 into each of the n elements of
// the global vector `a` using ordinary assignments, with the loop annotated
// for OpenMP work sharing.
void BM_write1(benchmark::State &bm) {
for (auto _: bm) {
#pragma omp parallel for
for (size_t i = 0; i < n; i++) {
a[i] = 1;
}
// Hand `a` to the benchmark library so the stores above are not optimized away.
benchmark::DoNotOptimize(a);
}
}
// Register the function with Google Benchmark.
BENCHMARK(BM_write1);

// Benchmark: same fill-with-1 loop as an ordinary assignment version, but each
// element is written through the _mm_stream_si32 intrinsic (a 32-bit store
// carrying a non-temporal hint, per the Intel intrinsics documentation).
// NOTE(review): no explicit store fence follows the streaming stores here;
// confirm whether one is wanted before other code reads `a`.
void BM_write1_streamed(benchmark::State &bm) {
for (auto _: bm) {
#pragma omp parallel for
for (size_t i = 0; i < n; i++) {
_mm_stream_si32(&a[i], 1);
}
// Hand `a` to the benchmark library so the stores above are not optimized away.
benchmark::DoNotOptimize(a);
}
}
// Register the function with Google Benchmark.
BENCHMARK(BM_write1_streamed);

BENCHMARK_MAIN();
13 changes: 13 additions & 0 deletions 07/03_prefetch/07/mtprint.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include <iostream>
#include <sstream>

// Prints all arguments on one line, separated by single spaces and terminated
// by a newline. The whole line is assembled in a private buffer first and then
// handed to std::cout with a single insertion, so a line is never emitted
// piecewise by this function.
template <class T, class ...Ts>
static void mtprint(T &&t, Ts &&...ts) {
    std::ostringstream line;

    // First argument goes in without a leading separator.
    line << std::forward<T>(t);

    // Every remaining argument is preceded by exactly one space.
    auto append_spaced = [&line](auto &&item) {
        line << ' ' << std::forward<decltype(item)>(item);
    };
    (append_spaced(std::forward<Ts>(ts)), ...);

    line << '\n';
    std::cout << line.str();
}
66 changes: 66 additions & 0 deletions 07/03_prefetch/07/pod.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#pragma once

#include <new>
#include <utility>

/// Wrapper holding a single T that converts implicitly to and from T.
///
/// The default constructor is user-provided and empty, so constructing a
/// pod<T> (including value-initialization such as `pod<int>()`) only
/// default-initializes the wrapped object; for scalar T its value is then
/// indeterminate until assigned.
///
/// Copy and move operations are defaulted so that they inherit T's exact
/// properties: they are noexcept / trivial / deleted precisely when T's are.
/// This lets containers move pod<T> elements on reallocation and lets
/// pod<move-only type> be used at all.
template <class T>
struct pod {
private:
    T m_t;

public:
    /// Intentionally does nothing: m_t is default-initialized, not zeroed.
    pod() {}

    pod(pod &&p) = default;

    pod(pod const &p) = default;

    pod &operator=(pod &&p) = default;

    pod &operator=(pod const &p) = default;

    /// Implicit conversions from T (by move and by copy).
    pod(T &&t) : m_t(std::move(t)) {}

    pod(T const &t) : m_t(t) {}

    /// Assignment directly from a T.
    pod &operator=(T &&t) {
        m_t = std::move(t);
        return *this;
    }

    pod &operator=(T const &t) {
        m_t = t;
        return *this;
    }

    /// Implicit access to the wrapped object.
    operator T const &() const {
        return m_t;
    }

    operator T &() {
        return m_t;
    }

    /// Explicit access to the wrapped object.
    T const &get() const {
        return m_t;
    }

    T &get() {
        return m_t;
    }

    /// Constructs a new T from `ts...` directly in the storage of the wrapped
    /// object. The previous object is NOT destroyed first; call destroy()
    /// beforehand when T's destructor must run.
    template <class ...Ts>
    pod &emplace(Ts &&...ts) {
        ::new (&m_t) T(std::forward<Ts>(ts)...);
        return *this;
    }

    /// Runs T's destructor on the wrapped object without releasing storage.
    void destroy() {
        m_t.~T();
    }
};
9 changes: 9 additions & 0 deletions 07/03_prefetch/07/ticktock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

//#include <chrono>
//#define TICK(x) auto bench_##x = std::chrono::steady_clock::now();
//#define TOCK(x) std::cout << #x ": " << std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::steady_clock::now() - bench_##x).count() << "s" << std::endl;

#include <tbb/tick_count.h>
// TICK(x): records the current tbb::tick_count in a local variable named
// bench_<x>. The expansion already ends in a semicolon.
#define TICK(x) \
    auto bench_##x = tbb::tick_count::now();

// TOCK(x): prints "<x>: <seconds>s" to std::cout, where <seconds> is the time
// elapsed since the matching TICK(x) in the same scope, then flushes via
// std::endl. The including file must make std::cout available (<iostream>).
#define TOCK(x) \
    std::cout << #x ": " \
              << (tbb::tick_count::now() - bench_##x).seconds() \
              << "s" << std::endl;
28 changes: 28 additions & 0 deletions 07/09_multicore/01/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Build script for the `main` benchmark executable (main.cpp).
cmake_minimum_required(VERSION 3.10)

set(CMAKE_CXX_STANDARD 17)

# Default to an optimized build, but only when the user has not chosen a build
# type themselves (e.g. -DCMAKE_BUILD_TYPE=Debug); an unconditional set() would
# silently override that choice.
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

project(main LANGUAGES CXX)

add_executable(main main.cpp)

find_package(OpenMP REQUIRED)
target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX)

#find_package(TBB)
#if (NOT TARGET TBB::tbb)
#message(WARNING "TBB not found")
#else()
#target_link_libraries(main PUBLIC TBB::tbb)
#target_compile_definitions(main PUBLIC -DWITH_TBB)
#endif()

find_package(benchmark REQUIRED)
target_link_libraries(main PUBLIC benchmark::benchmark)

# Relaxed floating-point rules plus vector instruction sets; narrowing
# diagnostics are additionally silenced on non-MSVC compilers.
if (MSVC)
    target_compile_options(main PUBLIC /fp:fast /arch:AVX)
else()
    target_compile_options(main PUBLIC -ffast-math -march=native -Wno-narrowing)
endif()
169 changes: 169 additions & 0 deletions 07/09_multicore/01/alignalloc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
#pragma once

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <utility>
#include <type_traits>
#include <stdexcept>

// https://stackoverflow.com/questions/12942548/making-stdvector-allocate-aligned-memory
namespace detail {
    /// Allocates at least `size` bytes whose address is a multiple of `align`.
    ///
    /// std::aligned_alloc requires the requested size to be an integral
    /// multiple of the alignment, so the size is rounded up to the next
    /// multiple before the call.
    ///
    /// @param align  required alignment in bytes; must be an alignment the
    ///               implementation supports (0 yields nullptr).
    /// @param size   minimum number of bytes requested.
    /// @return pointer to the storage, or nullptr on failure (including when
    ///         rounding `size` up would overflow size_t). Release it with
    ///         deallocate_aligned_memory().
    ///
    /// Declared `inline` because this header defines the function; without it
    /// including the header from two translation units is an ODR violation.
    inline void* allocate_aligned_memory(size_t align, size_t size) {
        if (align == 0) {
            return nullptr;
        }
        const size_t remainder = size % align;
        if (remainder != 0) {
            const size_t padding = align - remainder;
            if (size > static_cast<size_t>(-1) - padding) {
                return nullptr;  // rounding up would wrap around
            }
            size += padding;
        }
        return std::aligned_alloc(align, size);
    }

    /// Releases storage obtained from allocate_aligned_memory(); nullptr is
    /// accepted, as with std::free.
    inline void deallocate_aligned_memory(void* ptr) noexcept {
        std::free(ptr);
    }
}

// Primary template declaration; the alignment defaults to 64 bytes.
template <typename T, size_t Align = 64>
class AlignedAllocator;

// Specialization for void: carries only the pointer typedefs and rebind, since
// no objects of type void can be allocated or constructed.
template <size_t Align>
class AlignedAllocator<void, Align>
{
public:
    using pointer = void*;
    using const_pointer = const void*;
    using value_type = void;

    // Maps this allocator to the one for element type U with the same alignment.
    template <class U>
    struct rebind
    {
        using other = AlignedAllocator<U, Align>;
    };
};


/// Stateless allocator for T that obtains its storage through
/// detail::allocate_aligned_memory with alignment `Align`, for use with
/// standard containers, e.g. std::vector<T, AlignedAllocator<T>>.
template <typename T, size_t Align>
class AlignedAllocator
{
public:
    typedef T value_type;
    typedef T* pointer;
    typedef const T* const_pointer;
    typedef T& reference;
    typedef const T& const_reference;
    typedef size_t size_type;
    typedef ptrdiff_t difference_type;

    typedef std::true_type propagate_on_container_move_assignment;

    /// Maps this allocator to the one for element type U with the same alignment.
    template <class U>
    struct rebind { typedef AlignedAllocator<U, Align> other; };

public:
    AlignedAllocator() noexcept
    {}

    /// Converting constructor; there is no state to copy.
    template <class U>
    AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept
    {}

    /// Largest element count for which the byte size, plus room for the
    /// alignment, still fits in size_type.
    size_type
    max_size() const noexcept
    { return (size_type(~0) - size_type(Align)) / sizeof(T); }

    pointer
    address(reference x) const noexcept
    { return std::addressof(x); }

    const_pointer
    address(const_reference x) const noexcept
    { return std::addressof(x); }

    /// Allocates uninitialized storage for `n` objects of type T.
    ///
    /// @param n  number of elements; the second (hint) parameter is ignored.
    /// @return pointer to the storage.
    /// @throws std::bad_alloc if `n` exceeds max_size() or the underlying
    ///         allocation returns nullptr.
    pointer
    allocate(size_type n, typename AlignedAllocator<void, Align>::const_pointer = nullptr)
    {
        // Without this check n * sizeof(T) could wrap around and a far too
        // small buffer would be returned as if the request had succeeded.
        if (n > max_size()) {
            throw std::bad_alloc();
        }

        const size_type alignment = static_cast<size_type>( Align );
        void* ptr = detail::allocate_aligned_memory(alignment , n * sizeof(T));
        if (ptr == nullptr) {
            throw std::bad_alloc();
        }

        return static_cast<pointer>(ptr);
    }

    /// Returns storage obtained from allocate(); the element count is unused.
    void
    deallocate(pointer p, size_type) noexcept
    { detail::deallocate_aligned_memory(p); }

    /// Constructs a U at `p` from the forwarded arguments (placement new).
    template <class U, class ...Args>
    void
    construct(U* p, Args&&... args)
    { ::new(reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...); }

    /// Runs the destructor of the object at `p` without releasing storage.
    void
    destroy(pointer p)
    { p->~T(); }
};


/// Specialization for const-qualified element types: same allocation scheme
/// as the primary template, but pointers and references are to const T.
template <typename T, size_t Align>
class AlignedAllocator<const T, Align>
{
public:
    typedef T value_type;
    typedef const T* pointer;
    typedef const T* const_pointer;
    typedef const T& reference;
    typedef const T& const_reference;
    typedef size_t size_type;
    typedef ptrdiff_t difference_type;

    typedef std::true_type propagate_on_container_move_assignment;

    /// Maps this allocator to the one for element type U with the same alignment.
    template <class U>
    struct rebind { typedef AlignedAllocator<U, Align> other; };

public:
    AlignedAllocator() noexcept
    {}

    /// Converting constructor; there is no state to copy.
    template <class U>
    AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept
    {}

    /// Largest element count for which the byte size, plus room for the
    /// alignment, still fits in size_type.
    size_type
    max_size() const noexcept
    { return (size_type(~0) - size_type(Align)) / sizeof(T); }

    const_pointer
    address(const_reference x) const noexcept
    { return std::addressof(x); }

    /// Allocates uninitialized storage for `n` objects of type T.
    ///
    /// @param n  number of elements; the second (hint) parameter is ignored.
    /// @return pointer to the storage.
    /// @throws std::bad_alloc if `n` exceeds max_size() or the underlying
    ///         allocation returns nullptr.
    pointer
    allocate(size_type n, typename AlignedAllocator<void, Align>::const_pointer = nullptr)
    {
        // Without this check n * sizeof(T) could wrap around and a far too
        // small buffer would be returned as if the request had succeeded.
        if (n > max_size()) {
            throw std::bad_alloc();
        }

        const size_type alignment = static_cast<size_type>( Align );
        void* ptr = detail::allocate_aligned_memory(alignment , n * sizeof(T));
        if (ptr == nullptr) {
            throw std::bad_alloc();
        }

        return static_cast<pointer>(ptr);
    }

    /// Returns storage obtained from allocate(); the element count is unused.
    /// The const qualifier has to be cast away because the underlying release
    /// function takes a plain void*, to which a const T* does not convert.
    void
    deallocate(pointer p, size_type) noexcept
    { detail::deallocate_aligned_memory(const_cast<T*>(p)); }

    /// Constructs a U at `p` from the forwarded arguments (placement new).
    template <class U, class ...Args>
    void
    construct(U* p, Args&&... args)
    { ::new(reinterpret_cast<void*>(p)) U(std::forward<Args>(args)...); }

    /// Runs the destructor of the object at `p` without releasing storage.
    void
    destroy(pointer p)
    { p->~T(); }
};

// Allocators of this family carry no state, so two of them are interchangeable
// exactly when their alignment parameters agree; element types play no part.
template <typename T, size_t TAlign, typename U, size_t UAlign>
inline bool operator==(const AlignedAllocator<T, TAlign>& /*lhs*/,
                       const AlignedAllocator<U, UAlign>& /*rhs*/) noexcept
{
    constexpr bool same_alignment = (TAlign == UAlign);
    return same_alignment;
}

// Inequality: true exactly when the two allocators' alignment parameters
// differ; element types play no part.
template <typename T, size_t TAlign, typename U, size_t UAlign>
inline bool operator!=(const AlignedAllocator<T, TAlign>& /*lhs*/,
                       const AlignedAllocator<U, UAlign>& /*rhs*/) noexcept
{
    constexpr bool different_alignment = !(TAlign == UAlign);
    return different_alignment;
}

Loading

0 comments on commit b49c0d5

Please sign in to comment.