Skip to content

Commit

Permalink
refactor(ttm): change number of threads to cores and change omp scheduling policy and pin threads.
Browse files Browse the repository at this point in the history
  • Loading branch information
bassoy committed Oct 30, 2024
1 parent bfe9264 commit 4ea8da9
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 152 deletions.
14 changes: 10 additions & 4 deletions include/tlib/detail/matrix_times_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

namespace tlib::ttv::detail {

/** \brief forward declaration of the helper that supplies the thread count for the parallel kernels in this header
 *
 * The returned value is cached in a function-local static and passed to the
 * OpenMP num_threads clause of the parallel regions below.
 *
 * NOTE(review): the definition is not visible from here - confirm whether it
 * reports physical or logical cores and that it never returns 0.
*/
static inline unsigned get_number_cores();

/** \brief computes 2d-slice-times-vector
*
Expand Down Expand Up @@ -72,7 +73,8 @@ void gemv_row_parallel(
size_t const N,
size_t const lda)
{
#pragma omp parallel for schedule(static)
static const unsigned cores = get_number_cores();
#pragma omp parallel for schedule(static) num_threads(cores) proc_bind(spread)
for(unsigned i = 0; i < M; ++i){
auto const*const __restrict ai = a+i*lda;
auto sum = value_t{};
Expand Down Expand Up @@ -134,10 +136,12 @@ void gemv_col_parallel(
constexpr auto MB = 32;
const unsigned m = M/MB;
const unsigned MBmod = M%MB;

static const unsigned cores = get_number_cores();

#pragma omp parallel firstprivate(a,b,c, MB, m, MBmod, N, lda, M)
#pragma omp parallel num_threads(cores) proc_bind(spread)
{
#pragma omp for schedule(static)
#pragma omp for schedule(static)
for(unsigned k = 0; k < m; ++k){
auto const*const __restrict ak = a+k*MB;
auto *const __restrict ck = c+k*MB;
Expand Down Expand Up @@ -275,8 +279,10 @@ inline void dot_parallel(
value_t *const __restrict c,
size_t const M) // nn
{
static const unsigned cores = get_number_cores();

auto sum = value_t{};
#pragma omp parallel for schedule(static) reduction (+:sum)
#pragma omp parallel for schedule(static) num_threads(cores) proc_bind(spread) reduction (+:sum)
for(auto k = 0ul; k < M; ++k){
sum += a[k] * b[k];
}
Expand Down
Loading

0 comments on commit 4ea8da9

Please sign in to comment.