Skip to content

Commit

Permalink
[imap] add local to global mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcelKoch authored and fritzgoebel committed Oct 31, 2024
1 parent 96d1f46 commit 5bc3f36
Show file tree
Hide file tree
Showing 11 changed files with 594 additions and 3 deletions.
84 changes: 84 additions & 0 deletions common/cuda_hip/distributed/index_map_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,90 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL);


template <typename LocalIndexType, typename GlobalIndexType>
void map_to_global(
std::shared_ptr<const DefaultExecutor> exec,
device_partition<const LocalIndexType, const GlobalIndexType> partition,
device_segmented_array<const GlobalIndexType> remote_global_idxs,
experimental::distributed::comm_index_type rank,
const array<LocalIndexType>& local_ids,
experimental::distributed::index_space is,
array<GlobalIndexType>& global_ids)
{
auto range_bounds = partition.offsets_begin;
auto starting_indices = partition.starting_indices_begin;
const auto& ranges_by_part = partition.ranges_by_part;
auto local_ids_it = local_ids.get_const_data();
auto input_size = local_ids.get_size();

auto policy = thrust_policy(exec);

global_ids.resize_and_reset(local_ids.get_size());
auto global_ids_it = global_ids.get_data();

auto map_local = [rank, ranges_by_part, range_bounds, starting_indices,
partition] __device__(auto lid) {
auto local_size =
static_cast<LocalIndexType>(partition.part_sizes_begin[rank]);

if (lid < 0 || lid >= local_size) {
return invalid_index<GlobalIndexType>();
}

auto local_ranges = ranges_by_part.get_segment(rank);
auto local_ranges_size =
static_cast<int64>(local_ranges.end - local_ranges.begin);

auto it = binary_search(int64(0), local_ranges_size, [=](const auto i) {
return starting_indices[local_ranges.begin[i]] >= lid;
});
auto local_range_id =
it != local_ranges_size ? it : max(int64(0), it - 1);
auto range_id = local_ranges.begin[local_range_id];

return static_cast<GlobalIndexType>(lid - starting_indices[range_id]) +
range_bounds[range_id];
};
auto map_non_local = [remote_global_idxs] __device__(auto lid) {
auto remote_size = static_cast<LocalIndexType>(
remote_global_idxs.flat_end - remote_global_idxs.flat_begin);

if (lid < 0 || lid >= remote_size) {
return invalid_index<GlobalIndexType>();
}

return remote_global_idxs.flat_begin[lid];
};
auto map_combined = [map_local, map_non_local, partition,
rank] __device__(auto lid) {
auto local_size =
static_cast<LocalIndexType>(partition.part_sizes_begin[rank]);

if (lid < local_size) {
return map_local(lid);
} else {
return map_non_local(lid - local_size);
}
};

if (is == experimental::distributed::index_space::local) {
thrust::transform(policy, local_ids_it, local_ids_it + input_size,
global_ids_it, map_local);
}
if (is == experimental::distributed::index_space::non_local) {
thrust::transform(policy, local_ids_it, local_ids_it + input_size,
global_ids_it, map_non_local);
}
if (is == experimental::distributed::index_space::combined) {
thrust::transform(policy, local_ids_it, local_ids_it + input_size,
global_ids_it, map_combined);
}
}

GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL);


} // namespace index_map
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
Expand Down
1 change: 1 addition & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ namespace index_map {

GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_BUILD_MAPPING);
GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL);
GKO_STUB_LOCAL_GLOBAL_TYPE(GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL);


} // namespace index_map
Expand Down
16 changes: 16 additions & 0 deletions core/distributed/index_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ namespace index_map_kernels {

GKO_REGISTER_OPERATION(build_mapping, index_map::build_mapping);
GKO_REGISTER_OPERATION(map_to_local, index_map::map_to_local);
GKO_REGISTER_OPERATION(map_to_global, index_map::map_to_global);


} // namespace index_map_kernels
Expand Down Expand Up @@ -89,6 +90,21 @@ array<LocalIndexType> index_map<LocalIndexType, GlobalIndexType>::map_to_local(
}


template <typename LocalIndexType, typename GlobalIndexType>
array<GlobalIndexType>
index_map<LocalIndexType, GlobalIndexType>::map_to_global(
const array<LocalIndexType>& local_ids, index_space index_space_v) const
{
array<GlobalIndexType> global_ids(exec_);

exec_->run(index_map_kernels::make_map_to_global(
to_device(partition_.get()), to_device(remote_global_idxs_), rank_,
local_ids, index_space_v, global_ids));

return global_ids;
}


template <typename LocalIndexType, typename GlobalIndexType>
index_map<LocalIndexType, GlobalIndexType>::index_map(
std::shared_ptr<const Executor> exec,
Expand Down
33 changes: 31 additions & 2 deletions core/distributed/index_map_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "core/base/kernel_declaration.hpp"
#include "core/base/segmented_array.hpp"
#include "core/distributed/device_partition.hpp"


namespace gko {
Expand Down Expand Up @@ -55,10 +56,13 @@ namespace kernels {
*
* - partition: the global partition
* - remote_target_ids: the owning part ids of each segment of
* remote_global_idxs
* remote_global_idxs
* - remote_global_idxs: the remote global indices, segmented by the owning part
* ids
* - rank: the part id of this process
*
* Any global index that is not in the specified local index space is mapped
* to invalid_index.
*/
#define GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL(_ltype, _gtype) \
void map_to_local( \
Expand All @@ -72,11 +76,36 @@ namespace kernels {
experimental::distributed::index_space is, array<_ltype>& local_ids)


/**
* This kernels maps local indices to global indices.
*
* The relevant input parameter from the index map are:
*
* - partition: the global partition
* - remote_global_idxs: the remote global indices, segmented by the owning part
* ids
* - rank: the part id of this process
*
* Any local index that is not part of the specified index space is mapped to
* invalid_index.
*/
#define GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL(_ltype, _gtype) \
void map_to_global( \
std::shared_ptr<const DefaultExecutor> exec, \
device_partition<const _ltype, const _gtype> partition, \
device_segmented_array<const _gtype> remote_global_idxs, \
experimental::distributed::comm_index_type rank, \
const array<_ltype>& local_ids, \
experimental::distributed::index_space is, array<_gtype>& global_ids)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename LocalIndexType, typename GlobalIndexType> \
GKO_DECLARE_INDEX_MAP_BUILD_MAPPING(LocalIndexType, GlobalIndexType); \
template <typename LocalIndexType, typename GlobalIndexType> \
GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL(LocalIndexType, GlobalIndexType)
GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL(LocalIndexType, GlobalIndexType); \
template <typename LocalIndexType, typename GlobalIndexType> \
GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL(LocalIndexType, GlobalIndexType)


GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(index_map,
Expand Down
13 changes: 13 additions & 0 deletions dpcpp/distributed/index_map_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL);


template <typename LocalIndexType, typename GlobalIndexType>
void map_to_global(
std::shared_ptr<const DefaultExecutor> exec,
device_partition<const LocalIndexType, const GlobalIndexType> partition,
device_segmented_array<const GlobalIndexType> remote_global_idxs,
experimental::distributed::comm_index_type rank,
const array<LocalIndexType>& local_ids,
experimental::distributed::index_space is,
array<GlobalIndexType>& global_ids) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL);

} // namespace index_map
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
Expand Down
14 changes: 14 additions & 0 deletions include/ginkgo/core/distributed/index_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,20 @@ struct index_map {
array<LocalIndexType> map_to_local(const array<GlobalIndexType>& global_ids,
index_space index_space_v) const;


/**
* Maps local indices to global indices
*
* @param local_ids the local indices to map
* @param index_space_v the index space in which the passed-in local
* indices are defined
*
* @return the mapped global indices. Any local index, that is not in the
* specified index space is mapped to invalid_index
*/
array<GlobalIndexType> map_to_global(const array<LocalIndexType>& local_ids,
index_space index_space_v) const;

/**
* \brief get size of index_space::local
*/
Expand Down
71 changes: 71 additions & 0 deletions omp/distributed/index_map_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,77 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL);


template <typename LocalIndexType, typename GlobalIndexType>
void map_to_global(
std::shared_ptr<const DefaultExecutor> exec,
device_partition<const LocalIndexType, const GlobalIndexType> partition,
device_segmented_array<const GlobalIndexType> remote_global_idxs,
experimental::distributed::comm_index_type rank,
const array<LocalIndexType>& local_ids,
experimental::distributed::index_space is,
array<GlobalIndexType>& global_ids)
{
const auto& ranges_by_part = partition.ranges_by_part;
auto local_ranges = ranges_by_part.get_segment(rank);

global_ids.resize_and_reset(local_ids.get_size());

auto local_size =
static_cast<LocalIndexType>(partition.part_sizes_begin[rank]);
auto remote_size = static_cast<LocalIndexType>(
remote_global_idxs.flat_end - remote_global_idxs.flat_begin);
size_type local_range_id = 0;
if (is == experimental::distributed::index_space::local) {
#pragma omp parallel for firstprivate(local_range_id)
for (size_type i = 0; i < local_ids.get_size(); ++i) {
auto lid = local_ids.get_const_data()[i];

if (0 <= lid && lid < local_size) {
local_range_id =
find_local_range(lid, rank, partition, local_range_id);
global_ids.get_data()[i] = map_to_global(
lid, partition, local_ranges.begin[local_range_id]);
} else {
global_ids.get_data()[i] = invalid_index<GlobalIndexType>();
}
}
}
if (is == experimental::distributed::index_space::non_local) {
#pragma omp parallel for
for (size_type i = 0; i < local_ids.get_size(); ++i) {
auto lid = local_ids.get_const_data()[i];

if (0 <= lid && lid < remote_size) {
global_ids.get_data()[i] = remote_global_idxs.flat_begin[lid];
} else {
global_ids.get_data()[i] = invalid_index<GlobalIndexType>();
}
}
}
if (is == experimental::distributed::index_space::combined) {
#pragma omp parallel for firstprivate(local_range_id)
for (size_type i = 0; i < local_ids.get_size(); ++i) {
auto lid = local_ids.get_const_data()[i];

if (0 <= lid && lid < local_size) {
local_range_id =
find_local_range(lid, rank, partition, local_range_id);
global_ids.get_data()[i] = map_to_global(
lid, partition, local_ranges.begin[local_range_id]);
} else if (local_size <= lid && lid < local_size + remote_size) {
global_ids.get_data()[i] =
remote_global_idxs.flat_begin[lid - local_size];
} else {
global_ids.get_data()[i] = invalid_index<GlobalIndexType>();
}
}
}
}

GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL);


} // namespace index_map
} // namespace omp
} // namespace kernels
Expand Down
71 changes: 71 additions & 0 deletions reference/distributed/index_map_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,77 @@ GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
GKO_DECLARE_INDEX_MAP_MAP_TO_LOCAL);


template <typename LocalIndexType, typename GlobalIndexType>
void map_to_global(
std::shared_ptr<const DefaultExecutor> exec,
device_partition<const LocalIndexType, const GlobalIndexType> partition,
device_segmented_array<const GlobalIndexType> remote_global_idxs,
experimental::distributed::comm_index_type rank,
const array<LocalIndexType>& local_ids,
experimental::distributed::index_space is,
array<GlobalIndexType>& global_ids)
{
const auto& ranges_by_part = partition.ranges_by_part;
auto local_ranges = ranges_by_part.get_segment(rank);

global_ids.resize_and_reset(local_ids.get_size());

auto local_size =
static_cast<LocalIndexType>(partition.part_sizes_begin[rank]);
size_type local_range_id = 0;
auto map_local = [&](auto lid) {
if (0 <= lid && lid < local_size) {
local_range_id =
find_local_range(lid, rank, partition, local_range_id);
return map_to_global(lid, partition,
local_ranges.begin[local_range_id]);
} else {
return invalid_index<GlobalIndexType>();
}
};

auto remote_size = static_cast<LocalIndexType>(
remote_global_idxs.flat_end - remote_global_idxs.flat_begin);
auto map_non_local = [&](auto lid) {
if (0 <= lid && lid < remote_size) {
return remote_global_idxs.flat_begin[lid];
} else {
return invalid_index<GlobalIndexType>();
}
};

auto map_combined = [&](auto lid) {
if (lid < local_size) {
return map_local(lid);
} else {
return map_non_local(lid - local_size);
}
};

if (is == experimental::distributed::index_space::local) {
for (size_type i = 0; i < local_ids.get_size(); ++i) {
auto lid = local_ids.get_const_data()[i];
global_ids.get_data()[i] = map_local(lid);
}
}
if (is == experimental::distributed::index_space::non_local) {
for (size_type i = 0; i < local_ids.get_size(); ++i) {
auto lid = local_ids.get_const_data()[i];
global_ids.get_data()[i] = map_non_local(lid);
}
}
if (is == experimental::distributed::index_space::combined) {
for (size_type i = 0; i < local_ids.get_size(); ++i) {
auto lid = local_ids.get_const_data()[i];
global_ids.get_data()[i] = map_combined(lid);
}
}
}

GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
GKO_DECLARE_INDEX_MAP_MAP_TO_GLOBAL);


} // namespace index_map
} // namespace reference
} // namespace kernels
Expand Down
Loading

0 comments on commit 5bc3f36

Please sign in to comment.