diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 828f8dd7..04579700 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.19 FATAL_ERROR) -#add_subdirectory("fractal") -#add_subdirectory("lbm") -#add_subdirectory("gameOfLife") +add_subdirectory("fractal") +add_subdirectory("lbm") +add_subdirectory("gameOfLife") #add_subdirectory("poisson") -add_subdirectory("lbmMultiRes") +#add_subdirectory("lbmMultiRes") diff --git a/apps/fractal/fractal.cu b/apps/fractal/fractal.cu index 5e603e48..707da70f 100644 --- a/apps/fractal/fractal.cu +++ b/apps/fractal/fractal.cu @@ -5,8 +5,8 @@ #include "Neon/domain/dGrid.h" #include "Neon/skeleton/Skeleton.h" -template -inline void draw_pixels(const int t, FieldT& field) +template +inline void draw_pixels(const int t, Field& field) { printf("\n Exporting Frame =%d", t); int precision = 4; @@ -28,19 +28,19 @@ NEON_CUDA_HOST_DEVICE inline Neon::float_2d complex_pow(Neon::float_2d& z, Neon: return Neon::float_2d(radius * cos(angle), radius * sin(angle)); } -template -inline Neon::set::Container FractalsContainer(FieldT& pixels, +template +inline Neon::set::Container FractalsContainer(Field& pixels, int32_t& time, int32_t n) { - return pixels.getGrid().getContainer( + return pixels.getGrid().newContainer( "FractalContainer", [&, n](Neon::set::Loader& L) { auto& px = L.load(pixels); auto& t = time; return [=] NEON_CUDA_HOST_DEVICE( - const typename FieldT::Cell& idx) mutable { - auto id = px.mapToGlobal(idx); + const typename Field::Idx& idx) mutable { + auto id = px.getGlobalIndex(idx); Neon::float_2d c(-0.8, cos(t * 0.03) * 0.2); Neon::float_2d z((float(id.x) / float(n)) - 1.0f, @@ -59,7 +59,7 @@ inline Neon::set::Container FractalsContainer(FieldT& pixels, int main(int argc, char** argv) { Neon::init(); - if (Neon::sys::globalSpace::gpuSysObjStorage.numDevs() > 0) { + if ( Neon::Backend::countAvailableGpus() > 0) { int32_t n = 320; Neon::index_3d dim(2 * n, n, 1); std::vector gpu_ids{0}; @@ -90,7 +90,7 @@ int main(int argc, char** argv) skeleton.run(); pixels.updateHostData(0); - //draw_pixels(time, pixels); + draw_pixels(time, pixels); } } } \ No newline at end of file diff --git a/apps/gameOfLife/gameOfLife.cu b/apps/gameOfLife/gameOfLife.cu index 82081b45..647a16d2 100644 --- a/apps/gameOfLife/gameOfLife.cu +++ b/apps/gameOfLife/gameOfLife.cu @@ -11,10 +11,10 @@ #include "Neon/domain/dGrid.h" #include "Neon/skeleton/Skeleton.h" -template -inline void exportVTI(FieldT& voxel_1, FieldT& voxel_2, int frame_id) +template +inline void exportVTI(Field& voxel_1, Field& voxel_2, int frame_id) { - auto io = [&](int f, FieldT& voxel) { + auto io = [&](int f, Field& voxel) { printf("\n Exporting Frame =%d", f); int precision = 4; voxel.updateHostData(0); @@ -39,66 +39,66 @@ Neon::domain::Stencil createStencil() return Neon::domain::Stencil(stencil); } -template -inline Neon::set::Container GoLContainer(const FieldT& in_cells, - FieldT& out_cells, - typename FieldT::Type length) +template +inline Neon::set::Container GoLContainer(const Field& in_cells, + Field& out_cells, + typename Field::Type length) { - using T = typename FieldT::Type; - return in_cells.getGrid().getContainer( + using T = typename Field::Type; + return in_cells.getGrid().newContainer( "GoLContainer", [&, length](Neon::set::Loader& L) { - const auto& ins = L.load(in_cells, Neon::Compute::STENCIL); + const auto& ins = L.load(in_cells, Neon::Pattern::STENCIL); auto& out = L.load(out_cells); return [=] NEON_CUDA_HOST_DEVICE( - const typename FieldT::Cell& idx) mutable { - typename FieldT::ngh_idx ngh(0, 0, 0); - const T default_value = 0; - int alive = 0; - T value = 0; - T status = ins.nghVal(idx, ngh, 0, default_value).value; + const typename Field::Idx& idx) mutable { + typename Field::NghIdx ngh(0, 0, 0); + const T default_value = 0; + int alive = 0; + T value = 0; + T status = ins.getNghData(idx, ngh, 0, default_value).getData(); //+x ngh.x = 1; ngh.y = 0; ngh.z = 0; - value = ins.nghVal(idx, ngh, 0, default_value).value; + value = ins.getNghData(idx, ngh, 0, default_value).getData(); alive += (value > 0.0 ? 1 : 0); ngh.y = 1; - value = ins.nghVal(idx, ngh, 0, default_value).value; + value = ins.getNghData(idx, ngh, 0, default_value).getData(); alive += (value > 0.0 ? 1 : 0); //-x ngh.x = -1; ngh.y = 0; ngh.z = 0; - value = ins.nghVal(idx, ngh, 0, default_value).value; + value = ins.getNghData(idx, ngh, 0, default_value).getData(); alive += (value > 0.0 ? 1 : 0); ngh.y = -1; - value = ins.nghVal(idx, ngh, 0, default_value).value; + value = ins.getNghData(idx, ngh, 0, default_value).getData(); alive += (value > 0.0 ? 1 : 0); //+y ngh.x = 0; ngh.y = 1; ngh.z = 0; - value = ins.nghVal(idx, ngh, 0, default_value).value; + value = ins.getNghData(idx, ngh, 0, default_value).getData(); alive += (value > 0.0 ? 1 : 0); ngh.x = -1; - value = ins.nghVal(idx, ngh, 0, default_value).value; + value = ins.getNghData(idx, ngh, 0, default_value).getData(); alive += (value > 0.0 ? 1 : 0); //-y ngh.x = 0; ngh.y = -1; ngh.z = 0; - value = ins.nghVal(idx, ngh, 0, default_value).value; + value = ins.getNghData(idx, ngh, 0, default_value).getData(); alive += (value > 0.0 ? 1 : 0); ngh.x = 1; - value = ins.nghVal(idx, ngh, 0, default_value).value; + value = ins.getNghData(idx, ngh, 0, default_value).getData(); alive += (value > 0.0 ? 1 : 0); - auto id_global = ins.mapToGlobal(idx); + auto id_global = ins.getGlobalIndex(idx); out(idx, 0) = ((T)id_global.x / length) * (T)((alive == 3 || (alive == 2 && status) ? 1 : 0)); }; }); diff --git a/apps/lbm/lbm.cu b/apps/lbm/lbm.cu index 3cea806e..6e9408f4 100644 --- a/apps/lbm/lbm.cu +++ b/apps/lbm/lbm.cu @@ -1,7 +1,7 @@ // References // 2D LBM: https://github.com/hietwll/LBM_Taichi // 2D LBM Verification data: https://www.sciencedirect.com/science/article/pii/0021999182900584 -//For 2D/3D constants: https://en.wikipedia.org/wiki/Lattice_Boltzmann_methods +// For 2D/3D constants: https://en.wikipedia.org/wiki/Lattice_Boltzmann_methods #include #include @@ -53,7 +53,7 @@ inline void exportVTI(const int t, Field& field) /** - * Get the x, y, or z component of the lattice vector + * Get the x, y, or z component of the lattice vector * represented by the component k. */ template @@ -104,7 +104,7 @@ NEON_CUDA_HOST_DEVICE int get_e(const int k, const int id) /** - * Get the weight that corresponds to the + * Get the weight that corresponds to the * lattice component represented by the component k. */ template @@ -136,7 +136,7 @@ NEON_CUDA_HOST_DEVICE double get_w(const int k) * Update the velocity and density of the fluid after the * collide and stream step. * @param in_voxels The input lattice to use for computing - * the next collide and stream step from the + * the next collide and stream step from the * previous result. * @param old_voxels The previous output lattice from the last * computation step to be updated. @@ -154,18 +154,18 @@ NEON_CUDA_HOST_DEVICE double get_w(const int k) */ template -Neon::set::Container computeVelocity(const RealFieldT& in_voxels, - RealFieldT& old_voxels, - const MaskFeildT& mask, - RealFieldT& out_velocity, - RealFieldT& density, - RealFieldT& density_temp, - typename RealFieldT::Type tau) + typename RealField, + typename MaskFeild> +Neon::set::Container computeVelocity(const RealField& in_voxels, + RealField& old_voxels, + const MaskFeild& mask, + RealField& out_velocity, + RealField& density, + RealField& density_temp, + typename RealField::Type tau) { - using T = typename RealFieldT::Type; - return in_voxels.getGrid().getContainer( + using T = typename RealField::Type; + return in_voxels.getGrid().newContainer( "ComputeVelocity", [&, tau](Neon::set::Loader& loader) { const auto& ins = loader.load(in_voxels); auto& olds = loader.load(old_voxels); @@ -175,17 +175,17 @@ Neon::set::Container computeVelocity(const RealFieldT& in_voxels, auto& out_vel = loader.load(out_velocity); return [=] NEON_CUDA_HOST_DEVICE( - const typename RealFieldT::Cell& idx) mutable { - typename RealFieldT::ngh_idx ngh(0, 0, 0); - const T default_value = 0; - T r = 0; - T vels[DIM]; + const typename RealField::Idx& idx) mutable { + typename RealField::NghIdx ngh(0, 0, 0); + const T default_value = 0; + T r = 0; + T vels[DIM]; for (int i = 0; i < DIM; i++) { vels[i] = 0.0; } - int mask_val = m.nghVal(idx, ngh, 0, default_value).value; + int mask_val = m.getNghData(idx, ngh, 0, default_value).getData(); for (int k = 0; k < COMP; k++) { - T f = ins.nghVal(idx, ngh, k, default_value).value; + T f = ins.getNghData(idx, ngh, k, default_value).getData(); olds(idx, k) = f; r += f; for (int i = 0; i < DIM; i++) { @@ -210,10 +210,10 @@ Neon::set::Container computeVelocity(const RealFieldT& in_voxels, * Apply the collision and streaming step of the LBM algorithm. * This should be the first container in the sequence. * @param density The density of the fluid at each voxel. - * @param in_velocity The velocity of the fluid from the + * @param in_velocity The velocity of the fluid from the * previous step. * @param in_voxels The input lattice to use for computing - * the next collide and stream step from the + * the next collide and stream step from the * previous result. * @param mask The mask to use for identifying which * boundary condition to apply to a grid cell. @@ -225,31 +225,31 @@ Neon::set::Container computeVelocity(const RealFieldT& in_voxels, */ template -Neon::set::Container collideAndStream(const RealFieldT& density, - const RealFieldT& in_velocity, - const RealFieldT& in_voxels, - const MaskFeildT& mask, - RealFieldT& out_voxels, - typename RealFieldT::Type tau) + typename RealField, + typename MaskFeild> +Neon::set::Container collideAndStream(const RealField& density, + const RealField& in_velocity, + const RealField& in_voxels, + const MaskFeild& mask, + RealField& out_voxels, + typename RealField::Type tau) { - using T = typename RealFieldT::Type; - return in_voxels.getGrid().getContainer( + using T = typename RealField::Type; + return in_voxels.getGrid().newContainer( "CollideAndStream", [&, tau](Neon::set::Loader& loader) { - const auto& ins = loader.load(in_voxels, Neon::Compute::STENCIL); - const auto& in_vel = loader.load(in_velocity, Neon::Compute::STENCIL); - const auto& rho = loader.load(density, Neon::Compute::STENCIL); + const auto& ins = loader.load(in_voxels, Neon::Pattern::STENCIL); + const auto& in_vel = loader.load(in_velocity, Neon::Pattern::STENCIL); + const auto& rho = loader.load(density, Neon::Pattern::STENCIL); const auto& m = loader.load(mask); auto& out = loader.load(out_voxels); return [=] NEON_CUDA_HOST_DEVICE( - const typename RealFieldT::Cell& idx) mutable { - typename RealFieldT::ngh_idx ngh(0, 0, 0); - const T default_value = 0; + const typename RealField::Idx& idx) mutable { + typename RealField::NghIdx ngh(0, 0, 0); + const T default_value = 0; - int mask_val = m.nghVal(idx, ngh, 0, default_value).value; + int mask_val = m.getNghData(idx, ngh, 0, default_value).getData(); for (int k = 0; k < COMP; k++) { T vel = 0; @@ -258,11 +258,11 @@ Neon::set::Container collideAndStream(const RealFieldT& density, ngh.x = -get_e(k, 0); ngh.y = -get_e(k, 1); ngh.z = -get_e(k, 2); - T fold = ins.nghVal(idx, ngh, k, default_value).value; - T r = rho.nghVal(idx, ngh, 0, default_value).value; + T fold = ins.getNghData(idx, ngh, k, default_value).getData(); + T r = rho.getNghData(idx, ngh, 0, default_value).getData(); for (int i = 0; i < DIM; i++) { int e_i = get_e(k, i); - vel = in_vel.nghVal(idx, ngh, i, default_value).value; + vel = in_vel.getNghData(idx, ngh, i, default_value).getData(); eu += (e_i * vel); uv += (vel * vel); } @@ -301,26 +301,26 @@ Neon::set::Container collideAndStream(const RealFieldT& density, */ template -Neon::set::Container boundaryConditions(const RealFieldT& in_voxels, - const RealFieldT& in_velocity, - const MaskFeildT& boundary_mask, - const RealFieldT& read_density, - RealFieldT& out_voxels, - RealFieldT& out_velocity, - RealFieldT& density, - const typename RealFieldT::Type tau, - const typename RealFieldT::Type sphere_x, - const typename RealFieldT::Type sphere_y) + typename RealField, + typename MaskFeild> +Neon::set::Container boundaryConditions(const RealField& in_voxels, + const RealField& in_velocity, + const MaskFeild& boundary_mask, + const RealField& read_density, + RealField& out_voxels, + RealField& out_velocity, + RealField& density, + const typename RealField::Type tau, + const typename RealField::Type sphere_x, + const typename RealField::Type sphere_y) { - using T = typename RealFieldT::Type; - return in_voxels.getGrid().getContainer( + using T = typename RealField::Type; + return in_voxels.getGrid().newContainer( "BoundaryConditions", [&, tau](Neon::set::Loader& loader) { - const auto& ins = loader.load(in_voxels, Neon::Compute::STENCIL); - const auto& in_vel = loader.load(in_velocity, Neon::Compute::STENCIL); + const auto& ins = loader.load(in_voxels, Neon::Pattern::STENCIL); + const auto& in_vel = loader.load(in_velocity, Neon::Pattern::STENCIL); const auto& mask = loader.load(boundary_mask); - const auto& rho_old = loader.load(read_density, Neon::Compute::STENCIL); + const auto& rho_old = loader.load(read_density, Neon::Pattern::STENCIL); auto& outs = loader.load(out_voxels); auto& out_vel = loader.load(out_velocity); auto& rho = loader.load(density); @@ -363,23 +363,23 @@ Neon::set::Container boundaryConditions(const RealFieldT& in_voxel const Neon::index_3d dims = in_voxels.getDimension(); return [=] NEON_CUDA_HOST_DEVICE( - const typename RealFieldT::Cell& idx) mutable { - typename RealFieldT::ngh_idx ngh(0, 0, 0); - const T default_value = 0; - T vels[DIM]; - T new_vals[DIM]; - T e_i[DIM]; + const typename RealField::Idx& idx) mutable { + typename RealField::NghIdx ngh(0, 0, 0); + const T default_value = 0; + T vels[DIM]; + T new_vals[DIM]; + T e_i[DIM]; int nx = dims.x, ny = dims.y; - int boundary = mask.nghVal(idx, ngh, 0, default_value).value; + int boundary = mask.getNghData(idx, ngh, 0, default_value).getData(); for (int i = 0; i < DIM; i++) { - vels[i] = in_vel.nghVal(idx, ngh, i, default_value).value; + vels[i] = in_vel.getNghData(idx, ngh, i, default_value).getData(); } Neon::index_3d offsets; int dr = -1; - auto id_global = ins.mapToGlobal(idx); + auto id_global = ins.getGlobalIndex(idx); if constexpr (DIM == 2) { if (id_global.x == 0) { @@ -437,7 +437,7 @@ Neon::set::Container boundaryConditions(const RealFieldT& in_voxel ngh.y = offsets.y; ngh.z = offsets.z; - T r = rho_old.nghVal(idx, ngh, 0, default_value).value; + T r = rho_old.getNghData(idx, ngh, 0, default_value).getData(); if (boundary == 1) { // fixed boundary for (int i = 0; i < DIM; i++) { new_vals[i] = (dr != -1) ? bc_values[dr][i] : 0; @@ -445,7 +445,7 @@ Neon::set::Container boundaryConditions(const RealFieldT& in_voxel } } else if (boundary == 2) { // Neumann for (int i = 0; i < DIM; i++) { - new_vals[i] = in_vel.nghVal(idx, ngh, i, default_value).value; + new_vals[i] = in_vel.getNghData(idx, ngh, i, default_value).getData(); out_vel(idx, i) = new_vals[i]; } } else { // we can add more types later @@ -457,14 +457,14 @@ Neon::set::Container boundaryConditions(const RealFieldT& in_voxel // update density, velocity rho(idx, 0) = r; for (int i = 0; i < DIM; i++) { - vels[i] = in_vel.nghVal(idx, ngh, i, default_value).value; + vels[i] = in_vel.getNghData(idx, ngh, i, default_value).getData(); } for (int k = 0; k < COMP; k++) { for (int i = 0; i < DIM; i++) { e_i[i] = get_e(k, i); e_i[i] = 0; } - T fold = ins.nghVal(idx, ngh, k, default_value).value; + T fold = ins.getNghData(idx, ngh, k, default_value).getData(); T eu = 0; T uv = 0; for (int i = 0; i < DIM; i++) { @@ -489,22 +489,22 @@ Neon::set::Container boundaryConditions(const RealFieldT& in_voxel } -template +template inline void setup(const FlowType flow_type, - MaskFeildT& boundary_mask, - MaskFeildT& center_mask, - RealFieldT& lattice_1, - RealFieldT& lattice_2, - RealFieldT& velocity_1, - RealFieldT& velocity_2, - RealFieldT& rho_1, - RealFieldT& rho_2, - const typename RealFieldT::Type sphere_x, - const typename RealFieldT::Type sphere_y, - const typename RealFieldT::Type sphere_r) + MaskFeild& boundary_mask, + MaskFeild& center_mask, + RealField& lattice_1, + RealField& lattice_2, + RealField& velocity_1, + RealField& velocity_2, + RealField& rho_1, + RealField& rho_2, + const typename RealField::Type sphere_x, + const typename RealField::Type sphere_y, + const typename RealField::Type sphere_r) { auto dim = boundary_mask.getDimension(); - using T = typename RealFieldT::Type; + using T = typename RealField::Type; if (DIM == 2) { if (flow_type == FlowType::border) { @@ -635,21 +635,21 @@ inline void setup(const FlowType flow_type, } -template +template inline void run(const int num_frames, const FlowType flow_type, - MaskFeildT& boundary_mask, - MaskFeildT& center_mask, - RealFieldT& lattice_1, - RealFieldT& lattice_2, - RealFieldT& velocity_1, - RealFieldT& velocity_2, - RealFieldT& rho_1, - RealFieldT& rho_2, - const typename RealFieldT::Type tau, - const typename RealFieldT::Type sphere_x, - const typename RealFieldT::Type sphere_y, - const typename RealFieldT::Type sphere_r) + MaskFeild& boundary_mask, + MaskFeild& center_mask, + RealField& lattice_1, + RealField& lattice_2, + RealField& velocity_1, + RealField& velocity_2, + RealField& rho_1, + RealField& rho_2, + const typename RealField::Type tau, + const typename RealField::Type sphere_x, + const typename RealField::Type sphere_y, + const typename RealField::Type sphere_r) { const auto& backend = boundary_mask.getBackend(); @@ -698,11 +698,11 @@ int main(int argc, char** argv) std::vector gpu_ids{0}; Neon::Backend backend(gpu_ids, runtime); - //2D - //constexpr int DIM = 2; - //constexpr int COMP = 9; + // 2D + // constexpr int DIM = 2; + // constexpr int COMP = 9; - //3D + // 3D constexpr int DIM = 3; constexpr int COMP = 19; diff --git a/libNeonSet/include/Neon/set/Backend.h b/libNeonSet/include/Neon/set/Backend.h index 56b41ded..0d2997a1 100644 --- a/libNeonSet/include/Neon/set/Backend.h +++ b/libNeonSet/include/Neon/set/Backend.h @@ -15,6 +15,8 @@ // #include "Neon/core/types/devType.h" #include "Neon/set/DataSet.h" +#include + namespace Neon { using StreamIdx = int; using EventIdx = int; @@ -281,6 +283,8 @@ class Backend static std::string toString(Neon::Runtime e); + static auto countAvailableGpus() -> int32_t; + /** * * @return diff --git a/libNeonSet/src/set/Backend.cpp b/libNeonSet/src/set/Backend.cpp index a8a123c0..ee6566fb 100644 --- a/libNeonSet/src/set/Backend.cpp +++ b/libNeonSet/src/set/Backend.cpp @@ -614,4 +614,10 @@ auto Backend::isLastDevice(Neon::SetIdx id) const -> bool { return id.idx() == (deviceCount() - 1); } + +auto Backend::countAvailableGpus() -> int32_t +{ + return Neon::sys::globalSpace::gpuSysObjStorage.numDevs(); +} + } // namespace Neon