Skip to content

Commit

Permalink
refactor scene array for (multi-level) instancing into struct for code
Browse files Browse the repository at this point in the history
clarity and better performance in case of MAX_INSTANCE_LEVEL_COUNT of 1.
added special case for MAX_INSTANCE_LEVEL_COUNT of 2 as well for ~10%
perf uplift in multi_instanced_geometry tutorial.
  • Loading branch information
freibold committed Dec 3, 2024
1 parent 0ba5bc3 commit 76da0e5
Showing 1 changed file with 80 additions and 38 deletions.
118 changes: 80 additions & 38 deletions kernels/sycl/rthwif_embree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,64 @@ const constexpr uint32_t TRAV_LOOP_FEATURES =
RTC_FEATURE_FLAG_INSTANCE_ARRAY |
RTC_FEATURE_FLAG_FILTER_FUNCTION;

/*
low-level optimized array of scenes for (multi-level) instancing. Our compiler
really does not like const sized arrays ...
*/
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 2)
struct Scenes {
Scenes(Scene* scene) : scene0(scene) {}

__forceinline void set(unsigned int level, Scene* scene) {
if (level == 0) scene0 = scene;
else scenes[level-1] = scene;
}

__forceinline Scene* get(unsigned int level) {
return level ? scenes[level-1] : scene0;
}

Scene* scene0;
Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT];
};
#elif (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
struct Scenes {
Scenes(Scene* scene) : scene0(scene) {}

__forceinline void set(unsigned int level, Scene* scene) {
if (level == 0) scene0 = scene;
else if (level == 1) scene1 = scene;
else scene2 = scene;
}

__forceinline Scene* get(unsigned int level) {
if (level == 0) return scene0;
else if (level == 1) return scene1;
else return scene2;
}

Scene* scene0;
Scene* scene1;
Scene* scene2;
};
#else
struct Scenes {
Scenes(Scene* scene) : scene0(scene) {}

__forceinline void set(unsigned int level, Scene* scene) {
if (level == 0) scene0 = scene;
else scene1 = scene;
}

__forceinline Scene* get(unsigned int level) {
return level ? scene1 : scene0;
}

Scene* scene0;
Scene* scene1;
};
#endif

void use_rthwif_embree() {
}

Expand All @@ -74,7 +132,7 @@ __forceinline Vec3f intel_get_hit_triangle_normal(intel_ray_query_t& query, inte
return cross(v1-v0, v2-v0);
}

__forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray, UserGeometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
__forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray, UserGeometry* geom, Scenes& scenes, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
{
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
Expand All @@ -93,8 +151,8 @@ __forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray
constexpr unsigned int bvh_level = 0;
#endif
Scene* scene = (Scene*) forward_scene;
scenes[bvh_level] = scene;
scenes.set(bvh_level+1, scene);

intel_ray_desc_t raydesc;
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
Expand All @@ -116,7 +174,7 @@ __forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray
return false;
}

__forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, UserGeometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
__forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, UserGeometry* geom, Scenes& scenes, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID)
{
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
Expand All @@ -135,8 +193,8 @@ __forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, U
constexpr unsigned int bvh_level = 0;
#endif
Scene* scene = (Scene*) forward_scene;
scenes[bvh_level] = scene;
scenes.set(bvh_level+1, scene);

intel_ray_desc_t raydesc;
raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z);
raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z);
Expand All @@ -159,10 +217,10 @@ __forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, U
}

template<typename Ray>
__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, bool instance_array);
__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geometry* geom, Scenes& scenes, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, bool instance_array);

template<>
__forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Geometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, bool instance_array)
__forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Geometry* geom, Scenes& scenes, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, bool instance_array)
{
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
Expand Down Expand Up @@ -190,7 +248,7 @@ __forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Geo

const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
scenes[bvh_level] = object;
scenes.set(bvh_level+1, object);

intel_ray_desc_t raydesc;
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
Expand Down Expand Up @@ -219,7 +277,7 @@ __forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Geo
}

template<>
__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, bool instance_array)
__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geometry* geom, Scenes& scenes, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, bool instance_array)
{
/* perform ray mask test */
#if defined(EMBREE_RAY_MASK)
Expand Down Expand Up @@ -247,7 +305,7 @@ __forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geomet

const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org);
const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir);
scenes[bvh_level] = object;
scenes.set(bvh_level+1, object);

intel_ray_desc_t raydesc;
raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z);
Expand Down Expand Up @@ -279,7 +337,7 @@ __forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geomet
}

template<typename Ray>
__forceinline bool intersect_primitive(intel_ray_query_t& query, Ray& ray, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], Geometry* geom, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, const RTCFeatureFlags feature_mask)
__forceinline bool intersect_primitive(intel_ray_query_t& query, Ray& ray, Scenes& scenes, Geometry* geom, sycl::private_ptr<RayQueryContext> context, uint32_t geomID, uint32_t primID, const RTCFeatureFlags feature_mask)
{
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
bool filter = feature_mask & (RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY);
Expand Down Expand Up @@ -522,8 +580,9 @@ __forceinline bool commit_potential_hit(intel_ray_query_t& query, Ray& ray) {
}

template<typename Ray>
__forceinline void trav_loop(intel_ray_query_t& query, Ray& ray, Scene* scene0, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr<RayQueryContext> context, const RTCFeatureFlags feature_mask)
__forceinline void trav_loop(intel_ray_query_t& query, Ray& ray, Scene* scene, sycl::private_ptr<RayQueryContext> context, const RTCFeatureFlags feature_mask)
{
Scenes scenes(scene);
while (!intel_is_traversal_done(query))
{
intel_candidate_type_t candidate = intel_get_hit_candidate(query, intel_hit_type_potential_hit);
Expand All @@ -539,25 +598,25 @@ __forceinline void trav_loop(intel_ray_query_t& query, Ray& ray, Scene* scene0,
ray.org = Vec3ff(org.x(), org.y(), org.z(), ray.tnear());
ray.dir = Vec3ff(dir.x(), dir.y(), dir.z(), ray.time ());
ray.tfar = intel_get_hit_distance(query, intel_hit_type_committed_hit);

#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
context->user->instStackSize = bvh_level;
Scene* scene = bvh_level ? scenes[bvh_level - 1] : scene0;
Scene* scene = scenes.get(bvh_level);
#else
const unsigned int instID = intel_get_hit_instance_id(query, intel_hit_type_potential_hit);

/* assume software instancing mode by default (required for rtcForwardRay) */
Scene* scene = bvh_level ? scenes[0] : scene0;
Scene* scene = scenes.get(bvh_level);

/* if we are in hardware instancing mode and we need to read the scene from the instance */
if (bvh_level > 0 && instID != RTC_INVALID_GEOMETRY_ID) {
Instance* inst = scene0->get<Instance>(instID);
Instance* inst = scenes.get(0)->get<Instance>(instID);
scene = (Scene*) inst->object;
context->user->instID[0] = instID;
}
else if (bvh_level == 0)
context->user->instID[0] = RTC_INVALID_GEOMETRY_ID;

#endif
context->scene = scene;
Geometry* geom = scene->get(geomID);
Expand Down Expand Up @@ -593,8 +652,6 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_
{
Scene* scene = (Scene*) htraversable.get();

Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT];

RayQueryContext context(scene, ucontext, args);

RayHit ray;
Expand All @@ -610,19 +667,6 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_
ray.primID = RTC_INVALID_GEOMETRY_ID;
ray.geomID = RTC_INVALID_GEOMETRY_ID;

//TriangleMesh* mesh = scene->get<TriangleMesh>(0);
//TriangleMesh::Triangle triangle = mesh->triangle(0);
//Vec3fa v0 = mesh->vertex(triangle.v[0], 5.f);
//Vec3fa v1 = mesh->vertex(triangle.v[1], 5.f);
//Vec3fa v2 = mesh->vertex(triangle.v[2], 5.f);

//bool isect = TriangleIntersector().intersect(ray,v0,v1,v2,Intersect1Epilog1_HWIF<Ray>(ray, &context, 0, 0, false));
//if (isect)
// rayhit_i->hit.geomID = 0;
//else
// rayhit_i->hit.geomID = -1;
//return;

#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
for (uint32_t l=0; l<RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
ray.instID[l] = RTC_INVALID_GEOMETRY_ID;
Expand Down Expand Up @@ -664,7 +708,7 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_
intel_ray_query_sync(query);

if (args->feature_mask & TRAV_LOOP_FEATURES) {
trav_loop(query,ray,scene,scenes,&context,args->feature_mask);
trav_loop(query,ray,scene,&context,args->feature_mask);
}

bool valid = intel_has_committed_hit(query);
Expand Down Expand Up @@ -724,8 +768,6 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_
SYCL_EXTERNAL __attribute__((always_inline)) void rtcOccludedRTHW(sycl::global_ptr<RTCTraversableTy> htraversable, sycl::private_ptr<RTCRayQueryContext> ucontext, sycl::private_ptr<RTCRay> ray_i, sycl::private_ptr<RTCOccludedArguments> args)
{
Scene* scene = (Scene*) htraversable.get();

Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT];

RayQueryContext context(scene, ucontext, args);

Expand Down Expand Up @@ -766,7 +808,7 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcOccludedRTHW(sycl::global_p
intel_ray_query_sync(query);

if (args->feature_mask & TRAV_LOOP_FEATURES) {
trav_loop(query,ray,scene,scenes,&context,args->feature_mask);
trav_loop(query,ray,scene,&context,args->feature_mask);
}

if (intel_has_committed_hit(query))
Expand Down

0 comments on commit 76da0e5

Please sign in to comment.