diff --git a/kernels/sycl/rthwif_embree.cpp b/kernels/sycl/rthwif_embree.cpp index 887e280a25..592b6bf178 100644 --- a/kernels/sycl/rthwif_embree.cpp +++ b/kernels/sycl/rthwif_embree.cpp @@ -62,6 +62,64 @@ const constexpr uint32_t TRAV_LOOP_FEATURES = RTC_FEATURE_FLAG_INSTANCE_ARRAY | RTC_FEATURE_FLAG_FILTER_FUNCTION; +/* + low-level optimized array of scenes for (multi-level) instancing. Our compiler + really does not like const sized arrays ... +*/ +#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 2) +struct Scenes { + Scenes(Scene* scene) : scene0(scene) {} + + __forceinline void set(unsigned int level, Scene* scene) { + if (level == 0) scene0 = scene; + else scenes[level-1] = scene; + } + + __forceinline Scene* get(unsigned int level) { + return level ? scenes[level-1] : scene0; + } + + Scene* scene0; + Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT]; +}; +#elif (RTC_MAX_INSTANCE_LEVEL_COUNT == 2) +struct Scenes { + Scenes(Scene* scene) : scene0(scene) {} + + __forceinline void set(unsigned int level, Scene* scene) { + if (level == 0) scene0 = scene; + else if (level == 1) scene1 = scene; + else scene2 = scene; + } + + __forceinline Scene* get(unsigned int level) { + if (level == 0) return scene0; + else if (level == 1) return scene1; + else return scene2; + } + + Scene* scene0; + Scene* scene1; + Scene* scene2; +}; +#else +struct Scenes { + Scenes(Scene* scene) : scene0(scene) {} + + __forceinline void set(unsigned int level, Scene* scene) { + if (level == 0) scene0 = scene; + else scene1 = scene; + } + + __forceinline Scene* get(unsigned int level) { + return level ? scene1 : scene0; + } + + Scene* scene0; + Scene* scene1; +}; +#endif + void use_rthwif_embree() { } @@ -74,7 +132,7 @@ __forceinline Vec3f intel_get_hit_triangle_normal(intel_ray_query_t& query, inte return cross(v1-v0, v2-v0); } -__forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray, UserGeometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr context, uint32_t geomID, uint32_t primID) +__forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray, UserGeometry* geom, Scenes& scenes, sycl::private_ptr context, uint32_t geomID, uint32_t primID) { /* perform ray mask test */ #if defined(EMBREE_RAY_MASK) @@ -93,8 +151,8 @@ __forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray constexpr unsigned int bvh_level = 0; #endif Scene* scene = (Scene*) forward_scene; - scenes[bvh_level] = scene; - + scenes.set(bvh_level+1, scene); + intel_ray_desc_t raydesc; raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z); raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z); @@ -116,7 +174,7 @@ __forceinline bool intersect_user_geometry(intel_ray_query_t& query, RayHit& ray return false; } -__forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, UserGeometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr context, uint32_t geomID, uint32_t primID) +__forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, UserGeometry* geom, Scenes& scenes, sycl::private_ptr context, uint32_t geomID, uint32_t primID) { /* perform ray mask test */ #if defined(EMBREE_RAY_MASK) @@ -135,8 +193,8 @@ __forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, U constexpr unsigned int bvh_level = 0; #endif Scene* scene = (Scene*) forward_scene; - scenes[bvh_level] = scene; - + scenes.set(bvh_level+1, scene); + intel_ray_desc_t raydesc; raydesc.origin = float3(ray.org.x, ray.org.y, ray.org.z); raydesc.direction = float3(ray.dir.x, ray.dir.y, ray.dir.z); @@ -159,10 +217,10 @@ __forceinline bool intersect_user_geometry(intel_ray_query_t& query, Ray& ray, U } template -__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr context, uint32_t geomID, uint32_t primID, bool instance_array); +__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geometry* geom, Scenes& scenes, sycl::private_ptr context, uint32_t geomID, uint32_t primID, bool instance_array); template<> -__forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Geometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr context, uint32_t geomID, uint32_t primID, bool instance_array) +__forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Geometry* geom, Scenes& scenes, sycl::private_ptr context, uint32_t geomID, uint32_t primID, bool instance_array) { /* perform ray mask test */ #if defined(EMBREE_RAY_MASK) @@ -190,7 +248,7 @@ __forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Geo const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org); const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir); - scenes[bvh_level] = object; + scenes.set(bvh_level+1, object); intel_ray_desc_t raydesc; raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z); @@ -223,7 +281,7 @@ __forceinline bool intersect_instance(intel_ray_query_t& query, RayHit& ray, Geo } template<> -__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geometry* geom, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr context, uint32_t geomID, uint32_t primID, bool instance_array) +__forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geometry* geom, Scenes& scenes, sycl::private_ptr context, uint32_t geomID, uint32_t primID, bool instance_array) { /* perform ray mask test */ #if defined(EMBREE_RAY_MASK) @@ -251,7 +309,7 @@ __forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geomet const Vec3fa ray_org = xfmPoint (world2local, (Vec3f) ray.org); const Vec3fa ray_dir = xfmVector(world2local, (Vec3f) ray.dir); - scenes[bvh_level] = object; + scenes.set(bvh_level+1, object); intel_ray_desc_t raydesc; raydesc.origin = float3(ray_org.x, ray_org.y, ray_org.z); @@ -283,7 +341,7 @@ __forceinline bool intersect_instance(intel_ray_query_t& query, Ray& ray, Geomet } template -__forceinline bool intersect_primitive(intel_ray_query_t& query, Ray& ray, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], Geometry* geom, sycl::private_ptr context, uint32_t geomID, uint32_t primID, const RTCFeatureFlags feature_mask) +__forceinline bool intersect_primitive(intel_ray_query_t& query, Ray& ray, Scenes& scenes, Geometry* geom, sycl::private_ptr context, uint32_t geomID, uint32_t primID, const RTCFeatureFlags feature_mask) { #if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__) bool filter = feature_mask & (RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY); @@ -526,8 +584,9 @@ __forceinline bool commit_potential_hit(intel_ray_query_t& query, Ray& ray) { } template -__forceinline void trav_loop(intel_ray_query_t& query, Ray& ray, Scene* scene0, Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT], sycl::private_ptr context, const RTCFeatureFlags feature_mask) +__forceinline void trav_loop(intel_ray_query_t& query, Ray& ray, Scene* scene, sycl::private_ptr context, const RTCFeatureFlags feature_mask) { + Scenes scenes(scene); while (!intel_is_traversal_done(query)) { intel_candidate_type_t candidate = intel_get_hit_candidate(query, intel_hit_type_potential_hit); @@ -543,25 +602,25 @@ __forceinline void trav_loop(intel_ray_query_t& query, Ray& ray, Scene* scene0, ray.org = Vec3ff(org.x(), org.y(), org.z(), ray.tnear()); ray.dir = Vec3ff(dir.x(), dir.y(), dir.z(), ray.time ()); ray.tfar = intel_get_hit_distance(query, intel_hit_type_committed_hit); - + #if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 context->user->instStackSize = bvh_level; - Scene* scene = bvh_level ? scenes[bvh_level - 1] : scene0; + Scene* scene = scenes.get(bvh_level); #else const unsigned int instID = intel_get_hit_instance_id(query, intel_hit_type_potential_hit); - + /* assume software instancing mode by default (required for rtcForwardRay) */ - Scene* scene = bvh_level ? scenes[0] : scene0; + Scene* scene = scenes.get(bvh_level); /* if we are in hardware instancing mode and we need to read the scene from the instance */ if (bvh_level > 0 && instID != RTC_INVALID_GEOMETRY_ID) { - Instance* inst = scene0->get(instID); + Instance* inst = scenes.get(0)->get(instID); scene = (Scene*) inst->object; context->user->instID[0] = instID; } else if (bvh_level == 0) context->user->instID[0] = RTC_INVALID_GEOMETRY_ID; - + #endif context->scene = scene; Geometry* geom = scene->get(geomID); @@ -597,8 +656,6 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_ { Scene* scene = (Scene*) hscene.get(); - Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT]; - RayQueryContext context(scene, ucontext, args); RayHit ray; @@ -613,7 +670,6 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_ ray.v = 0; ray.primID = RTC_INVALID_GEOMETRY_ID; ray.geomID = RTC_INVALID_GEOMETRY_ID; - #if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 for (uint32_t l=0; lfeature_mask & TRAV_LOOP_FEATURES) { - trav_loop(query,ray,scene,scenes,&context,args->feature_mask); + trav_loop(query,ray,scene,&context,args->feature_mask); } bool valid = intel_has_committed_hit(query); @@ -715,8 +771,6 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcIntersectRTHW(sycl::global_ SYCL_EXTERNAL __attribute__((always_inline)) void rtcOccludedRTHW(sycl::global_ptr hscene, sycl::private_ptr ucontext, sycl::private_ptr ray_i, sycl::private_ptr args) { Scene* scene = (Scene*) hscene.get(); - - Scene* scenes[RTC_MAX_INSTANCE_LEVEL_COUNT]; RayQueryContext context(scene, ucontext, args); @@ -757,7 +811,7 @@ SYCL_EXTERNAL __attribute__((always_inline)) void rtcOccludedRTHW(sycl::global_p intel_ray_query_sync(query); if (args->feature_mask & TRAV_LOOP_FEATURES) { - trav_loop(query,ray,scene,scenes,&context,args->feature_mask); + trav_loop(query,ray,scene,&context,args->feature_mask); } if (intel_has_committed_hit(query))