diff --git a/include/Core/Castor3D/Buffer/ObjectBufferPool.hpp b/include/Core/Castor3D/Buffer/ObjectBufferPool.hpp index 60192f9de8..bca0d3951a 100644 --- a/include/Core/Castor3D/Buffer/ObjectBufferPool.hpp +++ b/include/Core/Castor3D/Buffer/ObjectBufferPool.hpp @@ -87,12 +87,12 @@ namespace castor3d public: struct ModelBuffers { - explicit ModelBuffers( GpuPackedBaseBufferUPtr vtx ) - : vertex{ castor::move( vtx ) } + explicit ModelBuffers( GpuPackedBaseBufferUPtr idx ) + : index{ castor::move( idx ) } { } - GpuPackedBaseBufferUPtr vertex; + GpuPackedBaseBufferUPtr index; }; using BufferArray = castor::Vector< ModelBuffers >; @@ -119,15 +119,15 @@ namespace castor3d /** *\~english *\brief Retrieves a GPU buffer with the given size. - *\param[in] vertexCount The wanted buffer element count. + *\param[in] indexCount The wanted buffer element count. *\return The GPU buffer. *\~french *\brief Récupère un tampon GPU avec la taille donnée. - *\param[in] vertexCount Le nombre d'éléments voulu pour le tampon. + *\param[in] indexCount Le nombre d'éléments voulu pour le tampon. *\return Le tampon GPU. */ template< typename IndexT > - ObjectBufferOffset getBuffer( VkDeviceSize vertexCount ); + ObjectBufferOffset getBuffer( VkDeviceSize indexCount ); /** *\~english *\brief Releases a GPU buffer. 
diff --git a/include/Core/Castor3D/Buffer/ObjectBufferPool.inl b/include/Core/Castor3D/Buffer/ObjectBufferPool.inl index 266c600b74..ab5d79367c 100644 --- a/include/Core/Castor3D/Buffer/ObjectBufferPool.inl +++ b/include/Core/Castor3D/Buffer/ObjectBufferPool.inl @@ -63,17 +63,17 @@ namespace castor3d ObjectBufferOffset VertexBufferPool::getBuffer( VkDeviceSize vertexCount ) { ObjectBufferOffset result; - auto size = vertexCount * sizeof( VertexT ); + auto size = VkDeviceSize( vertexCount * sizeof( VertexT ) ); auto it = doFindBuffer( size, m_buffers ); if ( it == m_buffers.end() ) { ModelBuffers buffers{ details::createBaseBuffer< uint8_t >( m_device - , size + , std::max( size, VkDeviceSize( 65536U ) ) , VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT , VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT , m_debugName + cuT( "Vertex" ) + castor::string::toString( m_buffers.size() ) - , 1u ) }; + , uint32_t( m_device.properties.limits.minMemoryMapAlignment ) ) }; m_buffers.emplace_back( castor::move( buffers ) ); it = std::next( m_buffers.begin() , ptrdiff_t( m_buffers.size() - 1u ) ); @@ -87,27 +87,27 @@ namespace castor3d //********************************************************************************************* template< typename IndexT > - ObjectBufferOffset IndexBufferPool::getBuffer( VkDeviceSize vertexCount ) + ObjectBufferOffset IndexBufferPool::getBuffer( VkDeviceSize indexCount ) { ObjectBufferOffset result; - auto size = vertexCount * sizeof(IndexT); + auto size = VkDeviceSize( indexCount * sizeof( IndexT ) ); auto it = doFindBuffer( size, m_buffers ); if ( it == m_buffers.end() ) { ModelBuffers buffers{ details::createBaseBuffer< uint8_t >( m_device - , size - , VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT + , std::max( size, VkDeviceSize( 65536U ) ) + , VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT , VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT , m_debugName + cuT( "Index" ) + castor::string::toString( 
m_buffers.size() ) - , 1u ) }; + , uint32_t( m_device.properties.limits.minMemoryMapAlignment ) ) }; m_buffers.emplace_back( castor::move( buffers ) ); it = std::next( m_buffers.begin() , ptrdiff_t( m_buffers.size() - 1u ) ); } - result.buffers[uint32_t( SubmeshData::eIndex )].buffer = it->vertex.get(); - result.buffers[uint32_t( SubmeshData::eIndex )].chunk = it->vertex->allocate( size ); + result.buffers[uint32_t( SubmeshData::eIndex )].buffer = it->index.get(); + result.buffers[uint32_t( SubmeshData::eIndex )].chunk = it->index->allocate( size ); return result; } diff --git a/include/Core/Castor3D/Render/Clustered/AssignLightsToClusters.hpp b/include/Core/Castor3D/Render/Clustered/AssignLightsToClusters.hpp index eede307b3e..5f486a359a 100644 --- a/include/Core/Castor3D/Render/Clustered/AssignLightsToClusters.hpp +++ b/include/Core/Castor3D/Render/Clustered/AssignLightsToClusters.hpp @@ -11,7 +11,8 @@ namespace castor3d C3D_API crg::FramePass const & createAssignLightsToClustersPass( crg::FramePassGroup & graph , crg::FramePassArray const & previousPasses , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters & clusters ); } diff --git a/include/Core/Castor3D/Render/Clustered/RadixSortLights.hpp b/include/Core/Castor3D/Render/Clustered/BucketSortLights.hpp similarity index 53% rename from include/Core/Castor3D/Render/Clustered/RadixSortLights.hpp rename to include/Core/Castor3D/Render/Clustered/BucketSortLights.hpp index f295b7cda4..61935af8c1 100644 --- a/include/Core/Castor3D/Render/Clustered/RadixSortLights.hpp +++ b/include/Core/Castor3D/Render/Clustered/BucketSortLights.hpp @@ -1,14 +1,14 @@ /* See LICENSE file in root folder */ -#ifndef ___C3D_RadixSortLightsMortonCode_H___ -#define ___C3D_RadixSortLightsMortonCode_H___ +#ifndef ___C3D_BucketSortLightsMortonCode_H___ +#define ___C3D_BucketSortLightsMortonCode_H___ #include "ClusteredModule.hpp" namespace 
castor3d { - C3D_API crg::FramePassArray createRadixSortLightsPass( crg::FramePassGroup & graph + C3D_API crg::FramePassArray createBucketSortLightsPass( crg::FramePassGroup & graph , crg::FramePass const * previousPass , RenderDevice const & device , FrustumClusters & clusters ); diff --git a/include/Core/Castor3D/Render/Clustered/BuildLightsBVH.hpp b/include/Core/Castor3D/Render/Clustered/BuildLightsBVH.hpp index 08e704681c..b948a0fe4f 100644 --- a/include/Core/Castor3D/Render/Clustered/BuildLightsBVH.hpp +++ b/include/Core/Castor3D/Render/Clustered/BuildLightsBVH.hpp @@ -12,6 +12,20 @@ namespace castor3d , crg::FramePassArray const & previousPasses , RenderDevice const & device , FrustumClusters & clusters ); + C3D_API void createDisplayPointLightsBVHProgram( RenderDevice const & device + , FrustumClusters const & clusters + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo + , ashes::PipelineShaderStageCreateInfoArray & program + , ashes::VkDescriptorSetLayoutBindingArray & bindings + , ashes::WriteDescriptorSetArray & writes ); + C3D_API void createDisplaySpotLightsBVHProgram( RenderDevice const & device + , FrustumClusters const & clusters + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo + , ashes::PipelineShaderStageCreateInfoArray & program + , ashes::VkDescriptorSetLayoutBindingArray & bindings + , ashes::WriteDescriptorSetArray & writes ); } #endif diff --git a/include/Core/Castor3D/Render/Clustered/ClusteredModule.hpp b/include/Core/Castor3D/Render/Clustered/ClusteredModule.hpp index 0f04557f26..82d5b3d9c3 100644 --- a/include/Core/Castor3D/Render/Clustered/ClusteredModule.hpp +++ b/include/Core/Castor3D/Render/Clustered/ClusteredModule.hpp @@ -18,9 +18,8 @@ namespace castor3d enum class ClusterSplitScheme { eExponentialBase = 0, - eExponentialBiased = 1, - eLinear = 2, - eExponentialLinearHybrid = 3, + eLinear = 1, + eExponentialLinearHybrid = 2, CU_ScopedEnumBounds( eExponentialBase, 
eExponentialLinearHybrid ) }; C3D_API castor::String getName( ClusterSplitScheme value ); diff --git a/include/Core/Castor3D/Render/Clustered/ClustersConfig.hpp b/include/Core/Castor3D/Render/Clustered/ClustersConfig.hpp index 559f0a09b8..58bfdb1b8a 100644 --- a/include/Core/Castor3D/Render/Clustered/ClustersConfig.hpp +++ b/include/Core/Castor3D/Render/Clustered/ClustersConfig.hpp @@ -11,6 +11,15 @@ See LICENSE file in root folder namespace castor3d { + enum class ClusterDebugDisplay + { + eNone = 0, + eClustersAABB = 1, + eLightsAABB = 2, + eLightsBVH = 3, + CU_ScopedEnumBounds( eNone, eLightsBVH ) + }; + struct ClustersConfig { C3D_API ClustersConfig(); @@ -50,12 +59,18 @@ namespace castor3d //!\~english Enable sorting of clusters lights. //!\~french Autoriser le tri des sources lumineuses dans les clusters. castor::GroupChangeTracked< bool > enablePostAssignSort; + //!\~english Locks clusters frustum, for debug purpose. + //!\~french Verrouille le frustum des clusters, pour le débogage. + castor::GroupChangeTracked< bool > lockClustersFrustum; + //!\~english Debug display mode. + //!\~french Mode d'affichage de debug. + castor::GroupChangeTracked< ClusterDebugDisplay > debugDisplay; //!\~english The clusters Z split scheme. //!\~french Le mode découpage en Z des clusters. castor::GroupChangeTracked < ClusterSplitScheme > splitScheme; - //!\~english The biased exponential split scheme bias. - //!\~french Le décalage en mode de découpage exponentiel décalé. - castor::GroupChangeTracked< float > bias; + //!\~english The hybrid split scheme minimal threshold distance. + //!\~french La distance minimale en mode de découpage hybride. 
+ castor::GroupChangeTracked< float > minDistance; }; C3D_API bool operator==( ClustersConfig const & lhs, ClustersConfig const & rhs ); diff --git a/include/Core/Castor3D/Render/Clustered/ClustersMask.hpp b/include/Core/Castor3D/Render/Clustered/ClustersMask.hpp index e0e951a175..f36108774a 100644 --- a/include/Core/Castor3D/Render/Clustered/ClustersMask.hpp +++ b/include/Core/Castor3D/Render/Clustered/ClustersMask.hpp @@ -1,4 +1,4 @@ -/* +/* See LICENSE file in root folder */ #ifndef ___C3D_ClustersMask_H___ @@ -11,7 +11,8 @@ namespace castor3d crg::FramePass const & createClustersMaskPass( crg::FramePassGroup & graph , crg::FramePass const & previousPass , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters & clusters , RenderTechnique & technique , RenderNodesPass *& nodesPass ); diff --git a/include/Core/Castor3D/Render/Clustered/ComputeClustersAABB.hpp b/include/Core/Castor3D/Render/Clustered/ComputeClustersAABB.hpp index 800130df02..b0a098b22b 100644 --- a/include/Core/Castor3D/Render/Clustered/ComputeClustersAABB.hpp +++ b/include/Core/Castor3D/Render/Clustered/ComputeClustersAABB.hpp @@ -11,8 +11,16 @@ namespace castor3d C3D_API crg::FramePass const & createComputeClustersAABBPass( crg::FramePassGroup & graph , crg::FramePass const * previousPass , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters const & clusters ); + C3D_API void createDisplayClustersAABBProgram( RenderDevice const & device + , FrustumClusters const & clusters + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo + , ashes::PipelineShaderStageCreateInfoArray & program + , ashes::VkDescriptorSetLayoutBindingArray & bindings + , ashes::WriteDescriptorSetArray & writes ); } #endif diff --git a/include/Core/Castor3D/Render/Clustered/ComputeLightsAABB.hpp 
b/include/Core/Castor3D/Render/Clustered/ComputeLightsAABB.hpp index bd3cb2e7a9..48bbf87e62 100644 --- a/include/Core/Castor3D/Render/Clustered/ComputeLightsAABB.hpp +++ b/include/Core/Castor3D/Render/Clustered/ComputeLightsAABB.hpp @@ -11,8 +11,16 @@ namespace castor3d C3D_API crg::FramePass const & createComputeLightsAABBPass( crg::FramePassGroup & graph , crg::FramePass const * previousPass , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters const & clusters ); + C3D_API void createDisplayLightsAABBProgram( RenderDevice const & device + , FrustumClusters const & clusters + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo + , ashes::PipelineShaderStageCreateInfoArray & program + , ashes::VkDescriptorSetLayoutBindingArray & bindings + , ashes::WriteDescriptorSetArray & writes ); } #endif diff --git a/include/Core/Castor3D/Render/Clustered/FrustumClusters.hpp b/include/Core/Castor3D/Render/Clustered/FrustumClusters.hpp index 4d4336e558..24ec3c33aa 100644 --- a/include/Core/Castor3D/Render/Clustered/FrustumClusters.hpp +++ b/include/Core/Castor3D/Render/Clustered/FrustumClusters.hpp @@ -9,6 +9,7 @@ See LICENSE file in root folder #include "Castor3D/Buffer/GpuBufferOffset.hpp" #include "Castor3D/Render/Clustered/ClustersConfig.hpp" #include "Castor3D/Shader/Ubos/ClustersUbo.hpp" +#include "Castor3D/Shader/Ubos/CameraUbo.hpp" #include #include @@ -17,6 +18,8 @@ See LICENSE file in root folder namespace castor3d { + class DebugDrawer; + class FrustumClusters { public: @@ -38,6 +41,15 @@ namespace castor3d *\param[in, out] updater Les données d'update. */ C3D_API void update( CpuUpdater & updater ); + /** + *\~english + *\brief Debug update. + *\param[in, out] drawer The debug drawer. + *\~french + *\brief Mise à jour du debug. + *\param[in, out] drawer Le debug drawer. 
+ */ + C3D_API void updateDebug( DebugDrawer & drawer ); /** *\~english *\brief Registers the clusters related frame passes. @@ -77,6 +89,8 @@ namespace castor3d */ C3D_API static uint32_t getNumNodes( uint32_t numLeaves ); + C3D_API static uint32_t getBucketSortBucketSize(); + castor::Point3ui const & getDimensions()const noexcept { return m_dimensions; @@ -105,26 +119,26 @@ namespace castor3d auto & getPointLightClusterGridBuffer()const noexcept { - CU_Require( m_pointLightClusterGridBuffer ); - return *m_pointLightClusterGridBuffer; + CU_Require( m_pointBuffers.clusterGrid ); + return *m_pointBuffers.clusterGrid; } auto & getSpotLightClusterGridBuffer()const noexcept { - CU_Require( m_spotLightClusterGridBuffer ); - return *m_spotLightClusterGridBuffer; + CU_Require( m_spotBuffers.clusterGrid ); + return *m_spotBuffers.clusterGrid; } auto & getPointLightClusterIndexBuffer()const noexcept { - CU_Require( m_pointLightClusterIndexBuffer ); - return *m_pointLightClusterIndexBuffer; + CU_Require( m_pointBuffers.clusterIndex ); + return *m_pointBuffers.clusterIndex; } auto & getSpotLightClusterIndexBuffer()const noexcept { - CU_Require( m_spotLightClusterIndexBuffer ); - return *m_spotLightClusterIndexBuffer; + CU_Require( m_spotBuffers.clusterIndex ); + return *m_spotBuffers.clusterIndex; } auto & getAllLightsAABBBuffer()const noexcept @@ -139,32 +153,32 @@ namespace castor3d auto & getPointLightBVHBuffer()const noexcept { - return m_pointBVHBuffer->getBuffer(); + return m_pointBuffers.bvh->getBuffer(); } auto & getSpotLightBVHBuffer()const noexcept { - return m_spotBVHBuffer->getBuffer(); + return m_spotBuffers.bvh->getBuffer(); } ashes::BufferBase & getPointLightIndicesBuffer( uint32_t index )const noexcept { - return m_pointIndicesBuffers[index]->getBuffer(); + return m_pointBuffers.indices[index]->getBuffer(); } ashes::BufferBase & getSpotLightIndicesBuffer( uint32_t index )const noexcept { - return m_spotIndicesBuffers[index]->getBuffer(); + return 
m_spotBuffers.indices[index]->getBuffer(); } ashes::BufferBase & getPointLightMortonCodesBuffer( uint32_t index )const noexcept { - return m_pointMortonCodesBuffers[index]->getBuffer(); + return m_pointBuffers.mortonCodes[index]->getBuffer(); } ashes::BufferBase & getSpotLightMortonCodesBuffer( uint32_t index )const noexcept { - return m_spotMortonCodesBuffers[index]->getBuffer(); + return m_spotBuffers.mortonCodes[index]->getBuffer(); } ashes::BufferBase & getInputPointLightIndicesBuffer()const noexcept @@ -285,6 +299,11 @@ namespace castor3d return m_config; } + auto & getCameraUbo()const noexcept + { + return m_clustersCameraUbo; + } + OnClustersBuffersChanged onClusterBuffersChanged; private: @@ -294,6 +313,20 @@ namespace castor3d castor::Point4f max; }; + struct Buffers + { + Buffers( RenderDevice const & device + , castor::String const & name ); + + // Fixed size buffers, related to lights + castor::Array< ashes::BufferPtr< u32 >, 2u > mortonCodes; + castor::Array< ashes::BufferPtr< u32 >, 2u > indices; + ashes::BufferPtr< AABB > bvh; + // Variable size buffers, related to frustum dimensions + ashes::BufferBasePtr clusterGrid; + ashes::BufferBasePtr clusterIndex; + }; + private: void doUpdate(); @@ -311,28 +344,38 @@ namespace castor3d castor::GroupChangeTracked< castor::Matrix4x4f > m_cameraProjection; castor::GroupChangeTracked< castor::Matrix4x4f > m_cameraView; ClustersUbo m_clustersUbo; + CameraUbo m_clustersCameraUbo; ashes::BufferPtr< VkDispatchIndirectCommand > m_clustersIndirect; // Fixed size buffers, related to lights ashes::BufferPtr< AABB > m_allLightsAABBBuffer; ashes::BufferPtr< AABB > m_reducedLightsAABBBuffer; - castor::Array< ashes::BufferPtr< u32 >, 2u > m_pointMortonCodesBuffers; - castor::Array< ashes::BufferPtr< u32 >, 2u > m_spotMortonCodesBuffers; - castor::Array< ashes::BufferPtr< u32 >, 2u > m_pointIndicesBuffers; - castor::Array< ashes::BufferPtr< u32 >, 2u > m_spotIndicesBuffers; - ashes::BufferPtr< AABB > m_pointBVHBuffer; - 
ashes::BufferPtr< AABB > m_spotBVHBuffer; ashes::BufferPtr< s32 > m_mergePathPartitionsBuffer; + // Light type specific buffers + Buffers m_pointBuffers; + Buffers m_spotBuffers; // Variable size buffers, related to frustum dimensions ashes::BufferBasePtr m_aabbBuffer; - ashes::BufferBasePtr m_pointLightClusterGridBuffer; - ashes::BufferBasePtr m_spotLightClusterGridBuffer; - ashes::BufferBasePtr m_pointLightClusterIndexBuffer; - ashes::BufferBasePtr m_spotLightClusterIndexBuffer; ashes::BufferBasePtr m_clusterFlags; ashes::BufferBasePtr m_uniqueClusters; castor::Vector< ashes::BufferBasePtr > m_toDelete; + + ashes::PipelineShaderStageCreateInfoArray m_displayClustersAABBProgram; + ashes::VkDescriptorSetLayoutBindingArray m_displayClustersAABBBindings; + ashes::WriteDescriptorSetArray m_displayClustersAABBWrites; + + ashes::PipelineShaderStageCreateInfoArray m_displayLightsAABBProgram; + ashes::VkDescriptorSetLayoutBindingArray m_displayLightsAABBBindings; + ashes::WriteDescriptorSetArray m_displayLightsAABBWrites; + + ashes::PipelineShaderStageCreateInfoArray m_displayPointLightsBVHProgram; + ashes::VkDescriptorSetLayoutBindingArray m_displayPointLightsBVHBindings; + ashes::WriteDescriptorSetArray m_displayPointLightsBVHWrites; + + ashes::PipelineShaderStageCreateInfoArray m_displaySpotLightsBVHProgram; + ashes::VkDescriptorSetLayoutBindingArray m_displaySpotLightsBVHBindings; + ashes::WriteDescriptorSetArray m_displaySpotLightsBVHWrites; }; } diff --git a/include/Core/Castor3D/Render/Clustered/ReduceLightsAABB.hpp b/include/Core/Castor3D/Render/Clustered/ReduceLightsAABB.hpp index 525187ad49..54936d8dd7 100644 --- a/include/Core/Castor3D/Render/Clustered/ReduceLightsAABB.hpp +++ b/include/Core/Castor3D/Render/Clustered/ReduceLightsAABB.hpp @@ -1,4 +1,4 @@ -/* +/* See LICENSE file in root folder */ #ifndef ___C3D_ReduceLightsAABB_H___ @@ -11,7 +11,8 @@ namespace castor3d C3D_API crg::FramePass const & createReduceLightsAABBPass( crg::FramePassGroup & graph , 
crg::FramePass const * previousPass , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters & clusters ); } diff --git a/include/Core/Castor3D/Render/Debug/DebugDrawer.hpp b/include/Core/Castor3D/Render/Debug/DebugDrawer.hpp index 1a6c69b430..7884ea093f 100644 --- a/include/Core/Castor3D/Render/Debug/DebugDrawer.hpp +++ b/include/Core/Castor3D/Render/Debug/DebugDrawer.hpp @@ -10,6 +10,8 @@ See LICENSE file in root folder #include +#include + namespace castor3d { class DebugDrawer @@ -19,103 +21,127 @@ namespace castor3d /** *\~english *\brief Constructor. - *\param[in] parent The parent render target. - *\param[in] device The GPU device. - *\param[in] colour The target colour image. - *\param[in] depth The target depth image. + *\param[in] graph The graph. + *\param[in] previous The previous pass. + *\param[in] device The GPU device. + *\param[in] parent The parent render target. + *\param[in] colour The target colour image. + *\param[in] depth The target depth image. *\~french *\brief Constructeur. - *\param[in] parent La render target parente. - *\param[in] device Le device GPU. - *\param[in] colour L'image couleur cible. - *\param[in] depth L'image profondeur cible. + *\param[in] graph Le graphe. + *\param[in] previous La passe précédente. + *\param[in] device Le device GPU. + *\param[in] parent La render target parente. + *\param[in] colour L'image couleur cible. + *\param[in] depth L'image profondeur cible. */ - C3D_API DebugDrawer( RenderTarget & parent + C3D_API DebugDrawer( crg::FramePassGroup & graph + , crg::FramePass const * previous , RenderDevice const & device - , Texture const & colour - , Texture const & depth ); + , RenderTarget & parent + , crg::ImageViewIdArray colour + , Texture const & depth + , uint32_t const * passIndex ); /** *\~english *\brief Adds a buffer containing AABBs to draw. - *\param[in] buffer The GPU buffer. 
- *\param[in] offset The binary offset. - *\param[in] size The binary size. - *\param[in] count The AABB count. - *\param[in] shader The shader stages. + *\param[in] bindings The shader data bindings. + *\param[in] writes The shader data. + *\param[in] count The number of AABB to draw. + *\param[in] shader The shader used to draw the AABB. *\~french *\brief Ajoute un buffer d'AABB à dessiner. - *\param[in] buffer Le buffer GPU. - *\param[in] offset L'offset binaire. - *\param[in] size La taille binaire. - *\param[in] count Le nombre d'AABB. - *\param[in] shader Les shaders. + *\param[in] bindings Les bindings des données à passer au shader. + *\param[in] writes Les données à passer au shader. + *\param[in] count Le nombre d'AABB à dessiner. + *\param[in] shader Le shader utilisé pour dessiner les AABB. */ - C3D_API void addAabbs( VkBuffer buffer - , VkDeviceSize offset - , VkDeviceSize size + C3D_API void addAabbs( ashes::VkDescriptorSetLayoutBindingArray const & bindings + , ashes::WriteDescriptorSetArray const & writes , VkDeviceSize count - , ashes::PipelineShaderStageCreateInfoArray shader ); - /** - *\~english - *\brief Renders added objects. - *\param[in] queue The queue receiving the render commands. - *\param[in] toWait The semaphores to wait. - *\return The semaphores signaled by this render. - *\~french - *\brief Dessine les objets ajoutés. - *\param[in] queue La file recevant les commandes de dessin. - *\param[in] toWait Les sémaphores à attendre. - *\return Les sémaphores signalés par ce dessin. 
- */ - C3D_API crg::SemaphoreWaitArray render( ashes::Queue const & queue - , crg::SemaphoreWaitArray toWait = {} ); + , ashes::PipelineShaderStageCreateInfoArray const & shader + , bool enableDepthTest ); + + crg::FramePass const & getLastPass()const noexcept + { + return *m_lastPass; + } private: + struct Object + { + ObjectBufferOffset vertices; + ObjectBufferOffset indices; + }; + struct Pipeline { ashes::DescriptorSetLayoutPtr descriptorLayout; ashes::PipelineLayoutPtr pipelineLayout; ashes::GraphicsPipelinePtr pipeline; - ashes::DescriptorSetPoolPtr descriptorPool; - ashes::DescriptorSetPtr descriptorSet; - ObjectBufferOffset vertices; - ObjectBufferOffset indices; + + struct Instance + { + ashes::DescriptorSetPoolPtr descriptorPool; + ashes::DescriptorSetPtr descriptorSet; + }; + + castor::UnorderedMap< size_t, Instance > instances; }; struct AABBBuffer { - AABBBuffer( VkBuffer pbuffer - , VkDeviceSize poffset - , VkDeviceSize psize - , VkDeviceSize pcount - , Pipeline * ppipeline ) - : buffer{ pbuffer } - , offset{ poffset } - , size{ psize } - , count{ pcount } - , pipeline{ ppipeline } + AABBBuffer( VkDeviceSize count + , Pipeline * pipeline + , ashes::DescriptorSet * descriptorSet + , ashes::WriteDescriptorSetArray writes ); - { - } - - VkBuffer buffer; - VkDeviceSize offset; - VkDeviceSize size; VkDeviceSize count; Pipeline * pipeline; + ashes::DescriptorSet * descriptorSet; + ashes::WriteDescriptorSetArray writes; }; using PipelinePtr = castor::RawUniquePtr< Pipeline >; + class FramePass + : public crg::RenderPass + { + public: + FramePass( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph + , RenderDevice const & device + , VkExtent2D dimensions + , uint32_t const * passIndex ); + ~FramePass()noexcept override; + + void addAabbs( ashes::VkDescriptorSetLayoutBindingArray const & bindings + , ashes::WriteDescriptorSetArray const & writes + , VkDeviceSize count + , ashes::PipelineShaderStageCreateInfoArray 
const & shader + , bool enableDepthTest ); + + private: + void doSubRecordInto( crg::RecordContext const & context + , VkCommandBuffer commandBuffer ); + bool doIsEnabled()const noexcept + { + return m_pending || !m_aabbs.empty(); + } + + private: + RenderDevice const & m_device; + castor::UnorderedMap< size_t, PipelinePtr > m_pipelines; + castor::Vector< AABBBuffer > m_aabbs; + Object m_aabb; + bool m_pending{}; + }; + private: - RenderDevice const & m_device; - castor::UnorderedMap< size_t, PipelinePtr > m_pipelines; - ashes::RenderPassPtr m_renderPass; - ashes::FrameBufferPtr m_framebuffer; - ashes::CommandPoolPtr m_commandPool; - castor::Array< CommandsSemaphore, 2u > m_commandBuffers; - castor::Vector< AABBBuffer > m_aabbs; - uint32_t m_index{}; + crg::FramePass const * m_lastPass{}; + FramePass * m_framePass{}; }; } diff --git a/include/Core/Castor3D/Render/Debug/DebugModule.hpp b/include/Core/Castor3D/Render/Debug/DebugModule.hpp index b846af3c0a..c979fb07ec 100644 --- a/include/Core/Castor3D/Render/Debug/DebugModule.hpp +++ b/include/Core/Castor3D/Render/Debug/DebugModule.hpp @@ -26,6 +26,29 @@ namespace castor3d CU_DeclareSmartPtr( castor3d, DebugDrawer, C3D_API ); + /** + *\~english + *\brief Adds a buffer containing AABBs to draw. + *\param[in] drawer The debug drawer. + *\param[in] bindings The shader data bindings. + *\param[in] writes The shader data. + *\param[in] count The number of AABB to draw. + *\param[in] shader The shader used to draw the AABB. + *\~french + *\brief Ajoute un buffer d'AABB à dessiner. + *\param[in] drawer Le debug drawer. + *\param[in] bindings Les bindings des données à passer au shader. + *\param[in] writes Les données à passer au shader. + *\param[in] count Le nombre d'AABB à dessiner. + *\param[in] shader Le shader utilisé pour dessiner les AABB. 
+ */ + C3D_API void addDebugAabbs( DebugDrawer & drawer + , ashes::VkDescriptorSetLayoutBindingArray const & bindings + , ashes::WriteDescriptorSetArray const & writes + , VkDeviceSize count + , ashes::PipelineShaderStageCreateInfoArray const & shader + , bool enableDepthTest ); + //@} //@} } diff --git a/include/Core/Castor3D/Render/RenderTarget.hpp b/include/Core/Castor3D/Render/RenderTarget.hpp index 21556e8935..d408153e94 100644 --- a/include/Core/Castor3D/Render/RenderTarget.hpp +++ b/include/Core/Castor3D/Render/RenderTarget.hpp @@ -398,6 +398,11 @@ namespace castor3d { return m_frustumClusters.get(); } + + DebugDrawer & getDebugDrawer()const noexcept + { + return *m_debugDrawer; + } /**@}*/ /** *\~english @@ -557,6 +562,28 @@ namespace castor3d C3D_API Engine * getEngine( TargetContext const & context ); C3D_API RootContext * getRootContext( TargetContext const & context ); + /** + *\~english + *\brief Adds a buffer containing AABBs to draw. + *\param[in] target The target the AABB are drawn to. + *\param[in] bindings The shader data bindings. + *\param[in] writes The shader data. + *\param[in] count The number of AABB to draw. + *\param[in] shader The shader used to draw the AABB. + *\~french + *\brief Ajoute un buffer d'AABB à dessiner. + *\param[in] target La cible où sont dessinées les AABB. + *\param[in] bindings Les bindings des données à passer au shader. + *\param[in] writes Les données à passer au shader. + *\param[in] count Le nombre d'AABB à dessiner. + *\param[in] shader Le shader utilisé pour dessiner les AABB. 
+ */ + C3D_API void addDebugAabbs( RenderTarget const & target + , ashes::VkDescriptorSetLayoutBindingArray const & bindings + , ashes::WriteDescriptorSetArray const & writes + , VkDeviceSize count + , ashes::PipelineShaderStageCreateInfoArray const & shader + , bool enableDepthTest ); } #endif diff --git a/include/Core/Castor3D/Scene/Light/Light.hpp b/include/Core/Castor3D/Scene/Light/Light.hpp index 536213cb5e..0059c524b4 100644 --- a/include/Core/Castor3D/Scene/Light/Light.hpp +++ b/include/Core/Castor3D/Scene/Light/Light.hpp @@ -330,6 +330,7 @@ namespace castor3d void setEnabled( bool value ) { m_enabled = value; + markDirty(); } void enable() @@ -434,10 +435,16 @@ namespace castor3d public: OnLightChanged onGPUChanged; - protected: - bool m_enabled{}; + private: + friend class LightCategory; + + bool & doGetDirty() + { + return m_dirty; + } + + castor::GroupChangeTracked< bool > m_enabled; std::atomic_bool m_currentShadowCaster{}; - bool m_dirty{ true }; ShadowConfig m_shadows; LightCategoryUPtr m_category; ShadowMapRPtr m_shadowMap{}; diff --git a/include/Core/Castor3D/Scene/Light/LightCategory.hpp b/include/Core/Castor3D/Scene/Light/LightCategory.hpp index 8184255557..a29cccd541 100644 --- a/include/Core/Castor3D/Scene/Light/LightCategory.hpp +++ b/include/Core/Castor3D/Scene/Light/LightCategory.hpp @@ -9,6 +9,7 @@ See LICENSE file in root folder #include "Castor3D/Shader/ShaderBuffers/LightBuffer.hpp" #include +#include #include namespace castor3d @@ -48,7 +49,7 @@ namespace castor3d Float1 shadowMapIndex; Float1 cascadeCount; Float3 posDir; - Float1 exponent; + Float1 enabled; }; static uint32_t constexpr LightMbrAlign = 4u * sizeof( float ); @@ -136,12 +137,12 @@ namespace castor3d float getDiffuseIntensity()const { - return m_intensity[0]; + return m_intensity.value()[0]; } float getSpecularIntensity()const { - return m_intensity[1]; + return m_intensity.value()[1]; } castor::Point2f const & getIntensity()const @@ -197,25 +198,18 @@ namespace castor3d { 
return m_light; } - - castor::Point3f & getColour() - { - return m_colour; - } - - castor::Point2f & getIntensity() - { - return m_intensity; - } /**@}*/ + protected: + bool & m_dirty; + private: LightType m_lightType; Light & m_light; uint32_t m_lightComponentCount{}; uint32_t m_shadowComponentCount{}; - castor::Point3f m_colour{ 1.0, 1.0, 1.0 }; - castor::Point2f m_intensity{ 1.0, 1.0 }; + castor::GroupChangeTracked< castor::Point3f > m_colour; + castor::GroupChangeTracked< castor::Point2f > m_intensity; /** *\~english *\brief Puts the light into the given buffer. diff --git a/include/Core/Castor3D/Scene/Light/PointLight.hpp b/include/Core/Castor3D/Scene/Light/PointLight.hpp index 3886ebb9e0..9175d0bc46 100644 --- a/include/Core/Castor3D/Scene/Light/PointLight.hpp +++ b/include/Core/Castor3D/Scene/Light/PointLight.hpp @@ -117,7 +117,7 @@ namespace castor3d private: friend class Scene; - bool m_dirtyData{ false }; + castor::GroupChangeTracked< float > m_range; castor::GroupChangeTracked< castor::Point3f > m_position; castor::Array< castor::Matrix4x4f, size_t( CubeMapFace::eCount ) > m_lightViews; diff --git a/include/Core/Castor3D/Scene/Light/SpotLight.hpp b/include/Core/Castor3D/Scene/Light/SpotLight.hpp index ee121c8a0f..60c3e533ca 100644 --- a/include/Core/Castor3D/Scene/Light/SpotLight.hpp +++ b/include/Core/Castor3D/Scene/Light/SpotLight.hpp @@ -25,6 +25,7 @@ namespace castor3d : LightCategory::LightData { Float3 direction; + Float1 exponent; Float1 outerCutoffCos; Float1 innerCutoff; Float1 outerCutoff; @@ -33,7 +34,6 @@ namespace castor3d Float1 innerCutoffCos; Float1 outerCutOffTan; Float1 pad0; - Float1 pad1; }; static constexpr uint32_t LightDataSize = uint32_t( ashes::getAlignedSize( sizeof( LightData ), LightMbrAlign ) ); static constexpr uint32_t LightDataComponents = LightDataSize / LightMbrAlign; @@ -162,7 +162,6 @@ namespace castor3d void doAccept( ConfigurationVisitorBase & vis )override; private: - bool m_dirtyData{ false }; bool m_dirtyShadow{ 
true }; castor::GroupChangeTracked< float > m_range; castor::GroupChangeTracked< float > m_exponent; diff --git a/include/Core/Castor3D/Scene/MovableObject.hpp b/include/Core/Castor3D/Scene/MovableObject.hpp index 2b4613b0f3..73d02fd8c5 100644 --- a/include/Core/Castor3D/Scene/MovableObject.hpp +++ b/include/Core/Castor3D/Scene/MovableObject.hpp @@ -113,6 +113,9 @@ namespace castor3d //!\~english The node change notification index. //!\~french L'indice de notifcation des changements du noeud. OnSceneNodeChangedConnection m_notifyIndex; + //!\~english The object "to update" status. + //!\~french Le statut d'objet "à mettre à jour". + bool m_dirty{ true }; }; template< typename CacheT > diff --git a/include/Core/Castor3D/Shader/Shaders/GlslBitonicSort.hpp b/include/Core/Castor3D/Shader/Shaders/GlslBitonicSort.hpp new file mode 100644 index 0000000000..344c0722c9 --- /dev/null +++ b/include/Core/Castor3D/Shader/Shaders/GlslBitonicSort.hpp @@ -0,0 +1,176 @@ +/* +See LICENSE file in root folder +*/ +#ifndef ___C3D_GlslBitonicSort_H___ +#define ___C3D_GlslBitonicSort_H___ + +#include "Castor3D/Shader/Shaders/SdwModule.hpp" + +#include + +namespace castor3d::shader +{ + /** + *\arg ValueSizeT The size of the value type. + */ + template< uint32_t ValueSizeT > + struct BitonicSortT + { + static uint32_t constexpr bucketSize{ 4096u / ValueSizeT }; + + uint32_t threadsCount; + + BitonicSortT( sdw::ComputeWriter & writer + , uint32_t batchesPerPass ) + : threadsCount{ bucketSize / ( batchesPerPass << 1u ) } + , m_batchSize{ writer.declConstant( "gBatchSize", sdw::UInt{ bucketSize / batchesPerPass } ) } + , m_numThreads{ writer.declConstant( "gNumThreads", sdw::UInt{ threadsCount } ) } + , m_maxUInt{ writer.declConstant( "gMaxUInt", 0xFFFFFFFF_u ) } + { + } + /** + *\arg ValueT The value type. 
+ */ + template< typename ValueT > + void sortT( sdw::ShaderWriter & writer + , sdw::UInt elementOffset + , sdw::UInt elementCount + , sdw::UInt groupIndex + , [[maybe_unused]] sdw::UInt threadIndex + , sdw::Array< sdw::UInt > const & inputKeys + , sdw::Array< sdw::UInt > const & outputKeys + , sdw::Array< ValueT > const & inputValues + , sdw::Array< ValueT > const & outputValues + , ValueT const & invalidValue ) + { + auto gsKeys = writer.declSharedVariable< sdw::UInt >( "gsKeys", bucketSize ); + auto gsValues = writer.declSharedVariable< ValueT >( "gsValues", bucketSize, inputValues.isEnabled() ); + + auto bitInsert0 = [&]( sdw::UInt const & value + , sdw::UInt const & bit ) + { + return ( ( ( m_maxUInt << bit ) & value ) << 1u ) | ( ~( m_maxUInt << bit ) & value ); + }; + + auto bitonicSort = [&]() + { + auto batchSizeLog = writer.declLocale( "batchSizeLog", writer.cast< sdw::UInt >( findMSB( m_batchSize ) ) ); + + // we process a power of two number of elements, + auto passCount = writer.declLocale( "passCount", 1u + writer.cast< sdw::UInt >( findMSB( elementCount - 1u ) ) ); + auto roundedElementCount = writer.declLocale( "roundedElementCount", 1u << passCount ); + auto batchCount = writer.declLocale( "batchCount", ( roundedElementCount + m_batchSize - 1u ) >> batchSizeLog ); + // Load data into shared memory. Pad missing values with max ints. + + FOR( writer, sdw::UInt, batch, 0_u, batch < batchCount, ++batch ) + { + // each thread loads a pair of values per batch. 
+ auto i1 = writer.declLocale( "i1", groupIndex + batch * m_batchSize ); + auto i2 = writer.declLocale( "i2", i1 + ( m_batchSize >> 1u ) ); + gsKeys[i1] = writer.ternary( i1 < elementCount, inputKeys[elementOffset + i1], m_maxUInt ); + gsKeys[i2] = writer.ternary( i2 < elementCount, inputKeys[elementOffset + i2], m_maxUInt ); + gsValues[i1] = writer.ternary( i1 < elementCount, inputValues[elementOffset + i1], invalidValue ); + gsValues[i2] = writer.ternary( i2 < elementCount, inputValues[elementOffset + i2], invalidValue ); + } + ROF + + shader::groupMemoryBarrierWithGroupSync( writer ); + + // Each loop iteration produces blocks of size k that are monotonic (alternatively increasing and decreasing) + // thus, producing blocks of size 2*k that are bitonic. + // as a result, the last pass produces a single block sorted in ascending order + FOR( writer, sdw::UInt, pass, 0_u, pass < passCount, ++pass ) + { + auto k = writer.declLocale( "k", 1_u << ( pass + 1u ) ); + // Each iteration compares and optionally swap elements in pairs exactly once for each element + FOR( writer, sdw::UInt, subPass, 0_u, subPass <= pass, ++subPass ) + { + FOR( writer, sdw::UInt, batch, 0_u, batch < batchCount, ++batch ) + { + auto indexBit = writer.declLocale( "indexBit", groupIndex + batch * m_numThreads ); + auto relPass = writer.declLocale( "relPass", pass - subPass ); + auto indexFirst = writer.declLocale( "indexFirst", bitInsert0( indexBit, relPass ) ); + auto indexSecond = writer.declLocale( "indexSecond", indexFirst | ( 1u << relPass ) ); + auto keyFirst = writer.declLocale( "keyFirst", gsKeys[indexFirst] ); + auto keySecond = writer.declLocale( "keySecond", gsKeys[indexSecond] ); + auto valFirst = writer.declLocale( "valFirst", gsValues[indexFirst] ); + auto valSecond = writer.declLocale( "valSecond", gsValues[indexSecond] ); + shader::groupMemoryBarrierWithGroupSync( writer ); + + IF( writer, writer.ternary( ( indexFirst & k ) == 0_u, 1_u, 0_u ) ^ writer.ternary( keyFirst <= 
keySecond, 1_u, 0_u ) ) + { + gsKeys[indexFirst] = keySecond; + gsKeys[indexSecond] = keyFirst; + gsValues[indexFirst] = valSecond; + gsValues[indexSecond] = valFirst; + } + FI + + shader::groupMemoryBarrierWithGroupSync( writer ); + } + ROF + } + ROF + } + ROF + + // Now commit the results to global memory. + FOR( writer, sdw::UInt, batch, 0_u, batch < batchCount, ++batch ) + { + auto i1 = writer.declLocale( "i1", groupIndex + batch * m_batchSize ); + auto i2 = writer.declLocale( "i2", i1 + ( m_batchSize >> 1u ) ); + + IF( writer, i1 < elementCount ) + { + outputKeys[elementOffset + i1] = gsKeys[i1]; + outputValues[elementOffset + i1] = gsValues[i1]; + } + FI + IF( writer, i2 < elementCount ) + { + outputKeys[elementOffset + i2] = gsKeys[i2]; + outputValues[elementOffset + i2] = gsValues[i2]; + } + FI + } + ROF + }; + elementCount = min( sdw::UInt{ bucketSize }, elementCount ); + + IF( writer, elementCount > 1_u ) + { + bitonicSort(); + } + FI + } + /** + *\arg ValueT The value type. + */ + template< typename ValueT > + void sortT( sdw::ComputeWriter & writer + , sdw::UInt elementOffset + , sdw::UInt elementCount + , sdw::UInt groupIndex + , sdw::UInt threadIndex + , sdw::Array< sdw::UInt > const & inputKeys + , sdw::Array< sdw::UInt > const & outputKeys + , ValueT const & invalidValue ) + { + sdw::Array< ValueT > inputValues = writer.declGlobalArray< ValueT >( "c3d_dummyInValues", 1u, false ); + sdw::Array< ValueT > outputValues = writer.declGlobalArray< ValueT >( "c3d_dummyOutValues", 1u, false ); + sortT( writer + , elementOffset, elementCount + , groupIndex, threadIndex + , inputKeys, outputKeys + , inputValues, outputValues + , invalidValue ); + } + + private: + sdw::UInt m_batchSize; + sdw::UInt m_numThreads; + sdw::UInt m_maxUInt; + }; +} + +#endif diff --git a/include/Core/Castor3D/Shader/Shaders/GlslLight.hpp b/include/Core/Castor3D/Shader/Shaders/GlslLight.hpp index 6fc002e49e..4df112fccd 100644 --- a/include/Core/Castor3D/Shader/Shaders/GlslLight.hpp 
+++ b/include/Core/Castor3D/Shader/Shaders/GlslLight.hpp @@ -25,7 +25,7 @@ namespace castor3d::shader , sdw::IntField< "shadowMapIndex" > , sdw::UIntField< "cascadeCount" > , sdw::Vec3Field< "posDir" > - , sdw::FloatField< "exponent" > > + , sdw::UIntField< "enabled" > > { friend class LightsBuffer; friend struct DirectionalLight; @@ -44,10 +44,10 @@ namespace castor3d::shader auto intensity()const { return getMember< "intensity" >(); } auto range()const { return getMember< "range" >(); } auto shadowMapIndex()const { return getMember< "shadowMapIndex" >(); } + auto enabled()const { return getMember< "enabled" >(); } private: auto posDir()const { return getMember< "posDir" >(); } - auto exponent()const { return getMember< "exponent" >(); } auto cascadeCount()const { return getMember< "cascadeCount" >(); } }; @@ -69,6 +69,7 @@ namespace castor3d::shader auto colour()const { return base().colour(); } auto intensity()const { return base().intensity(); } auto shadowMapIndex()const { return base().shadowMapIndex(); } + auto enabled()const { return base().enabled() != 0_u; } auto cascadeCount()const { return base().cascadeCount(); } auto direction()const { return base().posDir(); } @@ -96,6 +97,7 @@ namespace castor3d::shader auto intensity()const { return base().intensity(); } auto range()const { return base().range(); } auto shadowMapIndex()const { return base().shadowMapIndex(); } + auto enabled()const { return base().enabled() != 0_u; } auto position()const { return base().posDir(); } }; @@ -105,6 +107,7 @@ namespace castor3d::shader , sdw::type::MemoryLayout::eC , sdw::StructFieldT< Light, "base" > , sdw::Vec3Field< "direction" > + , sdw::FloatField< "exponent" > , sdw::FloatField< "outerCutOffCos" > , sdw::FloatField< "innerCutOff" > , sdw::FloatField< "outerCutOff" > @@ -130,10 +133,11 @@ namespace castor3d::shader auto intensity()const { return base().intensity(); } auto range()const { return base().range(); } auto shadowMapIndex()const { return 
base().shadowMapIndex(); } + auto enabled()const { return base().enabled() != 0_u; } auto position()const { return base().posDir(); } - auto exponent()const { return base().exponent(); } auto direction()const { return getMember< "direction" >(); } + auto exponent()const { return getMember< "exponent" >(); } auto innerCutOff()const { return getMember< "innerCutOff" >(); } auto outerCutOff()const { return getMember< "outerCutOff" >(); } auto innerCutOffCos()const { return getMember< "innerCutOffCos" >(); } diff --git a/include/Core/Castor3D/Shader/Shaders/GlslRadixSort.hpp b/include/Core/Castor3D/Shader/Shaders/GlslRadixSort.hpp new file mode 100644 index 0000000000..4b2c8d43e5 --- /dev/null +++ b/include/Core/Castor3D/Shader/Shaders/GlslRadixSort.hpp @@ -0,0 +1,175 @@ +/* +See LICENSE file in root folder +*/ +#ifndef ___C3D_GlslRadixSort_H___ +#define ___C3D_GlslRadixSort_H___ + +#include "Castor3D/Shader/Shaders/SdwModule.hpp" + +#include + +namespace castor3d::shader +{ + /** + *\arg ValueSizeT The size of the value type. + */ + template< uint32_t ValueSizeT > + struct RadixSortT + { + static uint32_t constexpr bucketSize{ 1024u / ValueSizeT }; + uint32_t threadsCount{ bucketSize }; + /** + *\param sortBits The number of bits to consider sorting. + */ + RadixSortT( sdw::ComputeWriter & writer + , uint32_t sortBits ) + : m_sortBits{ sortBits } + , m_maxUInt{ writer.declConstant( "gMaxUInt", 0xFFFFFFFF_u ) } + { + } + /** + *\arg ValueT The value type. 
+ */ + template< typename ValueT > + void sortT( sdw::ShaderWriter & writer + , sdw::UInt elementOffset + , sdw::UInt elementCount + , sdw::UInt groupIndex + , sdw::UInt threadIndex + , sdw::Array< sdw::UInt > const & inputKeys + , sdw::Array< sdw::UInt > const & outputKeys + , sdw::Array< ValueT > const & inputValues + , sdw::Array< ValueT > const & outputValues + , ValueT const & invalidValue )const + { + auto gsKeys = writer.declSharedVariable< sdw::UInt >( "gsKeys", threadsCount ); // A temporary buffer to store the input keys. (threadsCount * sizeof(uint) Bytes) + auto gsValues = writer.declSharedVariable< ValueT >( "gsValues", threadsCount, inputValues.isEnabled() ); // A temporary buffer to store the input values. (threadsCount * ValueSizeT Bytes) + auto gsE = writer.declSharedVariable< sdw::UInt >( "gsE", threadsCount ); // Set a 1 for all false sort keys (b == 0) and a 0 for all true sort keys (b == 1) (threadsCount * sizeof(uint) Bytes) + auto gsF = writer.declSharedVariable< sdw::UInt >( "gsF", threadsCount ); // Scan the splits. This results in the output index of all false sort keys (b == 0) (threadsCount * sizeof(uint) Bytes) + auto gsD = writer.declSharedVariable< sdw::UInt >( "gsD", threadsCount ); // The desination index for the ouput key and value. (threadsCount * sizeof(uint) Bytes) + auto gsTotalFalses = writer.declSharedVariable< sdw::UInt >( "gsTotalFalses" ); // The result of e[threadsCount - 1] + f[threadsCount - 1]; (4 Bytes) + + // Store the input key and values into shared memory. + gsKeys[groupIndex] = writer.ternary( threadIndex < elementCount, inputKeys[elementOffset + threadIndex], m_maxUInt ); + gsValues[groupIndex] = writer.ternary( threadIndex < elementCount, inputValues[elementOffset + threadIndex], invalidValue ); + + // Loop over the bits starting at the least-significant bit. + FOR( writer, sdw::UInt, b, 0_u, b < m_sortBits, ++b ) + { + // 1. 
In a temporary buffer in shared memory, we set a 1 for all false + // sort keys (b = 0) and a 0 for all true sort keys. + gsE[groupIndex] = writer.ternary( ( ( gsKeys[groupIndex] >> b ) & 1_u ) == 0_u + , 1_u + , 0_u ); + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + + IF( writer, groupIndex == 0_u ) + { + gsF[groupIndex] = 0_u; + } + ELSE + { + gsF[groupIndex] = gsE[groupIndex - 1_u]; + } + FI + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + auto temp = writer.declLocale( "temp", 0_u ); + + // 2. We then scan (prefix sum) this buffer. This is the enumerate operation; + // each false sort key now contains its destination address in the scan + // output, which we will call f. These first two steps are equivalent to + // a stream compaction operation on all false sort keys. + for ( u32 i = 1; i < bucketSize; i <<= 1u ) + { + temp = gsF[groupIndex]; + + IF( writer, groupIndex > i ) + { + temp += gsF[groupIndex - i]; + } + FI + + // Sync group shared memory reads before writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + + gsF[groupIndex] = temp; + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + } + + // 3. The last element in the scan's output now contains the total + // number of false sort keys. We write this value to a shared + // variable, gs_TotalFalses. + IF ( writer, groupIndex == 0_u ) + { + gsTotalFalses = gsE[bucketSize - 1u] + gsF[bucketSize - 1u]; + } + FI + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + + // 4. Now we compute the destination address for the true sort keys. For + // a sort key at index i, this address is t = i - f + totalFalses. We + // then select between t and f depending on the value of b to get the + // destination address d of each fragment. 
+ gsD[groupIndex] = writer.ternary( gsE[groupIndex] == 1u + , gsF[groupIndex] + , groupIndex - gsF[groupIndex] + gsTotalFalses ); + + // 5. Finally, we scatter the original sort keys to destination address + // d. The scatter pattern is a perfect permutation of the input, so + // we see no write conflicts with this scatter. + auto key = writer.declLocale( "key", gsKeys[groupIndex] ); + auto value = writer.declLocale( "value", gsValues[groupIndex], invalidValue.isEnabled() ); + + // Sync group shared memory reads before writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + + gsKeys[gsD[groupIndex]] = key; + gsValues[gsD[groupIndex]] = value; + + // Sync group shared memory writes. + shader::groupMemoryBarrierWithGroupSync( writer ); + } + ROF + + // Now commit the results to global memory. + outputKeys[elementOffset + threadIndex] = gsKeys[groupIndex]; + outputValues[elementOffset + threadIndex] = gsValues[groupIndex]; + } + /** + *\arg ValueT The value type. + */ + template< typename ValueT > + void sortT( sdw::ComputeWriter & writer + , sdw::UInt elementOffset + , sdw::UInt elementCount + , sdw::UInt groupIndex + , sdw::UInt threadIndex + , sdw::Array< sdw::UInt > const & inputKeys + , sdw::Array< sdw::UInt > const & outputKeys + , ValueT const & invalidValue ) + { + sdw::Array< ValueT > inputValues = writer.declGlobalArray< ValueT >( "c3d_dummyInValues", 1u, false ); + sdw::Array< ValueT > outputValues = writer.declGlobalArray< ValueT >( "c3d_dummyOutValues", 1u, false ); + sortT( writer + , elementOffset, elementCount + , groupIndex, threadIndex + , inputKeys, outputKeys + , inputValues, outputValues + , invalidValue ); + } + + private: + uint32_t m_sortBits; + sdw::UInt m_maxUInt; + }; +} + +#endif diff --git a/include/Core/Castor3D/Shader/Ubos/CameraUbo.hpp b/include/Core/Castor3D/Shader/Ubos/CameraUbo.hpp index 4f66130342..660cc53811 100644 --- a/include/Core/Castor3D/Shader/Ubos/CameraUbo.hpp +++ b/include/Core/Castor3D/Shader/Ubos/CameraUbo.hpp @@ 
-306,18 +306,24 @@ namespace castor3d }; } -#define C3D_CameraEx( writer, binding, set, enabled )\ - sdw::UniformBuffer camera{ writer\ - , "C3D_Camera"\ - , "c3d_camera"\ +#define C3D_CameraNamedEx( writer, suffix, binding, set, enabled )\ + sdw::UniformBuffer camera##suffix{ writer\ + , "C3D_Camera"#suffix\ + , "c3d_camera"#suffix\ , uint32_t( binding )\ , uint32_t( set )\ , sdw::type::MemoryLayout::eStd140\ , enabled };\ - auto c3d_cameraData = camera.declMember< castor3d::shader::CameraData >( "c", enabled );\ - camera.end() + auto c3d_cameraData##suffix = camera##suffix.declMember< castor3d::shader::CameraData >( "c", enabled );\ + camera##suffix.end() + +#define C3D_CameraEx( writer, binding, set, enabled )\ + C3D_CameraNamedEx( writer, , binding, set, enabled ) #define C3D_Camera( writer, binding, set )\ C3D_CameraEx( writer, binding, set, true ) +#define C3D_CameraNamed( writer, suffix, binding, set )\ + C3D_CameraNamedEx( writer, suffix, binding, set, true ) + #endif diff --git a/include/Core/Castor3D/Shader/Ubos/ClustersUbo.hpp b/include/Core/Castor3D/Shader/Ubos/ClustersUbo.hpp index 18a99717b7..56b587b2a2 100644 --- a/include/Core/Castor3D/Shader/Ubos/ClustersUbo.hpp +++ b/include/Core/Castor3D/Shader/Ubos/ClustersUbo.hpp @@ -29,7 +29,7 @@ namespace castor3d , sdw::UIntField< "spotLightLevels" > , sdw::UIntField< "pointLightCount" > , sdw::UIntField< "spotLightCount" > - , sdw::FloatField< "bias" > + , sdw::FloatField< "minDistance" > , sdw::UIntField< "enableWaveIntrinsics" > , sdw::FloatField< "pad1" > , sdw::FloatField< "pad2" > > @@ -55,14 +55,14 @@ namespace castor3d auto pointLightCount()const { return getMember< "pointLightCount" >(); } auto spotLightCount()const { return getMember< "spotLightCount" >(); } auto splitScheme()const { return getMember< "splitScheme" >(); } - auto bias()const { return getMember< "bias" >(); } + auto minDistance()const { return getMember< "minDistance" >(); } auto enableWaveIntrinsics()const { return getMember< 
"enableWaveIntrinsics" >(); } C3D_API sdw::RetU32Vec3 computeClusterIndex3D( sdw::UInt32 const index ); C3D_API sdw::RetU32Vec3 computeClusterIndex3D( sdw::Vec2 const screenPos , sdw::Float viewZ , sdw::Vec4 const clustersLightsData ); - C3D_API sdw::RetUInt32 computeClusterIndex1D( sdw::U32Vec3 const clusterIndex3D ); + C3D_API sdw::UInt32 computeClusterIndex1D( sdw::U32Vec3 const clusterIndex3D ); C3D_API sdw::RetVec2 getClusterDepthBounds( sdw::U32Vec3 const clusterIndex3D , sdw::Vec4 const clustersLightsData , sdw::Vec4 const lightsAABBRange ); @@ -116,7 +116,7 @@ namespace castor3d , uint32_t pointLightsCount , uint32_t spotLightsCount , ClusterSplitScheme splitScheme - , float bias + , float minDistance , bool enableWaveIntrinsics ); void createPassBinding( crg::FramePass & pass diff --git a/include/Core/Castor3D/Shader/Ubos/UbosModule.hpp b/include/Core/Castor3D/Shader/Ubos/UbosModule.hpp index 969dd4c6bf..4b7ddb4435 100644 --- a/include/Core/Castor3D/Shader/Ubos/UbosModule.hpp +++ b/include/Core/Castor3D/Shader/Ubos/UbosModule.hpp @@ -244,8 +244,8 @@ namespace castor3d uint32_t pointLightsCount{}; // The number of spot lights. uint32_t spotLightsCount{}; - // The biased exponential split scheme bias. - float bias{}; + // The hybrid split scheme minimal threshold distance. + float minDistance{}; // If clustered lights processing uses wave intrinsics. 
uint32_t enableWaveIntrinsics{}; float pad0{}; diff --git a/include/Core/CastorUtils/Config/PlatformConfig.hpp b/include/Core/CastorUtils/Config/PlatformConfig.hpp index 0a457e923d..396b4b3fe4 100644 --- a/include/Core/CastorUtils/Config/PlatformConfig.hpp +++ b/include/Core/CastorUtils/Config/PlatformConfig.hpp @@ -14,6 +14,32 @@ See LICENSE file in root folder # define CU_PlatformApple #endif +#if defined( __x86_64__ ) || defined( _M_X64 ) +# define CU_ArchX86_64 +#elif defined(i386) || defined(__i386__) || defined(__i386) || defined(_M_IX86) +# define CU_ArchX86_32 +#elif defined(__ARM_ARCH_2__) +# define CU_ArchARM2 +#elif defined(__ARM_ACH_3__) || defined(__ARM_ARCH_3M__) +# define CU_ArchARMv3 +#elif defined(__ARM_ARCH_4T__) || defined(__TARGET_ARM_4T) +# define CU_ArchARM4T +#elif defined(__ARM_ARCH_5_) || defined(__ARM_ARCH_5E_) +# define CU_ArchARM5 +#elif defined(__ARM_ARCH_5T_) || defined(__ARM_ARCH_5TE_) || defined(__ARM_ARCH_5TEJ_) +# define CU_ArchARM5T +#elif defined(__ARM_ARCH_6T2_) +# define CU_ArchARM6T2 +#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) +# define CU_ArchARM6 +#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) +# define CU_ArchARM7 +#elif defined(__aarch64__) || defined(_M_ARM64) +# define CU_ArchARM64 +#else +# error "Yet unsupported architecture" +#endif + #if defined( CU_PlatformWindows ) # if defined( CastorUtils_EXPORTS ) # define CU_API __declspec(dllexport) diff --git a/include/Core/CastorUtils/Miscellaneous/CpuInformations.hpp b/include/Core/CastorUtils/Miscellaneous/CpuInformations.hpp index cefae95aaf..24e64ee28d 100644 --- a/include/Core/CastorUtils/Miscellaneous/CpuInformations.hpp +++ b/include/Core/CastorUtils/Miscellaneous/CpuInformations.hpp @@ -28,10 +28,6 @@ namespace castor uint32_t m_coreCount{ 0u }; String m_vendor{}; String 
m_model{}; - std::bitset< 32 > m_f_1_ECX{ 0 }; - std::bitset< 32 > m_f_1_EDX{ 0 }; - std::bitset< 32 > m_f_7_EBX{ 0 }; - std::bitset< 32 > m_f_7_ECX{ 0 }; }; public: @@ -72,416 +68,6 @@ namespace castor { return m_internal.m_model; } - /** - *\~english - *\return The SSE3 instructions support. - *\~french - *\return Le support des instructions SSE3. - */ - bool SSE3()const - { - return m_internal.m_f_1_ECX[0]; - } - /** - *\~english - *\return The PCLMULQDQ instructions support. - *\~french - *\return Le support des instructions PCLMULQDQ. - */ - bool PCLMULQDQ()const - { - return m_internal.m_f_1_ECX[1]; - } - /** - *\~english - *\return The MONITOR instructions support. - *\~french - *\return Le support des instructions MONITOR. - */ - bool MONITOR()const - { - return m_internal.m_f_1_ECX[3]; - } - /** - *\~english - *\return The SSSE3 instructions support. - *\~french - *\return Le support des instructions SSSE3. - */ - bool SSSE3()const - { - return m_internal.m_f_1_ECX[9]; - } - /** - *\~english - *\return The FMA instructions support. - *\~french - *\return Le support des instructions FMA. - */ - bool FMA()const - { - return m_internal.m_f_1_ECX[12]; - } - /** - *\~english - *\return The CMPXCHG16B instructions support. - *\~french - *\return Le support des instructions CMPXCHG16B. - */ - bool CMPXCHG16B()const - { - return m_internal.m_f_1_ECX[13]; - } - /** - *\~english - *\return The SSE4.1 instructions support. - *\~french - *\return Le support des instructions SSE4.1. - */ - bool SSE41()const - { - return m_internal.m_f_1_ECX[19]; - } - /** - *\~english - *\return The SSE4.2 instructions support. - *\~french - *\return Le support des instructions SSE4.2. - */ - bool SSE42()const - { - return m_internal.m_f_1_ECX[20]; - } - /** - *\~english - *\return The MOVBE instructions support. - *\~french - *\return Le support des instructions MOVBE. 
- */ - bool MOVBE()const - { - return m_internal.m_f_1_ECX[22]; - } - /** - *\~english - *\return The POPCNT instructions support. - *\~french - *\return Le support des instructions POPCNT. - */ - bool POPCNT()const - { - return m_internal.m_f_1_ECX[23]; - } - /** - *\~english - *\return The AES instructions support. - *\~french - *\return Le support des instructions AES. - */ - bool AES()const - { - return m_internal.m_f_1_ECX[25]; - } - /** - *\~english - *\return The XSAVE instructions support. - *\~french - *\return Le support des instructions XSAVE. - */ - bool XSAVE()const - { - return m_internal.m_f_1_ECX[26]; - } - /** - *\~english - *\return The OSXSAVE instructions support. - *\~french - *\return Le support des instructions OSXSAVE. - */ - bool OSXSAVE()const - { - return m_internal.m_f_1_ECX[27]; - } - /** - *\~english - *\return The AVX instructions support. - *\~french - *\return Le support des instructions AVX. - */ - bool AVX()const - { - return m_internal.m_f_1_ECX[28]; - } - /** - *\~english - *\return The F16C instructions support. - *\~french - *\return Le support des instructions F16C. - */ - bool F16C()const - { - return m_internal.m_f_1_ECX[29]; - } - /** - *\~english - *\return The RDRAND instructions support. - *\~french - *\return Le support des instructions RDRAND. - */ - bool RDRAND()const - { - return m_internal.m_f_1_ECX[30]; - } - /** - *\~english - *\return The MSR instructions support. - *\~french - *\return Le support des instructions MSR. - */ - bool MSR()const - { - return m_internal.m_f_1_EDX[5]; - } - /** - *\~english - *\return The CX8 instructions support. - *\~french - *\return Le support des instructions CX8. - */ - bool CX8()const - { - return m_internal.m_f_1_EDX[8]; - } - /** - *\~english - *\return The SEP instructions support. - *\~french - *\return Le support des instructions SEP. - */ - bool SEP()const - { - return m_internal.m_f_1_EDX[11]; - } - /** - *\~english - *\return The CMOV instructions support. 
- *\~french - *\return Le support des instructions CMOV. - */ - bool CMOV()const - { - return m_internal.m_f_1_EDX[15]; - } - /** - *\~english - *\return The CLFSH instructions support. - *\~french - *\return Le support des instructions CLFSH. - */ - bool CLFSH()const - { - return m_internal.m_f_1_EDX[19]; - } - /** - *\~english - *\return The MMX instructions support. - *\~french - *\return Le support des instructions MMX. - */ - bool MMX()const - { - return m_internal.m_f_1_EDX[23]; - } - /** - *\~english - *\return The FXSR instructions support. - *\~french - *\return Le support des instructions FXSR. - */ - bool FXSR()const - { - return m_internal.m_f_1_EDX[24]; - } - /** - *\~english - *\return The SSE instructions support. - *\~french - *\return Le support des instructions SSE. - */ - bool SSE()const - { - return m_internal.m_f_1_EDX[25]; - } - /** - *\~english - *\return The SSE2 instructions support. - *\~french - *\return Le support des instructions SSE2. - */ - bool SSE2()const - { - return m_internal.m_f_1_EDX[26]; - } - /** - *\~english - *\return The FSGSBASE instructions support. - *\~french - *\return Le support des instructions FSGSBASE. - */ - bool FSGSBASE()const - { - return m_internal.m_f_7_EBX[0]; - } - /** - *\~english - *\return The BMI1 instructions support. - *\~french - *\return Le support des instructions BMI1. - */ - bool BMI1()const - { - return m_internal.m_f_7_EBX[3]; - } - /** - *\~english - *\return The HLE instructions support. - *\~french - *\return Le support des instructions HLE. - */ - bool HLE()const - { - return m_internal.m_isIntel && m_internal.m_f_7_EBX[4]; - } - /** - *\~english - *\return The AVX2 instructions support. - *\~french - *\return Le support des instructions AVX2. - */ - bool AVX2()const - { - return m_internal.m_f_7_EBX[5]; - } - /** - *\~english - *\return The BMI2 instructions support. - *\~french - *\return Le support des instructions BMI2. 
- */ - bool BMI2()const - { - return m_internal.m_f_7_EBX[8]; - } - /** - *\~english - *\return The ERMS instructions support. - *\~french - *\return Le support des instructions ERMS. - */ - bool ERMS()const - { - return m_internal.m_f_7_EBX[9]; - } - /** - *\~english - *\return The INVPCID instructions support. - *\~french - *\return Le support des instructions INVPCID. - */ - bool INVPCID()const - { - return m_internal.m_f_7_EBX[10]; - } - /** - *\~english - *\return The RTM instructions support. - *\~french - *\return Le support des instructions RTM. - */ - bool RTM()const - { - return m_internal.m_isIntel && m_internal.m_f_7_EBX[11]; - } - /** - *\~english - *\return The AVX512F instructions support. - *\~french - *\return Le support des instructions AVX512F. - */ - bool AVX512F()const - { - return m_internal.m_f_7_EBX[16]; - } - /** - *\~english - *\return The RDSEED instructions support. - *\~french - *\return Le support des instructions RDSEED. - */ - bool RDSEED()const - { - return m_internal.m_f_7_EBX[18]; - } - /** - *\~english - *\return The ADX instructions support. - *\~french - *\return Le support des instructions ADX. - */ - bool ADX()const - { - return m_internal.m_f_7_EBX[19]; - } - /** - *\~english - *\return The AVX512PF instructions support. - *\~french - *\return Le support des instructions AVX512PF. - */ - bool AVX512PF()const - { - return m_internal.m_f_7_EBX[26]; - } - /** - *\~english - *\return The AVX512ER instructions support. - *\~french - *\return Le support des instructions AVX512ER. - */ - bool AVX512ER()const - { - return m_internal.m_f_7_EBX[27]; - } - /** - *\~english - *\return The AVX512CD instructions support. - *\~french - *\return Le support des instructions AVX512CD. - */ - bool AVX512CD()const - { - return m_internal.m_f_7_EBX[28]; - } - /** - *\~english - *\return The SHA instructions support. - *\~french - *\return Le support des instructions SHA. 
- */ - bool SHA()const - { - return m_internal.m_f_7_EBX[29]; - } - /** - *\~english - *\return The PREFETCHWT1 instructions support. - *\~french - *\return Le support des instructions PREFETCHWT1. - */ - bool PREFETCHWT1()const - { - return m_internal.m_f_7_ECX[0]; - } private: CU_API static CpuInformationsInternal const m_internal; diff --git a/source/Core/Castor3D/Buffer/ObjectBufferPool.cpp b/source/Core/Castor3D/Buffer/ObjectBufferPool.cpp index a68a3dafff..1d6dd67b67 100644 --- a/source/Core/Castor3D/Buffer/ObjectBufferPool.cpp +++ b/source/Core/Castor3D/Buffer/ObjectBufferPool.cpp @@ -159,10 +159,10 @@ namespace castor3d , m_buffers.end() , [&bufferOffset]( ModelBuffers const & lookup ) { - return &lookup.vertex->getBuffer() == &bufferOffset.getBuffer( SubmeshData::eIndex ); + return &lookup.index->getBuffer() == &bufferOffset.getBuffer( SubmeshData::eIndex ); } ); CU_Require( it != m_buffers.end() ); - it->vertex->deallocate( bufferOffset.buffers[uint32_t( SubmeshData::eIndex )].chunk ); + it->index->deallocate( bufferOffset.buffers[uint32_t( SubmeshData::eIndex )].chunk ); } IndexBufferPool::BufferArray::iterator IndexBufferPool::doFindBuffer( VkDeviceSize size @@ -172,7 +172,7 @@ namespace castor3d auto it = array.begin(); while ( it != array.end() - && !it->vertex->hasAvailable( size ) ) + && !it->index->hasAvailable( size ) ) { ++it; } diff --git a/source/Core/Castor3D/CMakeLists.txt b/source/Core/Castor3D/CMakeLists.txt index 1cae7c7527..16a9a50f7a 100644 --- a/source/Core/Castor3D/CMakeLists.txt +++ b/source/Core/Castor3D/CMakeLists.txt @@ -1126,6 +1126,7 @@ source_group( "Source Files\\Render" FILES ${${PROJECT_NAME}_FOLDER_SRC_FILES} ) set( ${PROJECT_NAME}_FOLDER_SRC_FILES ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/AssignLightsToClusters.cpp + ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/BucketSortLights.cpp ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/BuildLightsBVH.cpp 
${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/ClustersConfig.cpp ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/ClusteredModule.cpp @@ -1136,12 +1137,12 @@ set( ${PROJECT_NAME}_FOLDER_SRC_FILES ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/FindUniqueClusters.cpp ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/FrustumClusters.cpp ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/MergeSortLights.cpp - ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/RadixSortLights.cpp ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/ReduceLightsAABB.cpp ${CASTOR_SOURCE_DIR}/source/Core/${PROJECT_NAME}/Render/Clustered/SortAssignedLights.cpp ) set( ${PROJECT_NAME}_FOLDER_HDR_FILES ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/AssignLightsToClusters.hpp + ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/BucketSortLights.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/BuildLightsBVH.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/ClustersConfig.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/ClusteredModule.hpp @@ -1152,7 +1153,6 @@ set( ${PROJECT_NAME}_FOLDER_HDR_FILES ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/FindUniqueClusters.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/FrustumClusters.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/MergeSortLights.hpp - ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/RadixSortLights.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/ReduceLightsAABB.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Render/Clustered/SortAssignedLights.hpp ) @@ -2014,6 +2014,7 @@ set( ${PROJECT_NAME}_FOLDER_HDR_FILES ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslAppendBuffer.hpp 
${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslBackground.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslBaseIO.hpp + ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslBitonicSort.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslBlendComponents.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslBuffer.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslBRDFHelpers.hpp @@ -2039,6 +2040,7 @@ set( ${PROJECT_NAME}_FOLDER_HDR_FILES ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslOutputs.inl ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslOverlaySurface.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslPassShaders.hpp + ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslRadixSort.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslRay.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslReflection.hpp ${CASTOR_SOURCE_DIR}/include/Core/${PROJECT_NAME}/Shader/Shaders/GlslShadow.hpp diff --git a/source/Core/Castor3D/Render/Clustered/AssignLightsToClusters.cpp b/source/Core/Castor3D/Render/Clustered/AssignLightsToClusters.cpp index 3c06e746b9..522641b60c 100644 --- a/source/Core/Castor3D/Render/Clustered/AssignLightsToClusters.cpp +++ b/source/Core/Castor3D/Render/Clustered/AssignLightsToClusters.cpp @@ -34,7 +34,8 @@ namespace castor3d { enum BindingPoints { - eCamera, + eMainCamera, + eClustersCamera, eLights, eClusters, eAllLightsAABB, @@ -68,8 +69,13 @@ namespace castor3d , 34636833_u /* 6 levels +32^5 */ } ); // Inputs - C3D_Camera( writer - , eCamera + C3D_CameraNamed( writer + , Main + , eMainCamera + , 0u ); + C3D_CameraNamed( writer + , Clusters + , eClustersCamera , 0u ); shader::LightsBuffer lights{ writer , eLights @@ -314,8 +320,8 @@ namespace castor3d auto spot = writer.declLocale( "spot" , 
lights.getSpotLight( lights.getPointsEnd() + lightIndex * castor3d::SpotLight::LightDataComponents ) ); auto cone = writer.declLocale( "cone" - , shader::Cone{ c3d_cameraData.worldToCurView( vec4( spot.position(), 1.0_f ) ).xyz() - , c3d_cameraData.worldToCurView( -spot.direction() ) + , shader::Cone{ c3d_cameraDataClusters.worldToCurView( vec4( spot.position(), 1.0_f ) ).xyz() + , c3d_cameraDataClusters.worldToCurView( -spot.direction() ) , computeRange( spot ) , spot.outerCutOffCos() , spot.outerCutOffSin() @@ -759,7 +765,8 @@ namespace castor3d crg::FramePass const & createAssignLightsToClustersPass( crg::FramePassGroup & graph , crg::FramePassArray const & previousPasses , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters & clusters ) { auto const & lights = clusters.getCamera().getScene()->getLightCache(); @@ -783,7 +790,8 @@ namespace castor3d return result; } ); passNoDepth.addDependencies( previousPasses ); - cameraUbo.createPassBinding( passNoDepth, dspclst::eCamera ); + mainCameraUbo.createPassBinding( passNoDepth, dspclst::eMainCamera ); + clustersCameraUbo.createPassBinding( passNoDepth, dspclst::eClustersCamera ); lights.createPassBinding( passNoDepth, dspclst::eLights ); clusters.getClustersUbo().createPassBinding( passNoDepth, dspclst::eClusters ); createInputStoragePassBinding( passNoDepth, uint32_t( dspclst::eAllLightsAABB ), cuT( "C3D_AllLightsAABB" ), clusters.getAllLightsAABBBuffer(), 0u, ashes::WholeSize ); @@ -795,16 +803,16 @@ namespace castor3d createInputStoragePassBinding( passNoDepth, uint32_t( dspclst::ePointLightBVH ), cuT( "C3D_PointLightsBVH" ), clusters.getPointLightBVHBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( passNoDepth, uint32_t( dspclst::eSpotLightBVH ), cuT( "C3D_SpotLightsBVH" ), clusters.getSpotLightBVHBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( passNoDepth, uint32_t( 
dspclst::ePointLightIndices ), cuT( "C3D_PointLightIndices" ) - , { &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() - , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() - , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() - , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() } + , { &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() + , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() + , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() + , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() } , 0u, ashes::WholeSize ); createInputStoragePassBinding( passNoDepth, uint32_t( dspclst::eSpotLightIndices ), cuT( "C3D_SpotLightIndices" ) - , { &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() - , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() - , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() - , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() } + , { &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), 
&clusters.getInputSpotLightIndicesBuffer() + , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() + , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() + , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() } , 0u, ashes::WholeSize ); auto & passDepth = graph.createPass( "AssignLightsToClustersDepth" @@ -824,7 +832,8 @@ namespace castor3d return result; } ); passDepth.addDependency( passNoDepth ); - cameraUbo.createPassBinding( passDepth, dspclst::eCamera ); + mainCameraUbo.createPassBinding( passDepth, dspclst::eMainCamera ); + clustersCameraUbo.createPassBinding( passDepth, dspclst::eClustersCamera ); lights.createPassBinding( passDepth, dspclst::eLights ); clusters.getClustersUbo().createPassBinding( passDepth, dspclst::eClusters ); createInputStoragePassBinding( passDepth, uint32_t( dspclst::eAllLightsAABB ), cuT( "C3D_AllLightsAABB" ), clusters.getAllLightsAABBBuffer(), 0u, ashes::WholeSize ); @@ -836,16 +845,16 @@ namespace castor3d createInputStoragePassBinding( passDepth, uint32_t( dspclst::ePointLightBVH ), cuT( "C3D_PointLightsBVH" ), clusters.getPointLightBVHBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( passDepth, uint32_t( dspclst::eSpotLightBVH ), cuT( "C3D_SpotLightsBVH" ), clusters.getSpotLightBVHBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( passDepth, uint32_t( dspclst::ePointLightIndices ), cuT( "C3D_PointLightIndices" ) - , { &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() - , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() - , &clusters.getOutputPointLightIndicesBuffer(), 
&clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() - , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() } + , { &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() + , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() + , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() + , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() } , 0u, ashes::WholeSize ); createInputStoragePassBinding( passDepth, uint32_t( dspclst::eSpotLightIndices ), cuT( "C3D_SpotLightIndices" ) - , { &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() - , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() - , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() - , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() } + , { &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() + , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() + , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() + , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), 
&clusters.getInputSpotLightIndicesBuffer() } , 0u, ashes::WholeSize ); createInputStoragePassBinding( passDepth, uint32_t( dspclst::eUniqueClusters ), cuT( "C3D_UniqueClusters" ), clusters.getUniqueClustersBuffer(), 0u, ashes::WholeSize ); diff --git a/source/Core/Castor3D/Render/Clustered/BucketSortLights.cpp b/source/Core/Castor3D/Render/Clustered/BucketSortLights.cpp new file mode 100644 index 0000000000..52c732c23b --- /dev/null +++ b/source/Core/Castor3D/Render/Clustered/BucketSortLights.cpp @@ -0,0 +1,264 @@ +#include "Castor3D/Render/Clustered/BucketSortLights.hpp" + +#include "Castor3D/Engine.hpp" +#include "Castor3D/Cache/LightCache.hpp" +#include "Castor3D/Render/RenderDevice.hpp" +#include "Castor3D/Render/RenderSystem.hpp" +#include "Castor3D/Render/Clustered/FrustumClusters.hpp" +#include "Castor3D/Scene/Camera.hpp" +#include "Castor3D/Scene/Scene.hpp" +#include "Castor3D/Scene/Light/PointLight.hpp" +#include "Castor3D/Scene/Light/SpotLight.hpp" +#include "Castor3D/Shader/Program.hpp" +#include "Castor3D/Shader/Shaders/GlslBitonicSort.hpp" +#include "Castor3D/Shader/Shaders/GlslRadixSort.hpp" + +#include + +#include +#include + +namespace castor3d +{ + //********************************************************************************************* + + namespace buksrt + { + enum BindingPoints + { + eInputKeys, + eInputValues, + eOutputKeys, + eOutputValues, + }; + + static ShaderPtr createShader( RenderDevice const & device ) + { + sdw::ComputeWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; + + auto inputKeysBuffer = writer.declStorageBuffer( "c3d_inputKeysBuffer" + , uint32_t( eInputKeys ) + , 0u ); + auto c3d_inputKeys = inputKeysBuffer.declMemberArray< sdw::UInt >( "ik" ); + inputKeysBuffer.end(); + + auto inputValuesBuffer = writer.declStorageBuffer( "c3d_inputValuesBuffer" + , uint32_t( eInputValues ) + , 0u ); + auto c3d_inputValues = inputValuesBuffer.declMemberArray< sdw::UInt >( "iv" ); + inputValuesBuffer.end(); + + 
auto outputKeysBuffer = writer.declStorageBuffer( "c3d_outputKeysBuffer" + , uint32_t( eOutputKeys ) + , 0u ); + auto c3d_outputKeys = outputKeysBuffer.declMemberArray< sdw::UInt >( "ok" ); + outputKeysBuffer.end(); + + auto outputValuesBuffer = writer.declStorageBuffer( "c3d_outputValuesBuffer" + , uint32_t( eOutputValues ) + , 0u ); + auto c3d_outputValues = outputValuesBuffer.declMemberArray< sdw::UInt >( "ov" ); + outputValuesBuffer.end(); + + sdw::PushConstantBuffer pcb{ writer, "C3D_DispatchData", "c3d_dispatchData" }; + auto c3d_numElements = pcb.declMember< sdw::UInt >( "c3d_numElements" ); + pcb.end(); + + shader::RadixSortT< 4u > bucket{ writer, 30u }; + + writer.implementMainT< sdw::VoidT >( bucket.threadsCount + , [&]( sdw::ComputeIn const & in ) + { + //// In our case, the input keys are 30-bit morton codes. + bucket.sortT( writer + , 0_u, c3d_numElements + , in.localInvocationIndex, in.globalInvocationID.x() + , c3d_inputKeys, c3d_outputKeys + , c3d_inputValues, c3d_outputValues, 0xFFFFFFFF_u ); + } ); + + return writer.getBuilder().releaseShader(); + } + + class FramePass + : public crg::RunnablePass + { + public: + FramePass( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph + , RenderDevice const & device + , FrustumClusters & clusters + , LightType lightType ) + : crg::RunnablePass{ framePass + , context + , graph + , { [this]( uint32_t index ){ doInitialise( index ); } + , GetPipelineStateCallback( [](){ return crg::getPipelineState( VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ); } ) + , [this]( crg::RecordContext & recContext, VkCommandBuffer cb, uint32_t i ){ doRecordInto( recContext, cb, i ); } + , GetPassIndexCallback( [](){ return 0u; } ) + , IsEnabledCallback( [this](){ return doIsEnabled(); } ) + , IsComputePassCallback( [](){ return true; } ) } + , crg::ru::Config{ 1u, true /* resettable */ } } + , m_clusters{ clusters } + , m_lightCache{ clusters.getCamera().getScene()->getLightCache() } + , 
m_lightType{ lightType } + , m_pipeline{ framePass, context, graph, device, this, m_lightType } + { + } + + void resetPipeline( uint32_t index ) + { + resetCommandBuffer( index ); + m_pipeline.pipeline.resetPipeline( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( m_pipeline.createInfo ), index ); + doCreatePipeline( index, m_pipeline ); + reRecordCurrent(); + } + + private: + struct Pipeline + { + ShaderModule shader; + ashes::PipelineShaderStageCreateInfoArray createInfo; + crg::cp::ConfigData cpConfig; + crg::PipelineHolder pipeline; + + Pipeline( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph + , RenderDevice const & device + , FramePass const * parent + , LightType lightType ) + : shader{ VK_SHADER_STAGE_COMPUTE_BIT, cuT( "BucketSort/" ) + getName( lightType ), createShader( device ) } + , createInfo{ ashes::PipelineShaderStageCreateInfoArray{ makeShaderState( device, shader ) } } + , cpConfig{ crg::getDefaultV< InitialiseCallback >() + , nullptr + , IsEnabledCallback( [parent]() { return parent->doIsEnabled(); } ) + , GetPassIndexCallback( []() { return 0u; } ) + , crg::getDefaultV< RecordCallback >() + , crg::getDefaultV< RecordCallback >() + , 1u + , 1u + , 1u } + , pipeline{ framePass + , context + , graph + , crg::pp::Config{} + .program( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( createInfo ) ) + .pushConstants( VkPushConstantRange{ VK_SHADER_STAGE_COMPUTE_BIT, 0u, 4u } ) + , VK_PIPELINE_BIND_POINT_COMPUTE + , 1u } + { + } + }; + + private: + struct DispatchData + { + u32 numElements; + }; + + FrustumClusters & m_clusters; + LightCache const & m_lightCache; + LightType m_lightType; + Pipeline m_pipeline; + + private: + void doInitialise( uint32_t index ) + { + m_pipeline.pipeline.initialise(); + doCreatePipeline( index, m_pipeline ); + } + + bool doIsEnabled()const + { + return m_clusters.getConfig().sortLights + && m_clusters.needsLightsUpdate() + && m_lightCache.getLightsBufferCount( 
m_lightType ) > 0; + } + + void doRecordInto( crg::RecordContext & context + , VkCommandBuffer commandBuffer + , uint32_t index ) + { + // Dispatch the bucket sort: one thread group per bucket-sized batch of lights, with the lights count passed as a push constant. + auto lightsCount = m_lightCache.getLightsBufferCount( m_lightType ); + auto numThreadGroups = castor::divRoundUp( lightsCount, FrustumClusters::getBucketSortBucketSize() ); + DispatchData data{ lightsCount }; + m_pipeline.pipeline.recordInto( context, commandBuffer, index ); + m_context.vkCmdPushConstants( commandBuffer, m_pipeline.pipeline.getPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0u, 4u, &data ); + m_context.vkCmdDispatch( commandBuffer, numThreadGroups, 1u, 1u ); + } + + void doCreatePipeline( uint32_t index + , Pipeline & pipeline )const + { + auto & program = pipeline.pipeline.getProgram( index ); + VkComputePipelineCreateInfo createInfo{ VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO + , nullptr + , 0u + , program.front() + , pipeline.pipeline.getPipelineLayout() + , VkPipeline{} + , 0u }; + pipeline.pipeline.createPipeline( index, createInfo ); + } + }; + } + + //********************************************************************************************* + + crg::FramePassArray createBucketSortLightsPass( crg::FramePassGroup & graph + , crg::FramePass const * previousPass + , RenderDevice const & device + , FrustumClusters & clusters ) + { + // Point lights + auto & point = graph.createPass( "BucketSort/Point" + , [&clusters, &device]( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & runnableGraph ) + { + auto result = castor::make_unique< buksrt::FramePass >( framePass + , context + , runnableGraph + , device + , clusters + , LightType::ePoint ); + device.renderSystem.getEngine()->registerTimer( castor::makeString( framePass.getFullName() ) + , result->getTimer() ); + return result; + } ); + point.addDependency( *previousPass ); + createInputStoragePassBinding( point, uint32_t( buksrt::eInputKeys ), cuT( "C3D_InLightMortonCodes" ), 
clusters.getOutputPointLightMortonCodesBuffer(), 0u, ashes::WholeSize ); + createInputStoragePassBinding( point, uint32_t( buksrt::eInputValues ), cuT( "C3D_InLightIndices" ), clusters.getOutputPointLightIndicesBuffer(), 0u, ashes::WholeSize ); + createClearableOutputStorageBinding( point, uint32_t( buksrt::eOutputKeys ), cuT( "C3D_OutLightMortonCodes" ), clusters.getInputPointLightMortonCodesBuffer(), 0u, ashes::WholeSize ); + createClearableOutputStorageBinding( point, uint32_t( buksrt::eOutputValues ), cuT( "C3D_OutLightIndices" ), clusters.getInputPointLightIndicesBuffer(), 0u, ashes::WholeSize ); + + // Spot lights + auto & spot = graph.createPass( "BucketSort/Spot" + , [&clusters, &device]( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & runnableGraph ) + { + auto result = castor::make_unique< buksrt::FramePass >( framePass + , context + , runnableGraph + , device + , clusters + , LightType::eSpot ); + device.renderSystem.getEngine()->registerTimer( castor::makeString( framePass.getFullName() ) + , result->getTimer() ); + return result; + } ); + spot.addDependency( *previousPass ); + createInputStoragePassBinding( spot, uint32_t( buksrt::eInputKeys ), cuT( "C3D_InLightMortonCodes" ), clusters.getOutputSpotLightMortonCodesBuffer(), 0u, ashes::WholeSize ); + createInputStoragePassBinding( spot, uint32_t( buksrt::eInputValues ), cuT( "C3D_InLightIndices" ), clusters.getOutputSpotLightIndicesBuffer(), 0u, ashes::WholeSize ); + createClearableOutputStorageBinding( spot, uint32_t( buksrt::eOutputKeys ), cuT( "C3D_OutLightMortonCodes" ), clusters.getInputSpotLightMortonCodesBuffer(), 0u, ashes::WholeSize ); + createClearableOutputStorageBinding( spot, uint32_t( buksrt::eOutputValues ), cuT( "C3D_OutLightIndices" ), clusters.getInputSpotLightIndicesBuffer(), 0u, ashes::WholeSize ); + + return { &point, &spot }; + } + + //********************************************************************************************* +} diff 
--git a/source/Core/Castor3D/Render/Clustered/BuildLightsBVH.cpp b/source/Core/Castor3D/Render/Clustered/BuildLightsBVH.cpp index 1b5309a976..ba04295535 100644 --- a/source/Core/Castor3D/Render/Clustered/BuildLightsBVH.cpp +++ b/source/Core/Castor3D/Render/Clustered/BuildLightsBVH.cpp @@ -11,6 +11,7 @@ #include "Castor3D/Scene/Light/SpotLight.hpp" #include "Castor3D/Shader/Program.hpp" #include "Castor3D/Shader/Shaders/GlslAABB.hpp" +#include "Castor3D/Shader/Shaders/GlslBaseIO.hpp" #include "Castor3D/Shader/Shaders/GlslAppendBuffer.hpp" #include "Castor3D/Shader/Shaders/GlslClusteredLights.hpp" #include "Castor3D/Shader/Shaders/GlslLight.hpp" @@ -20,6 +21,7 @@ #include #include +#include #include #include @@ -490,6 +492,102 @@ namespace castor3d }; } + namespace dspbvh + { + enum BindingPoints + { + eMainCamera, + eClustersCamera, + eLightBVH, + }; + + static ShaderPtr createDebugDisplayShader( RenderDevice const & device ) + { + sdw::TraditionalGraphicsWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; + + C3D_CameraNamed( writer + , Main + , eMainCamera + , 0u ); + C3D_CameraNamed( writer + , Clusters + , eClustersCamera + , 0u ); + C3D_LightBVH( writer + , eLightBVH + , 0u ); + + auto numBVHNodes = writer.declConstantArray< sdw::UInt >( "numBVHNodes" + , std::vector< sdw::UInt >{ 0_u /* Level 0 */ + , 1_u /* Level 1 */ + , 33_u /* Level 2 */ + , 1057_u /* Level 3 */ + , 33825_u /* Level 4 */ + , 1082401_u /* Level 5 */ + , 34636833_u /* Level 6 */ } ); + auto colorPalette = writer.declConstantArray( "colorPalette" + , std::vector< sdw::Vec4 >{ vec4( 0.25_f, 0.25_f, 1.00_f, 1.0_f ) + , vec4( 0.25_f, 1.00_f, 0.25_f, 1.0_f ) + , vec4( 0.25_f, 1.00_f, 1.00_f, 1.0_f ) + , vec4( 1.00_f, 0.25_f, 0.25_f, 1.0_f ) + , vec4( 1.00_f, 0.25_f, 1.00_f, 1.0_f ) + , vec4( 1.00_f, 1.00_f, 0.25_f, 1.0_f ) } ); + + auto getNodeLevel = writer.implementFunction< sdw::UInt >( "getNodeLevel" + , [&]( sdw::UInt const & nodeIndex ) + { + auto i = writer.declLocale( 
"i", 0_u ); + + WHILE( writer, nodeIndex > numBVHNodes[i] && i < 5_u ) + { + ++i; + } + ELIHW + + writer.returnStmt( i ); + } + , sdw::InUInt{ writer, "nodeIndex" } ); + + writer.implementEntryPointT< shader::Position4FT, shader::Colour4FT >( [&writer, &c3d_cameraDataMain, &c3d_cameraDataClusters, &c3d_lightBVH, &colorPalette, &getNodeLevel]( sdw::VertexInT< shader::Position4FT > const & in + , sdw::VertexOutT< shader::Colour4FT > out ) + { + auto aabb = writer.declLocale( "aabb" + , c3d_lightBVH[in.instanceIndex] ); + + IF( writer, aabb.min().x() == aabb.max().x() + && aabb.min().y() == aabb.max().y() + && aabb.min().z() == aabb.max().z() ) + { + out.vtx.position = vec4( -100.0_f ); + } + ELSE + { + auto position = writer.declLocale( "position" + , in.position() ); + position.x() = mix( aabb.min().x(), aabb.max().x(), position.x() ); + position.y() = mix( aabb.min().y(), aabb.max().y(), position.y() ); + position.z() = mix( aabb.min().z(), aabb.max().z(), position.z() ); + // Convert from clusters view position to world position + position = c3d_cameraDataClusters.curViewToWorld( position ); + position.w() = 1.0_f; + // Then from world to main camera proj. 
+ out.vtx.position = c3d_cameraDataMain.worldToCurProj( position ); + + out.colour() = colorPalette[getNodeLevel( writer.cast< sdw::UInt >( in.instanceIndex ) )]; + } + FI + } ); + + writer.implementEntryPointT< shader::Colour4FT, shader::Colour4FT >( []( sdw::FragmentInT< shader::Colour4FT > const & in + , sdw::FragmentOutT< shader::Colour4FT > const & out ) + { + out.colour() = in.colour(); + } ); + + return writer.getBuilder().releaseShader(); + } + } + //********************************************************************************************* crg::FramePassArray createBuildLightsBVHPass( crg::FramePassGroup & graph @@ -517,8 +615,8 @@ namespace castor3d clusters.getClustersUbo().createPassBinding( point, lgtbvh::eClusters ); createInputStoragePassBinding( point, uint32_t( lgtbvh::eAllLightsAABB ), cuT( "C3D_AllLightsAABB" ), clusters.getAllLightsAABBBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( point, uint32_t( lgtbvh::eLightIndices ), cuT( "C3D_PointLightIndices" ) - , { &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() - , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer() } + , { &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() + , &clusters.getOutputPointLightIndicesBuffer(), &clusters.getOutputPointLightIndicesBuffer(), &clusters.getInputPointLightIndicesBuffer() } , 0u, ashes::WholeSize ); createClearableOutputStorageBinding( point, uint32_t( lgtbvh::eLightBVH ), cuT( "C3D_PointLightBVH" ), clusters.getPointLightBVHBuffer(), 0u, ashes::WholeSize ); @@ -542,13 +640,61 @@ namespace castor3d clusters.getClustersUbo().createPassBinding( spot, lgtbvh::eClusters ); createInputStoragePassBinding( spot, uint32_t( lgtbvh::eAllLightsAABB ), cuT( "C3D_AllLightsAABB" ), 
clusters.getAllLightsAABBBuffer(), 0u, ashes::WholeSize ); createInputStoragePassBinding( spot, uint32_t( lgtbvh::eLightIndices ), cuT( "C3D_SpotLightIndices" ) - , { &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() - , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer() } + , { &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() + , &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getOutputSpotLightIndicesBuffer(), &clusters.getInputSpotLightIndicesBuffer() } , 0u, ashes::WholeSize ); createClearableOutputStorageBinding( spot, uint32_t( lgtbvh::eLightBVH ), cuT( "C3D_SpotLightBVH" ), clusters.getSpotLightBVHBuffer(), 0u, ashes::WholeSize ); return { &point, &spot }; } + void createDisplayPointLightsBVHProgram( RenderDevice const & device + , FrustumClusters const & clusters + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo + , ashes::PipelineShaderStageCreateInfoArray & program + , ashes::VkDescriptorSetLayoutBindingArray & bindings + , ashes::WriteDescriptorSetArray & writes ) + { + ProgramModule programModule{ "PointLightsBVH", dspbvh::createDebugDisplayShader( device ) }; + program = makeProgramStates( device, programModule ); + + bindings.push_back( VkDescriptorSetLayoutBinding{ dspbvh::eMainCamera, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dspbvh::eClustersCamera, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dspbvh::eLightBVH, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + + writes.emplace_back( dspbvh::eMainCamera, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , 
ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ mainCameraUbo.getUbo().getBuffer().getBuffer(), mainCameraUbo.getUbo().getByteOffset(), mainCameraUbo.getUbo().getByteRange() } } ); + writes.emplace_back( dspbvh::eClustersCamera, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ clustersCameraUbo.getUbo().getBuffer().getBuffer(), clustersCameraUbo.getUbo().getByteOffset(), clustersCameraUbo.getUbo().getByteRange() } } ); + auto const & bvhBuffer = clusters.getPointLightBVHBuffer(); + writes.emplace_back( dspbvh::eLightBVH, 0u, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ bvhBuffer, 0u, bvhBuffer.getSize() } } ); + } + + void createDisplaySpotLightsBVHProgram( RenderDevice const & device + , FrustumClusters const & clusters + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo + , ashes::PipelineShaderStageCreateInfoArray & program + , ashes::VkDescriptorSetLayoutBindingArray & bindings + , ashes::WriteDescriptorSetArray & writes ) + { + ProgramModule programModule{ "SpotLightsBVH", dspbvh::createDebugDisplayShader( device ) }; + program = makeProgramStates( device, programModule ); + + bindings.push_back( VkDescriptorSetLayoutBinding{ dspbvh::eMainCamera, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dspbvh::eClustersCamera, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dspbvh::eLightBVH, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + + writes.emplace_back( dspbvh::eMainCamera, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ mainCameraUbo.getUbo().getBuffer().getBuffer(), mainCameraUbo.getUbo().getByteOffset(), mainCameraUbo.getUbo().getByteRange() } } ); + 
writes.emplace_back( dspbvh::eClustersCamera, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ clustersCameraUbo.getUbo().getBuffer().getBuffer(), clustersCameraUbo.getUbo().getByteOffset(), clustersCameraUbo.getUbo().getByteRange() } } ); + auto const & bvhBuffer = clusters.getSpotLightBVHBuffer(); + writes.emplace_back( dspbvh::eLightBVH, 0u, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ bvhBuffer, 0u, bvhBuffer.getSize() } } ); + } + //********************************************************************************************* } diff --git a/source/Core/Castor3D/Render/Clustered/ClusteredModule.cpp b/source/Core/Castor3D/Render/Clustered/ClusteredModule.cpp index ff12b27ea0..8801eef409 100644 --- a/source/Core/Castor3D/Render/Clustered/ClusteredModule.cpp +++ b/source/Core/Castor3D/Render/Clustered/ClusteredModule.cpp @@ -8,8 +8,6 @@ namespace castor3d { case castor3d::ClusterSplitScheme::eExponentialBase: return cuT( "exponential" ); - case castor3d::ClusterSplitScheme::eExponentialBiased: - return cuT( "exponential_biased" ); case castor3d::ClusterSplitScheme::eLinear: return cuT( "linear" ); case castor3d::ClusterSplitScheme::eExponentialLinearHybrid: diff --git a/source/Core/Castor3D/Render/Clustered/ClustersConfig.cpp b/source/Core/Castor3D/Render/Clustered/ClustersConfig.cpp index a282b353d3..7fa68765e6 100644 --- a/source/Core/Castor3D/Render/Clustered/ClustersConfig.cpp +++ b/source/Core/Castor3D/Render/Clustered/ClustersConfig.cpp @@ -229,7 +229,7 @@ namespace castor3d } else { - params[0]->get( blockContext->clustersConfig->bias ); + params[0]->get( blockContext->clustersConfig->minDistance ); } } CU_EndAttribute() @@ -259,8 +259,10 @@ namespace castor3d , enableReduceWarpOptimisation{ dirty, false } , enableBVHWarpOptimisation{ dirty, true } , enablePostAssignSort{ dirty, false } + , lockClustersFrustum{ dirty, false } + , debugDisplay{ dirty, 
ClusterDebugDisplay::eNone } , splitScheme{ dirty, ClusterSplitScheme::eExponentialLinearHybrid } - , bias{ dirty, 1.0f } + , minDistance{ dirty, 1.0f } { } @@ -268,10 +270,13 @@ namespace castor3d { if ( enabled ) { - static castor::StringArray names{ cuT( "Exponential" ) - , cuT( "Biased Exponential" ) + static castor::StringArray splitSchemeNames{ cuT( "Exponential" ) , cuT( "Linear" ) , cuT( "Hybrid" ) }; + static castor::StringArray debugDisplayNames{ cuT( "None" ) + , cuT( "Clusters AABB" ) + , cuT( "Lights AABB" ) + , cuT( "Lights BVH" ) }; visitor.visit( cuT( "Clusters" ) ); visitor.visit( cuT( "Use BVH" ), useLightsBVH ); @@ -285,12 +290,17 @@ namespace castor3d visitor.visit( cuT( "Enable Post Assignment Sort" ), enablePostAssignSort ); visitor.visit( cuT( "Split Scheme" ) , splitScheme - , names - , ConfigurationVisitorBase::OnEnumValueChangeT< ClusterSplitScheme >( [this]( ClusterSplitScheme, ClusterSplitScheme newV ) + , splitSchemeNames + , ConfigurationVisitorBase::OnEnumValueChangeT< ClusterSplitScheme >( [this]( ClusterSplitScheme, ClusterSplitScheme newV ){ splitScheme = newV; } ) ); + visitor.visit( cuT( "Minimal Threshold Distance" ), minDistance ); + visitor.visit( cuT( "[Debug] Lock Clusters Frustum" ), lockClustersFrustum ); + visitor.visit( cuT( "[Debug] Display AABBs" ) + , debugDisplay + , debugDisplayNames + , ConfigurationVisitorBase::OnEnumValueChangeT< ClusterDebugDisplay >( [this]( ClusterDebugDisplay, ClusterDebugDisplay newV ) { - splitScheme = newV; + debugDisplay = newV; } ) ); - visitor.visit( cuT( "Biased Exponential Bias" ), bias ); } } @@ -312,6 +322,7 @@ namespace castor3d clustersContext.addParser( cuT( "enable_bvh_warp_optimisation" ), clscfg::parserClustersBVHWarpOptimisation, { makeDefaultedParameter< ParameterType::eBool >( true ) } ); clustersContext.addParser( cuT( "split_scheme" ), clscfg::parserClustersSplitScheme, { makeParameter< ParameterType::eCheckedText, ClusterSplitScheme >() } ); clustersContext.addParser( 
cuT( "bias" ), clscfg::parserClustersBias, { makeDefaultedParameter< ParameterType::eFloat >( 1.0f ) } ); + clustersContext.addParser( cuT( "min_distance" ), clscfg::parserClustersBias, { makeDefaultedParameter< ParameterType::eFloat >( 1.0f ) } ); clustersContext.addPopParser( cuT( "}" ), clscfg::parserClustersEnd ); } @@ -327,6 +338,6 @@ namespace castor3d && lhs.enableReduceWarpOptimisation == rhs.enableReduceWarpOptimisation && lhs.enableBVHWarpOptimisation == rhs.enableBVHWarpOptimisation && lhs.splitScheme == rhs.splitScheme - && lhs.bias == rhs.bias; + && lhs.minDistance == rhs.minDistance; } } diff --git a/source/Core/Castor3D/Render/Clustered/ClustersMask.cpp b/source/Core/Castor3D/Render/Clustered/ClustersMask.cpp index 2bb47d264b..977f334991 100644 --- a/source/Core/Castor3D/Render/Clustered/ClustersMask.cpp +++ b/source/Core/Castor3D/Render/Clustered/ClustersMask.cpp @@ -232,13 +232,14 @@ namespace castor3d crg::FramePass const & createClustersMaskPass( crg::FramePassGroup & graph , crg::FramePass const & previousPass , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters & clusters , RenderTechnique & technique , RenderNodesPass *& nodesPass ) { auto & result = graph.createPass( "NodesPass" - , [&nodesPass, &device, &technique]( crg::FramePass const & framePass + , [&nodesPass, &device, &technique, &clustersCameraUbo]( crg::FramePass const & framePass , crg::GraphContext & context , crg::RunnableGraph & runnableGraph ) { @@ -250,7 +251,7 @@ namespace castor3d , technique.getTargetDepth() , technique.getSsaoConfig() , RenderNodesPassDesc{ technique.getTargetExtent() - , technique.getCameraUbo() + , clustersCameraUbo , technique.getSceneUbo() , technique.getRenderTarget().getCuller() , RenderFilter::eNone diff --git a/source/Core/Castor3D/Render/Clustered/ComputeClustersAABB.cpp b/source/Core/Castor3D/Render/Clustered/ComputeClustersAABB.cpp index 
66ce1d0dfd..f2d9fdf0b8 100644 --- a/source/Core/Castor3D/Render/Clustered/ComputeClustersAABB.cpp +++ b/source/Core/Castor3D/Render/Clustered/ComputeClustersAABB.cpp @@ -6,6 +6,7 @@ #include "Castor3D/Render/Clustered/FrustumClusters.hpp" #include "Castor3D/Shader/Program.hpp" #include "Castor3D/Shader/Shaders/GlslAABB.hpp" +#include "Castor3D/Shader/Shaders/GlslBaseIO.hpp" #include "Castor3D/Shader/Shaders/GlslClusteredLights.hpp" #include "Castor3D/Shader/Ubos/CameraUbo.hpp" #include "Castor3D/Shader/Ubos/ClustersUbo.hpp" @@ -13,6 +14,7 @@ #include #include +#include #include #include @@ -25,7 +27,8 @@ namespace castor3d { enum BindingPoints { - eCamera, + eMainCamera, + eClustersCamera, eClusters, eReducedLightsAABB, eClustersAABB, @@ -37,8 +40,13 @@ namespace castor3d sdw::ComputeWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; // Inputs - C3D_Camera( writer - , eCamera + C3D_CameraNamed( writer + , Main + , eMainCamera + , 0u ); + C3D_CameraNamed( writer + , Clusters + , eClustersCamera , 0u ); C3D_Clusters( writer , eClusters @@ -56,7 +64,7 @@ namespace castor3d { // Convert to normalized texture coordinates in the range [0 .. 1]. 
auto texCoord = writer.declLocale( "texCoord" - , screen.xy() / vec2( c3d_cameraData.renderSize() ) ); + , screen.xy() / vec2( c3d_cameraDataClusters.renderSize() ) ); // Convert to clip space auto clip = writer.declLocale( "clip" @@ -64,7 +72,7 @@ namespace castor3d , screen.zw() ) ); auto view = writer.declLocale( "view" - , c3d_cameraData.projToView( clip ) ); + , c3d_cameraDataClusters.projToView( clip ) ); view /= view.w(); writer.returnStmt( view ); } @@ -188,12 +196,78 @@ namespace castor3d }; } + namespace dspclsb + { + enum BindingPoints + { + eMainCamera, + eClustersCamera, + eClusters, + eClustersAABB, + }; + + static ShaderPtr createDebugDisplayShader( RenderDevice const & device + , FrustumClusters const & frustumClusters ) + { + sdw::TraditionalGraphicsWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; + + C3D_CameraNamed( writer + , Main + , eMainCamera + , 0u ); + C3D_CameraNamed( writer + , Clusters + , eClustersCamera + , 0u ); + C3D_Clusters( writer + , eClusters + , 0u + , &frustumClusters.getConfig() ); + C3D_ClustersAABB( writer + , eClustersAABB + , 0u ); + + writer.implementEntryPointT< shader::Position4FT, shader::Colour4FT >( [&writer, &c3d_cameraDataMain, &c3d_cameraDataClusters, &c3D_clustersAABB, &c3d_clustersData]( sdw::VertexInT< shader::Position4FT > const & in + , sdw::VertexOutT< shader::Colour4FT > out ) + { + auto clusterIndex3D = writer.declLocale( "clusterIndex3D" + , c3d_clustersData.computeClusterIndex3D( writer.cast< sdw::UInt >( in.instanceIndex ) ) ); + auto aabb = writer.declLocale( "aabb" + , c3D_clustersAABB[in.instanceIndex] ); + auto position = writer.declLocale( "position" + , in.position() ); + position.x() = mix( aabb.min().x(), aabb.max().x(), position.x() ); + position.y() = mix( aabb.min().y(), aabb.max().y(), position.y() ); + position.z() = mix( aabb.min().z(), aabb.max().z(), position.z() ); + // Convert from clusters view position to world position + position = 
c3d_cameraDataClusters.curViewToWorld( position ); + position.w() = 1.0_f; + // Then from world to main camera proj. + out.vtx.position = c3d_cameraDataMain.worldToCurProj( position ); + + out.colour() = vec4( writer.cast< sdw::Float >( clusterIndex3D.x() ) / writer.cast< sdw::Float >( c3d_clustersData.dimensions().x() ) + , writer.cast< sdw::Float >( clusterIndex3D.y() ) / writer.cast< sdw::Float >( c3d_clustersData.dimensions().y() ) + , writer.cast< sdw::Float >( clusterIndex3D.z() ) / writer.cast< sdw::Float >( c3d_clustersData.dimensions().z() ) + , 1.0_f ); + } ); + + writer.implementEntryPointT< shader::Colour4FT, shader::Colour4FT >( []( sdw::FragmentInT< shader::Colour4FT > const & in + , sdw::FragmentOutT< shader::Colour4FT > const & out ) + { + out.colour() = in.colour(); + } ); + + return writer.getBuilder().releaseShader(); + } + } + //********************************************************************************************* crg::FramePass const & createComputeClustersAABBPass( crg::FramePassGroup & graph , crg::FramePass const * previousPass , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters const & clusters ) { auto & pass = graph.createPass( "ComputeClustersAABB" @@ -216,12 +290,41 @@ namespace castor3d return result; }); pass.addDependency( *previousPass ); - cameraUbo.createPassBinding( pass, cptclsb::eCamera ); + mainCameraUbo.createPassBinding( pass, cptclsb::eMainCamera ); + clustersCameraUbo.createPassBinding( pass, cptclsb::eClustersCamera ); clusters.getClustersUbo().createPassBinding( pass, cptclsb::eClusters ); createInputStoragePassBinding( pass, uint32_t( cptclsb::eReducedLightsAABB ), cuT( "C3D_ReducedLightsAABB" ), clusters.getReducedLightsAABBBuffer(), 0u, ashes::WholeSize ); createClearableOutputStorageBinding( pass, uint32_t( cptclsb::eClustersAABB ), cuT( "C3D_ClustersAABB" ), clusters.getClustersAABBBuffer(), 0u, 
ashes::WholeSize ); return pass; } + void createDisplayClustersAABBProgram( RenderDevice const & device + , FrustumClusters const & clusters + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo + , ashes::PipelineShaderStageCreateInfoArray & program + , ashes::VkDescriptorSetLayoutBindingArray & bindings + , ashes::WriteDescriptorSetArray & writes ) + { + ProgramModule programModule{ "ClustersAABB", dspclsb::createDebugDisplayShader( device, clusters ) }; + program = makeProgramStates( device, programModule ); + + bindings.push_back( VkDescriptorSetLayoutBinding{ dspclsb::eMainCamera, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dspclsb::eClustersCamera, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dspclsb::eClusters, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dspclsb::eClustersAABB, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + + writes.emplace_back( dspclsb::eMainCamera, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ mainCameraUbo.getUbo().getBuffer().getBuffer(), mainCameraUbo.getUbo().getByteOffset(), mainCameraUbo.getUbo().getByteRange() } } ); + writes.emplace_back( dspclsb::eClustersCamera, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ clustersCameraUbo.getUbo().getBuffer().getBuffer(), clustersCameraUbo.getUbo().getByteOffset(), clustersCameraUbo.getUbo().getByteRange() } } ); + auto & clustersUbo = clusters.getClustersUbo(); + writes.emplace_back( dspclsb::eClusters, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ clustersUbo.getUbo().getBuffer().getBuffer(), 
clustersUbo.getUbo().getByteOffset(), clustersUbo.getUbo().getByteRange() } } ); + auto & aabbBuffer = clusters.getClustersAABBBuffer(); + writes.emplace_back( dspclsb::eClustersAABB, 0u, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ aabbBuffer, 0u, aabbBuffer.getSize() } } ); + } + //********************************************************************************************* } diff --git a/source/Core/Castor3D/Render/Clustered/ComputeLightsAABB.cpp b/source/Core/Castor3D/Render/Clustered/ComputeLightsAABB.cpp index bb3144c8b8..092a871216 100644 --- a/source/Core/Castor3D/Render/Clustered/ComputeLightsAABB.cpp +++ b/source/Core/Castor3D/Render/Clustered/ComputeLightsAABB.cpp @@ -10,6 +10,7 @@ #include "Castor3D/Scene/Light/SpotLight.hpp" #include "Castor3D/Shader/Program.hpp" #include "Castor3D/Shader/Shaders/GlslAABB.hpp" +#include "Castor3D/Shader/Shaders/GlslBaseIO.hpp" #include "Castor3D/Shader/Shaders/GlslClusteredLights.hpp" #include "Castor3D/Shader/Shaders/GlslLight.hpp" #include "Castor3D/Shader/Ubos/CameraUbo.hpp" @@ -18,6 +19,7 @@ #include #include +#include #include #include @@ -30,7 +32,8 @@ namespace castor3d { enum BindingPoints { - eCamera, + eMainCamera, + eClustersCamera, eClusters, eLights, eAllLightsAABB, @@ -39,11 +42,18 @@ namespace castor3d static ShaderPtr createShader( RenderDevice const & device , ClustersConfig const & config ) { + static float constexpr FltMax = std::numeric_limits< float >::max(); + sdw::ComputeWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; // Inputs - C3D_Camera( writer - , eCamera + C3D_CameraNamed( writer + , Main + , eMainCamera + , 0u ); + C3D_CameraNamed( writer + , Clusters + , eClustersCamera , 0u ); C3D_Clusters( writer , eClusters @@ -63,10 +73,23 @@ namespace castor3d , lights.getDirectionalsEnd() + lightIndex * PointLight::LightDataComponents ); auto point = writer.declLocale( "point" , lights.getPointLight( lightOffset ) ); - 
auto vsPosition = writer.declLocale( "vsPosition" - , c3d_cameraData.worldToCurView( vec4( point.position(), 1.0_f ) ).xyz() ); + auto result = writer.declLocale< shader::AABB >( "result" ); + + IF( writer, point.enabled() ) + { + auto vsPosition = writer.declLocale( "vsPosition" + , c3d_cameraDataClusters.worldToCurView( vec4( point.position(), 1.0_f ) ).xyz() ); - writer.returnStmt( shader::AABB{ vsPosition, computeRange( point ) } ); + result = shader::AABB{ vsPosition, computeRange( point ) }; + } + ELSE + { + result = shader::AABB{ vec4( sdw::Float{ FltMax }, FltMax, FltMax, 1.0f ) + , vec4( sdw::Float{ -FltMax }, -FltMax, -FltMax, 1.0f ) }; + } + FI + + writer.returnStmt( result ); } , sdw::InUInt{ writer, "lightIndex" } ); @@ -94,55 +117,69 @@ namespace castor3d , lights.getPointsEnd() + lightIndex * SpotLight::LightDataComponents ); auto spot = writer.declLocale( "spot" , lights.getSpotLight( lightOffset ) ); + auto result = writer.declLocale< shader::AABB >( "result" ); - if ( config.useSpotTightBoundingBox ) + IF( writer, spot.enabled() ) { - auto vsApex = writer.declLocale( "vsApex" - , c3d_cameraData.worldToCurView( vec4( spot.position(), 1.0_f ) ).xyz() ); - auto vsDirection = writer.declLocale( "vsDirection" - , c3d_cameraData.worldToCurView( -spot.direction() ) ); - - auto largeRange = writer.declLocale( "largeRange" - , computeRange( spot ) ); - auto smallRange = writer.declLocale( "smallRange" - , largeRange * spot.outerCutOffCos() ); - auto baseRadius = writer.declLocale( "baseRadius" - , smallRange * spot.outerCutOffTan() ); - - auto smallBase = writer.declLocale( "smallBase" - , vsApex + smallRange * vsDirection ); - - IF( writer, dot( vsDirection, vec3( 0.0_f, 0.0_f, -1.0_f ) ) > 0.999_f ) + if ( config.useSpotTightBoundingBox ) { - // Light is looking the same direction as the camera. - // Weird bug here, resulting in both small and large AABB having min.z == max.z - // whilst everything looks good when debugging step by step in RenderDoc... 
- // Hence just take the disk AABB - auto e = writer.declLocale( "e" - , baseRadius * sqrt( vec3( 1.0_f ) - vsDirection * vsDirection ) ); - - writer.returnStmt( shader::AABB{ vec4( min( vsApex, smallBase - e ), 1.0_f ) - , vec4( max( vsApex, smallBase + e ), 1.0_f ) } ); + auto vsApex = writer.declLocale( "vsApex" + , c3d_cameraDataClusters.worldToCurView( vec4( spot.position(), 1.0_f ) ).xyz() ); + auto vsDirection = writer.declLocale( "vsDirection" + , c3d_cameraDataClusters.worldToCurView( -spot.direction() ) ); + + auto largeRange = writer.declLocale( "largeRange" + , computeRange( spot ) ); + auto smallRange = writer.declLocale( "smallRange" + , largeRange * spot.outerCutOffCos() ); + auto baseRadius = writer.declLocale( "baseRadius" + , smallRange * spot.outerCutOffTan() ); + + auto smallBase = writer.declLocale( "smallBase" + , vsApex + smallRange * vsDirection ); + + IF( writer, dot( vsDirection, vec3( 0.0_f, 0.0_f, -1.0_f ) ) > 0.999_f ) + { + // Light is looking the same direction as the camera. + // Weird bug here, resulting in both small and large AABB having min.z == max.z + // whilst everything looks good when debugging step by step in RenderDoc... 
+ // Hence just take the disk AABB + auto e = writer.declLocale( "e" + , baseRadius * sqrt( vec3( 1.0_f ) - vsDirection * vsDirection ) ); + + result = shader::AABB{ vec4( min( vsApex, smallBase - e ), 1.0_f ) + , vec4( max( vsApex, smallBase + e ), 1.0_f ) }; + } + ELSE + { + auto smallAABB = writer.declLocale( "smallAABB" + , getConeAABB( vsApex, smallBase, baseRadius ) ); + + auto largeBase = writer.declLocale( "largeBase" + , vsApex + largeRange * vsDirection ); + auto largeAABB = writer.declLocale( "largeAABB" + , getConeAABB( vsApex, largeBase, baseRadius ) ); + + result = shader::AABB{ min( smallAABB.min(), largeAABB.min() ) + , max( smallAABB.max(), largeAABB.max() ) }; + } + FI + } + else + { + auto vsPosition = writer.declLocale( "vsPosition" + , c3d_cameraDataClusters.worldToCurView( vec4( spot.position(), 1.0_f ) ).xyz() ); + result = shader::AABB{ vsPosition, computeRange( spot ) }; } - FI - - auto smallAABB = writer.declLocale( "smallAABB" - , getConeAABB( vsApex, smallBase, baseRadius ) ); - - auto largeBase = writer.declLocale( "largeBase" - , vsApex + largeRange * vsDirection ); - auto largeAABB = writer.declLocale( "largeAABB" - , getConeAABB( vsApex, largeBase, baseRadius ) ); - - writer.returnStmt( shader::AABB{ min( smallAABB.min(), largeAABB.min() ) - , max( smallAABB.max(), largeAABB.max() ) } ); } - else + ELSE { - auto vsPosition = writer.declLocale( "vsPosition" - , c3d_cameraData.worldToCurView( vec4( spot.position(), 1.0_f ) ).xyz() ); - writer.returnStmt( shader::AABB{ vsPosition, computeRange( spot ) } ); + result = shader::AABB{ vec4( sdw::Float{ FltMax }, FltMax, FltMax, 1.0f ) + , vec4( sdw::Float{ -FltMax }, -FltMax, -FltMax, 1.0f ) }; } + FI + + writer.returnStmt( result ); } , sdw::InUInt{ writer, "lightIndex" } ); @@ -249,12 +286,78 @@ namespace castor3d }; } + namespace dsplgtb + { + enum BindingPoints + { + eMainCamera, + eClustersCamera, + eLightsAABB, + }; + + static ShaderPtr createDebugDisplayShader( RenderDevice const & 
device + , FrustumClusters const & frustumClusters ) + { + sdw::TraditionalGraphicsWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; + + C3D_CameraNamed( writer + , Main + , eMainCamera + , 0u ); + C3D_CameraNamed( writer + , Clusters + , eClustersCamera + , 0u ); + C3D_AllLightsAABB( writer + , eLightsAABB + , 0u ); + + auto colorPalette = writer.declConstantArray( "colorPalette" + , std::vector< sdw::Vec4 >{ vec4( 0.25_f, 0.25_f, 0.25_f, 1.0_f ) + , vec4( 0.25_f, 0.25_f, 1.00_f, 1.0_f ) + , vec4( 0.25_f, 1.00_f, 0.25_f, 1.0_f ) + , vec4( 0.25_f, 1.00_f, 1.00_f, 1.0_f ) + , vec4( 1.00_f, 0.25_f, 0.25_f, 1.0_f ) + , vec4( 1.00_f, 0.25_f, 1.00_f, 1.0_f ) + , vec4( 1.00_f, 1.00_f, 0.25_f, 1.0_f ) + , vec4( 1.00_f, 1.00_f, 1.00_f, 1.0_f ) } ); + + writer.implementEntryPointT< shader::Position4FT, shader::Colour4FT >( [&writer, &c3d_cameraDataMain, &c3d_cameraDataClusters, &c3d_allLightsAABB, &colorPalette]( sdw::VertexInT< shader::Position4FT > const & in + , sdw::VertexOutT< shader::Colour4FT > out ) + { + auto aabb = writer.declLocale( "aabb" + , c3d_allLightsAABB[in.instanceIndex] ); + auto position = writer.declLocale( "position" + , in.position() ); + position.x() = mix( aabb.min().x(), aabb.max().x(), position.x() ); + position.y() = mix( aabb.min().y(), aabb.max().y(), position.y() ); + position.z() = mix( aabb.min().z(), aabb.max().z(), position.z() ); + // Convert from clusters view position to world position + position = c3d_cameraDataClusters.curViewToWorld( position ); + position.w() = 1.0_f; + // Then from world to main camera proj. 
+ out.vtx.position = c3d_cameraDataMain.worldToCurProj( position ); + + out.colour() = colorPalette[in.instanceIndex % 8]; + } ); + + writer.implementEntryPointT< shader::Colour4FT, shader::Colour4FT >( []( sdw::FragmentInT< shader::Colour4FT > const & in + , sdw::FragmentOutT< shader::Colour4FT > const & out ) + { + out.colour() = in.colour(); + } ); + + return writer.getBuilder().releaseShader(); + } + } + //********************************************************************************************* crg::FramePass const & createComputeLightsAABBPass( crg::FramePassGroup & graph , crg::FramePass const * previousPass , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters const & clusters ) { auto & pass = graph.createPass( "ComputeLightsAABB" @@ -273,9 +376,10 @@ namespace castor3d device.renderSystem.getEngine()->registerTimer( castor::makeString( framePass.getFullName() ) , result->getTimer() ); return result; - }); + } ); pass.addDependency( *previousPass ); - cameraUbo.createPassBinding( pass, cptlgtb::eCamera ); + mainCameraUbo.createPassBinding( pass, cptlgtb::eMainCamera ); + clustersCameraUbo.createPassBinding( pass, cptlgtb::eClustersCamera ); clusters.getClustersUbo().createPassBinding( pass, cptlgtb::eClusters ); auto const & lights = clusters.getCamera().getScene()->getLightCache(); lights.createPassBinding( pass, cptlgtb::eLights ); @@ -283,5 +387,29 @@ namespace castor3d return pass; } + void createDisplayLightsAABBProgram( RenderDevice const & device + , FrustumClusters const & clusters + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo + , ashes::PipelineShaderStageCreateInfoArray & program + , ashes::VkDescriptorSetLayoutBindingArray & bindings + , ashes::WriteDescriptorSetArray & writes ) + { + ProgramModule programModule{ "LightsAABB", dsplgtb::createDebugDisplayShader( device, clusters ) }; + program = makeProgramStates( 
device, programModule ); + + bindings.push_back( VkDescriptorSetLayoutBinding{ dsplgtb::eMainCamera, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dsplgtb::eClustersCamera, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + bindings.push_back( VkDescriptorSetLayoutBinding{ dsplgtb::eLightsAABB, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } ); + + writes.emplace_back( dsplgtb::eMainCamera, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ mainCameraUbo.getUbo().getBuffer().getBuffer(), mainCameraUbo.getUbo().getByteOffset(), mainCameraUbo.getUbo().getByteRange() } } ); + writes.emplace_back( dsplgtb::eClustersCamera, 0u, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ clustersCameraUbo.getUbo().getBuffer().getBuffer(), clustersCameraUbo.getUbo().getByteOffset(), clustersCameraUbo.getUbo().getByteRange() } } ); + auto & aabbBuffer = clusters.getAllLightsAABBBuffer(); + writes.emplace_back( dsplgtb::eLightsAABB, 0u, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + , ashes::VkDescriptorBufferInfoArray{ VkDescriptorBufferInfo{ aabbBuffer, 0u, aabbBuffer.getSize() } } ); + } + //********************************************************************************************* } diff --git a/source/Core/Castor3D/Render/Clustered/FrustumClusters.cpp b/source/Core/Castor3D/Render/Clustered/FrustumClusters.cpp index bc473cf24b..3bad60edbf 100644 --- a/source/Core/Castor3D/Render/Clustered/FrustumClusters.cpp +++ b/source/Core/Castor3D/Render/Clustered/FrustumClusters.cpp @@ -12,15 +12,19 @@ #include "Castor3D/Render/Clustered/ComputeLightsMortonCode.hpp" #include "Castor3D/Render/Clustered/FindUniqueClusters.hpp" #include "Castor3D/Render/Clustered/MergeSortLights.hpp" -#include "Castor3D/Render/Clustered/RadixSortLights.hpp" +#include 
"Castor3D/Render/Clustered/BucketSortLights.hpp" #include "Castor3D/Render/Clustered/ReduceLightsAABB.hpp" #include "Castor3D/Render/Clustered/SortAssignedLights.hpp" +#include "Castor3D/Render/Debug/DebugModule.hpp" #include "Castor3D/Scene/Camera.hpp" #include "Castor3D/Scene/Scene.hpp" #include "Castor3D/Scene/Light/DirectionalLight.hpp" #include "Castor3D/Scene/Light/Light.hpp" #include "Castor3D/Scene/Light/PointLight.hpp" #include "Castor3D/Scene/Light/SpotLight.hpp" +#include "Castor3D/Shader/Shaders/GlslBitonicSort.hpp" +#include "Castor3D/Shader/Shaders/GlslRadixSort.hpp" +#include "Castor3D/Shader/Ubos/CameraUbo.hpp" #include @@ -75,6 +79,38 @@ namespace castor3d //********************************************************************************************* + FrustumClusters::Buffers::Buffers( RenderDevice const & device + , castor::String const & name ) + : mortonCodes{ { makeBuffer< u32 >( device + , MaxLightsCount + , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT + , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + , cuT( "C3D_" ) + name + cuT( "LightMortonCodesA" ) ) + , makeBuffer< u32 >( device + , MaxLightsCount + , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT + , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + , cuT( "C3D_" ) + name + cuT( "LightMortonCodesB" ) ) } } + , indices{ { makeBuffer< u32 >( device + , MaxLightsCount + , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT + , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + , cuT( "C3D_" ) + name + cuT( "LightIndicesA" ) ) + , makeBuffer< u32 >( device + , MaxLightsCount + , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT + , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + , cuT( "C3D_" ) + name + cuT( "LightIndicesB" ) ) } } + , bvh{ makeBuffer< AABB >( device + , getNumNodes( MaxLightsCount ) + , 
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT + , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + , cuT( "C3D_" ) + name + cuT( "LightBVH" ) ) } + { + } + + //********************************************************************************************* + FrustumClusters::FrustumClusters( RenderDevice const & device , Camera const & camera , ClustersConfig const & config ) @@ -86,6 +122,7 @@ namespace castor3d , m_cameraProjection{ m_clustersDirty, castor::Matrix4x4f{} } , m_cameraView{ m_clustersDirty, castor::Matrix4x4f{} } , m_clustersUbo{ m_device } + , m_clustersCameraUbo{ m_device } , m_clustersIndirect{ makeBuffer< VkDispatchIndirectCommand >( m_device , getNumNodes( MaxLightsCount ) , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT @@ -101,56 +138,8 @@ namespace castor3d , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT , cuT( "C3D_ReducedLightsAABB" ) ) } - , m_pointMortonCodesBuffers{ { makeBuffer< u32 >( m_device - , MaxLightsCount - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_PointLightMortonCodesA" ) ) - , makeBuffer< u32 >( m_device - , MaxLightsCount - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_PointLightMortonCodesB" ) ) } } - , m_spotMortonCodesBuffers{ { makeBuffer< u32 >( m_device - , MaxLightsCount - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_SpotLightMortonCodesA" ) ) - , makeBuffer< u32 >( m_device - , MaxLightsCount - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , 
cuT( "C3D_SpotLightMortonCodesB" ) ) } } - , m_pointIndicesBuffers{ { makeBuffer< u32 >( m_device - , MaxLightsCount - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_PointLightIndicesA" ) ) - , makeBuffer< u32 >( m_device - , MaxLightsCount - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_PointLightIndicesB" ) ) } } - , m_spotIndicesBuffers{ { makeBuffer< u32 >( m_device - , MaxLightsCount - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_SpotLightIndicesA" ) ) - , makeBuffer< u32 >( m_device - , MaxLightsCount - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_SpotLightIndicesB" ) ) } } - , m_pointBVHBuffer{ makeBuffer< AABB >( m_device - , getNumNodes( MaxLightsCount ) - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_PointLightBVH" ) ) } - , m_spotBVHBuffer{ makeBuffer< AABB >( m_device - , getNumNodes( MaxLightsCount ) - , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT - , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT - , cuT( "C3D_SpotLightBVH" ) ) } + , m_pointBuffers{ m_device, cuT( "Point" ) } + , m_spotBuffers{ m_device, cuT( "Spot" ) } { static uint32_t constexpr NumThreadsPerThreadGroup = 256u; static uint32_t constexpr ElementsPerThread = 8u; @@ -158,8 +147,8 @@ namespace castor3d // The maximum number of elements that need to be sorted. uint32_t maxElements = MaxLightsCount; - // Radix sort will sort Morton codes (keys) into chunks of SORT_NUM_THREADS_PER_THREAD_GROUP size. 
- uint32_t chunkSize = NumThreadsPerThreadGroup; + // Radix sort will sort Morton codes (keys) into chunks of BucketSortBucketSize size. + uint32_t chunkSize = getBucketSortBucketSize(); // The number of chunks that need to be merge sorted after Radix sort finishes. uint32_t numChunks = castor::divRoundUp( maxElements, chunkSize ); // The number of sort groups that are needed to sort the first set of chunks. @@ -176,7 +165,7 @@ namespace castor3d // needed by a single sort group multiplied by the maximum number of sort groups. uint32_t maxMergePathPartitions = numMergePathPartitionsPerSortGroup * maxSortGroups; - m_mergePathPartitionsBuffer = makeBuffer< s32 >( m_device + m_mergePathPartitionsBuffer = makeBuffer< s32 >( device , maxMergePathPartitions , VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT , VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT @@ -199,7 +188,7 @@ namespace castor3d , lightCache.getLightsBufferCount( LightType::ePoint ) , lightCache.getLightsBufferCount( LightType::eSpot ) , m_config.splitScheme - , m_config.bias + , m_config.minDistance , m_config.enablePostAssignSort ); auto it = updater.dirtyScenes.find( scene ); m_lightsDirty = lightCache.hasClusteredLights() @@ -210,6 +199,68 @@ namespace castor3d m_first = m_camera.getEngine()->areUpdateOptimisationsEnabled() ? 
std::max( 0, m_first - 1 ) : 5; + + if ( !m_config.lockClustersFrustum.value() ) + { + auto jitter = updater.jitter; + auto jitterProjSpace = jitter * 2.0f; + jitterProjSpace[0] /= float( m_camera.getWidth() ); + jitterProjSpace[1] /= float( m_camera.getHeight() ); + m_clustersCameraUbo.cpuUpdate( m_camera + , updater.debugIndex + , true + , jitterProjSpace ); + } + } + + void FrustumClusters::updateDebug( DebugDrawer & drawer ) + { + if ( m_config.debugDisplay.value() == ClusterDebugDisplay::eClustersAABB ) + { + addDebugAabbs( drawer + , m_displayClustersAABBBindings + , m_displayClustersAABBWrites + , getDimensions()->x * getDimensions()->y * getDimensions()->z + , m_displayClustersAABBProgram + , true ); + } + else if ( m_config.debugDisplay.value() == ClusterDebugDisplay::eLightsAABB ) + { + auto scene = m_camera.getScene(); + auto const & lightCache = scene->getLightCache(); + addDebugAabbs( drawer + , m_displayLightsAABBBindings + , m_displayLightsAABBWrites + , lightCache.getLightsBufferCount( LightType::ePoint ) + + lightCache.getLightsBufferCount( LightType::eSpot ) + , m_displayLightsAABBProgram + , true ); + } + else if ( m_config.debugDisplay.value() == ClusterDebugDisplay::eLightsBVH ) + { + auto scene = m_camera.getScene(); + auto const & lightCache = scene->getLightCache(); + + if ( auto count = lightCache.getLightsBufferCount( LightType::ePoint ) ) + { + addDebugAabbs( drawer + , m_displayPointLightsBVHBindings + , m_displayPointLightsBVHWrites + , FrustumClusters::getNumNodes( count ) + , m_displayPointLightsBVHProgram + , false ); + } + + if ( auto count = lightCache.getLightsBufferCount( LightType::eSpot ) ) + { + addDebugAabbs( drawer + , m_displaySpotLightsBVHBindings + , m_displaySpotLightsBVHWrites + , FrustumClusters::getNumNodes( count ) + , m_displaySpotLightsBVHProgram + , false ); + } + } } crg::FramePass const & FrustumClusters::createFramePasses( crg::FramePassGroup & parentGraph @@ -218,29 +269,61 @@ namespace castor3d , CameraUbo 
const & cameraUbo , RenderNodesPass *& nodesPass ) { + if ( m_displayClustersAABBProgram.empty() ) + { + createDisplayClustersAABBProgram( m_device, *this, cameraUbo, m_clustersCameraUbo + , m_displayClustersAABBProgram + , m_displayClustersAABBBindings + , m_displayClustersAABBWrites ); + } + + if ( m_displayLightsAABBProgram.empty() ) + { + createDisplayLightsAABBProgram( m_device, *this, cameraUbo, m_clustersCameraUbo + , m_displayLightsAABBProgram + , m_displayLightsAABBBindings + , m_displayLightsAABBWrites ); + } + + if ( m_displayPointLightsBVHProgram.empty() ) + { + createDisplayPointLightsBVHProgram( m_device, *this, cameraUbo, m_clustersCameraUbo + , m_displayPointLightsBVHProgram + , m_displayPointLightsBVHBindings + , m_displayPointLightsBVHWrites ); + } + + if ( m_displaySpotLightsBVHProgram.empty() ) + { + createDisplaySpotLightsBVHProgram( m_device, *this, cameraUbo, m_clustersCameraUbo + , m_displaySpotLightsBVHProgram + , m_displaySpotLightsBVHBindings + , m_displaySpotLightsBVHWrites ); + } + auto & graph = parentGraph.createPassGroup( "Clusters" ); crg::FramePassArray lastPasses{ 1u, previousPass }; lastPasses = { &createComputeLightsAABBPass( graph, lastPasses.front() - , m_device, cameraUbo, *this ) }; + , m_device, cameraUbo, m_clustersCameraUbo, *this ) }; lastPasses = { &createReduceLightsAABBPass( graph, lastPasses.front() - , m_device, cameraUbo, *this ) }; + , m_device, cameraUbo, m_clustersCameraUbo, *this ) }; lastPasses = { &createComputeClustersAABBPass( graph, lastPasses.front() - , m_device, cameraUbo, *this ) }; + , m_device, cameraUbo, m_clustersCameraUbo, *this ) }; lastPasses = { &createClustersMaskPass( graph, *lastPasses.front() - , m_device, cameraUbo, *this + , m_device, cameraUbo, m_clustersCameraUbo, *this , technique, nodesPass ) }; lastPasses = { &createFindUniqueClustersPass( graph, *lastPasses.front() , m_device, *this ) }; lastPasses = { &createComputeLightsMortonCodePass( graph, lastPasses.front() , m_device, *this ) 
}; - lastPasses = createRadixSortLightsPass( graph, lastPasses.front() + lastPasses = createBucketSortLightsPass( graph, lastPasses.front() , m_device, *this ); lastPasses = createMergeSortLightsPass( graph, lastPasses , m_device, *this ); lastPasses = createBuildLightsBVHPass( graph, lastPasses , m_device, *this ); lastPasses = { &createAssignLightsToClustersPass( graph, lastPasses - , m_device, cameraUbo, *this ) }; + , m_device, cameraUbo, m_clustersCameraUbo, *this ) }; return createSortAssignedLightsPass( graph, lastPasses , m_device, *this ); } @@ -276,6 +359,11 @@ namespace castor3d return numNodes; } + uint32_t FrustumClusters::getBucketSortBucketSize() + { + return shader::RadixSortT< 4u >::bucketSize; + } + void FrustumClusters::doUpdate() { m_toDelete.clear(); @@ -293,10 +381,10 @@ namespace castor3d { auto indexCount = cellCount * MaxLightsPerCluster; frscls::updateBuffer< AABB >( m_device, cellCount, cuT( "ClustersAABB" ), m_aabbBuffer, m_toDelete ); - frscls::updateBuffer< castor::Point2ui >( m_device, cellCount, cuT( "PointLightClusterGrid" ), m_pointLightClusterGridBuffer, m_toDelete ); - frscls::updateBuffer< castor::Point2ui >( m_device, cellCount, cuT( "SpotLightClusterGrid" ), m_spotLightClusterGridBuffer, m_toDelete ); - frscls::updateBuffer< u32 >( m_device, indexCount, cuT( "PointLightClusterIndex" ), m_pointLightClusterIndexBuffer, m_toDelete ); - frscls::updateBuffer< u32 >( m_device, indexCount, cuT( "SpotLightClusterIndex" ), m_spotLightClusterIndexBuffer, m_toDelete ); + frscls::updateBuffer< castor::Point2ui >( m_device, cellCount, cuT( "PointLightClusterGrid" ), m_pointBuffers.clusterGrid, m_toDelete ); + frscls::updateBuffer< castor::Point2ui >( m_device, cellCount, cuT( "SpotLightClusterGrid" ), m_spotBuffers.clusterGrid, m_toDelete ); + frscls::updateBuffer< u32 >( m_device, indexCount, cuT( "PointLightClusterIndex" ), m_pointBuffers.clusterIndex, m_toDelete ); + frscls::updateBuffer< u32 >( m_device, indexCount, cuT( 
"SpotLightClusterIndex" ), m_spotBuffers.clusterIndex, m_toDelete ); frscls::updateBuffer< u32 >( m_device, cellCount, cuT( "ClusterFlags" ), m_clusterFlags, m_toDelete ); frscls::updateBuffer< u32 >( m_device, cellCount, cuT( "UniqueClusters" ), m_uniqueClusters, m_toDelete ); onClusterBuffersChanged( *this ); diff --git a/source/Core/Castor3D/Render/Clustered/MergeSortLights.cpp b/source/Core/Castor3D/Render/Clustered/MergeSortLights.cpp index 78394fe3e4..e14762bf2d 100644 --- a/source/Core/Castor3D/Render/Clustered/MergeSortLights.cpp +++ b/source/Core/Castor3D/Render/Clustered/MergeSortLights.cpp @@ -14,6 +14,7 @@ #include "Castor3D/Shader/Shaders/GlslAppendBuffer.hpp" #include "Castor3D/Shader/Shaders/GlslClusteredLights.hpp" #include "Castor3D/Shader/Shaders/GlslLight.hpp" +#include "Castor3D/Shader/Shaders/GlslRadixSort.hpp" #include "Castor3D/Shader/Shaders/GlslUtils.hpp" #include "Castor3D/Shader/Ubos/CameraUbo.hpp" #include "Castor3D/Shader/Ubos/ClustersUbo.hpp" @@ -31,10 +32,9 @@ namespace castor3d namespace merge { - static uint32_t constexpr NumThreads = 256u; static uint32_t constexpr NumThreadsPerThreadGroup = 256u; static uint32_t constexpr NumValuesPerThread = 8u; - static uint32_t constexpr NumValuesPerThreadGroup = NumThreads * NumValuesPerThread; + static uint32_t constexpr NumValuesPerThreadGroup = NumThreadsPerThreadGroup * NumValuesPerThread; enum BindingPoints { @@ -89,19 +89,19 @@ namespace castor3d auto gsValues = writer.declSharedVariable< sdw::UInt >( "gsValues", NumValuesPerThreadGroup ); // Intermediate values. (8,192 Bytes) /** - * MergePath is a binary search over two sorted arrays that finds the - * point in list A and list B to begin a merge operation. - * Based on: https://moderngpu.github.io/bulkinsert.html#mergepath - * Retrieved on: Aug 9, 2016. - * - * @param a0 The first element in list A. - * @param aCount The number of elements in A. - * @param b0 The first element in list B. - * @param bCount The number of elements in B. 
- * @param diag The cross diagonal of the merge matrix where the merge path is computed. - * @param bUseSharedMem Whether to read from shared memory or global memory. - * @return - */ + * MergePath is a binary search over two sorted arrays that finds the + * point in list A and list B to begin a merge operation. + * Based on: https://moderngpu.github.io/bulkinsert.html#mergepath + * Retrieved on: Aug 9, 2016. + * + * @param a0 The first element in list A. + * @param aCount The number of elements in A. + * @param b0 The first element in list B. + * @param bCount The number of elements in B. + * @param diag The cross diagonal of the merge matrix where the merge path is computed. + * @param bUseSharedMem Whether to read from shared memory or global memory. + * @return + */ auto mergePath = writer.implementFunction< sdw::Int >( "c3d_mergePath" , [&]( sdw::Int const & a0, sdw::Int const & aCount , sdw::Int const & b0, sdw::Int const & bCount @@ -138,8 +138,8 @@ namespace castor3d , sdw::InInt{ writer, "diag" } , sdw::InBoolean{ writer, "bUseSharedMem" } ); /** - * Perform a serial merge using shared memory. Write results to global memory. - */ + * Perform a serial merge using shared memory. Write results to global memory. + */ auto serialMerge = writer.implementFunction< sdw::Void >( "c3d_serialMerge" , [&]( sdw::Int a0, sdw::Int const & a1 , sdw::Int b0, sdw::Int const & b1 @@ -186,7 +186,7 @@ namespace castor3d , sdw::InInt{ writer, "numValues" } , sdw::InInt{ writer, "out0" } ); - writer.implementMainT< sdw::VoidT >( NumThreads + writer.implementMainT< sdw::VoidT >( NumThreadsPerThreadGroup , [&]( sdw::ComputeIn const & in ) { auto const & threadIndex = in.globalInvocationID.x(); @@ -434,119 +434,144 @@ namespace castor3d , VkCommandBuffer commandBuffer , uint32_t index ) { + // The number of threads per thread group. + constexpr u32 threadsPerThreadGroupCount = NumThreadsPerThreadGroup; + // The number of values that each thread sorts. 
+ constexpr u32 valuesPerThreadCount = NumValuesPerThread; + // The number of values that each thread group will sort. + constexpr u32 valuesPerThreadGroupCount = threadsPerThreadGroupCount * valuesPerThreadCount; + auto totalValues = m_lightCache.getLightsBufferCount( m_lightType ); - auto chunkSize = NumThreadsPerThreadGroup; + DispatchData data{ totalValues, 0u }; + + // The size of a single chunk that keys will be sorted into. + auto chunkSize = FrustumClusters::getBucketSortBucketSize(); // The total number of complete chunks to sort. - auto numChunks = getLightsMortonCodeChunkCount( totalValues ); - DispatchData data{ totalValues, 0u }; + auto chunksCount = getLightsMortonCodeChunkCount( totalValues ); - while ( numChunks > 1u ) + while ( chunksCount > 1u ) { data.chunkSize = chunkSize; // Number of sort groups required to sort all chunks. // Each sort group merge sorts 2 chunks into a single chunk. - auto numSortGroups = numChunks / 2u; + auto sortGroupsCount = chunksCount / 2u; + + // The number of thread groups that are required per sort group. + auto threadGroupsPerSortGroupCount = castor::divRoundUp( chunkSize * 2u, valuesPerThreadGroupCount ); // Compute merge path partitions per thread group. { - m_partitions.pipeline.recordInto( context, commandBuffer, index ); - - // The number of thread groups that are required per sort group. - auto numThreadGroupsPerSortGroup = castor::divRoundUp( chunkSize * 2u, NumValuesPerThreadGroup ); - // The number of merge path partitions that need to be computed. - auto numMergePathPartitionsPerSortGroup = numThreadGroupsPerSortGroup + 1u; - auto totalMergePathPartitions = numMergePathPartitionsPerSortGroup * numSortGroups; + auto mergePathPartitionsPerSortGroupCount = threadGroupsPerSortGroupCount + 1u; + auto totalMergePathPartitions = mergePathPartitionsPerSortGroupCount * sortGroupsCount; // The number of thread groups needed to compute all merge path partitions. 
- auto numThreadGroups = castor::divRoundUp( totalMergePathPartitions, NumThreadsPerThreadGroup ); + auto threadGroupsCount = castor::divRoundUp( totalMergePathPartitions, threadsPerThreadGroupCount ); + doMergeTransitionBarrier( context, commandBuffer, index ); + m_partitions.pipeline.recordInto( context, commandBuffer, index ); m_context.vkCmdPushConstants( commandBuffer, m_partitions.pipeline.getPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u, &data ); - m_context.vkCmdDispatch( commandBuffer, numThreadGroups, 1u, 1u ); - - // Add an explicit barrier for MergePathPartitions. - // This is required since the MergePathPartitions structured buffer is being used - // as a UAV in the MergePathPartions compute shader and as an SRV in the MergeSort - // compute shader. Because the MergePathPartions argument is not rebound between - // dispatches, no implicit UAV barrier will be added to the command list and MergeSort - // will likely not see the correct merge path partitions. - // To resolve this, an explicit UAV barrier is added for the resource. - doBarriers( context, commandBuffer, index ); + m_context.vkCmdDispatch( commandBuffer, threadGroupsCount, 1u, 1u ); } // Perform merge sort using merge path partitions computed from the previous step. { - m_merge.pipeline.recordInto( context, commandBuffer, index ); - - // The number of values that each sort group will sort. - // Each sort group merges 2 chunks into 1. - auto numValuesPerSortGroup = std::min( chunkSize * 2u, totalValues ); - - // The number of thread groups required to sort all values. - auto numThreadGroupsPerSortGroup = castor::divRoundUp( numValuesPerSortGroup, NumValuesPerThreadGroup ); - - // The number of values that each thread group will sort. 
- constexpr u32 numValuesPerThreadGroup = NumThreadsPerThreadGroup * NumValuesPerThread; - // Don't dispatch thread groups that will perform no work: // we need at least one thread group for each sort group; no more than the number of sort groups times // the number of thread groups per sort groups (if there is an odd number of chunks, the last chunk // needs no merge); enough thread groups to sort all values. + const u32 threadGroupsCount = std::max( sortGroupsCount + , std::min( threadGroupsPerSortGroupCount * sortGroupsCount + , castor::divRoundUp( totalValues, valuesPerThreadGroupCount ) ) ); - - // The number of thread groups required to sort all values. - const u32 numThreadGroups = std::max( numSortGroups - , std::min( numThreadGroupsPerSortGroup * numSortGroups - , castor::divRoundUp( totalValues, numValuesPerThreadGroup ) ) ); + doAllBarriers( context, commandBuffer, index ); + m_merge.pipeline.recordInto( context, commandBuffer, index ); m_context.vkCmdPushConstants( commandBuffer, m_merge.pipeline.getPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u, &data ); - m_context.vkCmdDispatch( commandBuffer, numThreadGroups, 1u, 1u ); + m_context.vkCmdDispatch( commandBuffer, threadGroupsCount, 1u, 1u ); + } - if ( numChunks & 1 ) - { - // copy last chunk, there is no merging required but we still need data in the destination buffer - // note: no additional barriers as we are still doing read from source-> write to dest, so that should be good - u32 lastChunkOffset = chunkSize * ( numChunks - 1 ) * sizeof( u32 ); - auto lastChunkSize = u32( totalValues * sizeof( u32 ) - lastChunkOffset ); - auto srcMorton = m_pass.buffers[0].buffer( index ); - auto srcIndices = m_pass.buffers[1].buffer( index ); - auto dstMorton = m_pass.buffers[0].buffer( 1u - index ); - auto dstIndices = m_pass.buffers[1].buffer( 1u - index ); - VkBufferCopy region{ lastChunkOffset, lastChunkOffset, lastChunkSize }; - m_context.vkCmdCopyBuffer( commandBuffer, srcMorton, dstMorton, 1u, &region 
); - m_context.vkCmdCopyBuffer( commandBuffer, srcIndices, dstIndices, 1u, &region ); - } + if ( chunksCount & 1 ) + { + // copy last chunk, there is no merging required but we still need data in the destination buffer + // note: no additional barriers as we are still doing read from source-> write to dest, so that should be good + u32 lastChunkOffset = chunkSize * ( chunksCount - 1 ) * sizeof( u32 ); + auto lastChunkSize = u32( totalValues * sizeof( u32 ) - lastChunkOffset ); + auto srcMorton = m_pass.buffers[0].buffer( index ); + auto srcIndices = m_pass.buffers[1].buffer( index ); + auto dstMorton = m_pass.buffers[0].buffer( 1u - index ); + auto dstIndices = m_pass.buffers[1].buffer( 1u - index ); + VkBufferCopy region{ lastChunkOffset, lastChunkOffset, lastChunkSize }; + m_context.vkCmdCopyBuffer( commandBuffer, srcMorton, dstMorton, 1u, &region ); + m_context.vkCmdCopyBuffer( commandBuffer, srcIndices, dstIndices, 1u, &region ); } // Ping-pong the buffers index = 1u - index; chunkSize *= 2; - numChunks = castor::divRoundUp( totalValues, chunkSize ); + chunksCount = castor::divRoundUp( totalValues, chunkSize ); } } - void doBarriers( crg::RecordContext & context + void doMergeTransitionBarrier( crg::RecordContext & context , VkCommandBuffer commandBuffer , uint32_t passIndex )const { + auto & attach = m_pass.buffers.back(); + auto buffer = attach.buffer( passIndex ); + auto currentState = context.getAccessState( buffer, attach.getBufferRange() ); + context.memoryBarrier( commandBuffer + , buffer + , attach.getBufferRange() + , currentState.access + , currentState.pipelineStage + , crg::AccessState{ VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT } + , true ); + } + + void doAllBarriers( crg::RecordContext & context + , VkCommandBuffer commandBuffer + , uint32_t passIndex )const + { + uint32_t bufferIndex{}; + for ( auto & attach : m_pass.buffers ) { if ( !attach.isNoTransition() - && attach.isStorageBuffer() - && 
attach.isClearableBuffer() ) + && attach.isStorageBuffer() ) { - auto currentState = context.getAccessState( attach.buffer( passIndex ) - , attach.getBufferRange() ); + auto buffer = attach.buffer( passIndex ); + auto currentState = context.getAccessState( buffer, attach.getBufferRange() ); + crg::AccessState dstState; + + if ( bufferIndex < 2u ) + { + // Input buffer + dstState = { VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT }; + } + else if ( bufferIndex < 4u ) + { + // Output buffer + dstState = { VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT }; + } + else + { + // Merge Path Transition Buffer + dstState = { VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT }; + } + context.memoryBarrier( commandBuffer - , attach.buffer( passIndex ) + , buffer , attach.getBufferRange() , currentState.access , currentState.pipelineStage - , crg::AccessState{ VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT } + , dstState , true ); } + + ++bufferIndex; } } @@ -587,11 +612,11 @@ namespace castor3d auto & point = graph.createPass( "MergeSort/Point" , [&clusters, &device]( crg::FramePass const & framePass , crg::GraphContext & context - , crg::RunnableGraph & graph ) + , crg::RunnableGraph & runnableGraph ) { auto result = castor::make_unique< merge::FramePass >( framePass , context - , graph + , runnableGraph , device , clusters , LightType::ePoint ); @@ -610,11 +635,11 @@ namespace castor3d auto & spot = graph.createPass( "MergeSort/Spot" , [&clusters, &device]( crg::FramePass const & framePass , crg::GraphContext & context - , crg::RunnableGraph & graph ) + , crg::RunnableGraph & runnableGraph ) { auto result = castor::make_unique< merge::FramePass >( framePass , context - , graph + , runnableGraph , device , clusters , LightType::eSpot ); diff --git a/source/Core/Castor3D/Render/Clustered/RadixSortLights.cpp 
b/source/Core/Castor3D/Render/Clustered/RadixSortLights.cpp deleted file mode 100644 index c1ed380918..0000000000 --- a/source/Core/Castor3D/Render/Clustered/RadixSortLights.cpp +++ /dev/null @@ -1,374 +0,0 @@ -#include "Castor3D/Render/Clustered/RadixSortLights.hpp" - -#include "Castor3D/Engine.hpp" -#include "Castor3D/Cache/LightCache.hpp" -#include "Castor3D/Render/RenderDevice.hpp" -#include "Castor3D/Render/RenderSystem.hpp" -#include "Castor3D/Render/Clustered/FrustumClusters.hpp" -#include "Castor3D/Scene/Camera.hpp" -#include "Castor3D/Scene/Scene.hpp" -#include "Castor3D/Scene/Light/PointLight.hpp" -#include "Castor3D/Scene/Light/SpotLight.hpp" -#include "Castor3D/Shader/Program.hpp" -#include "Castor3D/Shader/Shaders/GlslAABB.hpp" -#include "Castor3D/Shader/Shaders/GlslAppendBuffer.hpp" -#include "Castor3D/Shader/Shaders/GlslClusteredLights.hpp" -#include "Castor3D/Shader/Shaders/GlslLight.hpp" -#include "Castor3D/Shader/Shaders/GlslUtils.hpp" -#include "Castor3D/Shader/Ubos/CameraUbo.hpp" -#include "Castor3D/Shader/Ubos/ClustersUbo.hpp" - -#include - -#include - -#include -#include - -namespace castor3d -{ - //********************************************************************************************* - - namespace radix - { - static uint32_t constexpr NumThreads = 256u; - static uint32_t constexpr NumThreadsPerThreadGroup = 256u; - - enum BindingPoints - { - eInputKeys, - eInputValues, - eOutputKeys, - eOutputValues, - }; - - static ShaderPtr createShader( RenderDevice const & device ) - { - sdw::ComputeWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; - - auto inputKeysBuffer = writer.declStorageBuffer( "c3d_inputKeysBuffer" - , uint32_t( eInputKeys ) - , 0u ); - auto c3d_inputKeys = inputKeysBuffer.declMemberArray< sdw::UInt >( "ik" ); - inputKeysBuffer.end(); - - auto inputValuesBuffer = writer.declStorageBuffer( "c3d_inputValuesBuffer" - , uint32_t( eInputValues ) - , 0u ); - auto c3d_inputValues = 
inputValuesBuffer.declMemberArray< sdw::UInt >( "iv" ); - inputValuesBuffer.end(); - - auto outputKeysBuffer = writer.declStorageBuffer( "c3d_outputKeysBuffer" - , uint32_t( eOutputKeys ) - , 0u ); - auto c3d_outputKeys = outputKeysBuffer.declMemberArray< sdw::UInt >( "ok" ); - outputKeysBuffer.end(); - - auto outputValuesBuffer = writer.declStorageBuffer( "c3d_outputValuesBuffer" - , uint32_t( eOutputValues ) - , 0u ); - auto c3d_outputValues = outputValuesBuffer.declMemberArray< sdw::UInt >( "ov" ); - outputValuesBuffer.end(); - - sdw::PushConstantBuffer pcb{ writer, "C3D_DispatchData", "c3d_dispatchData" }; - auto c3d_numElements = pcb.declMember< sdw::UInt >( "c3d_numElements" ); - auto c3d_chunkSize = pcb.declMember< sdw::UInt >( "c3d_chunkSize" ); - pcb.end(); - - auto gsKeys = writer.declSharedVariable< sdw::UInt >( "gsKeys", NumThreads ); // A temporary buffer to store the input keys. (1,024 Bytes) - auto gsValues = writer.declSharedVariable< sdw::UInt >( "gsValues", NumThreads ); // A temporary buffer to store the input values. (1,024 Bytes) - auto gsE = writer.declSharedVariable< sdw::UInt >( "gsE", NumThreads ); // Set a 1 for all false sort keys (b == 0) and a 0 for all true sort keys (b == 1) (1,024 Bytes) - auto gsF = writer.declSharedVariable< sdw::UInt >( "gsF", NumThreads ); // Scan the splits. This results in the output index of all false sort keys (b == 0) (1,024 Bytes) - auto gsD = writer.declSharedVariable< sdw::UInt >( "gsD", NumThreads ); // The desination index for the ouput key and value. (1,024 Bytes) - auto gsTotalFalses = writer.declSharedVariable< sdw::UInt >( "gsTotalFalses" ); // The result of e[NUM_THREADS - 1] + f[NUM_THREADS - 1]; (4 Bytes) - - writer.implementMainT< sdw::VoidT >( NumThreads - , [&]( sdw::ComputeIn const & in ) - { - // The number of bits to consider sorting. - // In this case, the input keys are 30-bit morton codes. 
- const u32 NumBits = 30u; - - auto const & groupIndex = in.localInvocationIndex; - auto const & threadIndex = in.globalInvocationID.x(); - - // Store the input key and values into shared memory. - gsKeys[groupIndex] = writer.ternary( threadIndex < c3d_numElements, c3d_inputKeys[threadIndex], sdw::UInt{ UINT_MAX } ); - gsValues[groupIndex] = writer.ternary( threadIndex < c3d_numElements, c3d_inputValues[threadIndex], sdw::UInt{ UINT_MAX } ); - - // Loop over the bits starting at the least-significant bit. - FOR( writer, sdw::UInt, b, 0_u, b < NumBits, ++b ) - { - // 1. In a temporary buffer in shared memory, we set a 1 for all false - // sort keys (b = 0) and a 0 for all true sort keys. - gsE[groupIndex] = writer.ternary( ( ( gsKeys[groupIndex] >> b ) & 1_u ) == 0_u - , 1_u - , 0_u ); - - // Sync group shared memory writes. - shader::groupMemoryBarrierWithGroupSync( writer ); - - IF( writer, groupIndex == 0_u ) - { - gsF[groupIndex] = 0_u; - } - ELSE - { - gsF[groupIndex] = gsE[groupIndex - 1_u]; - } - FI - - // Sync group shared memory writes. - shader::groupMemoryBarrierWithGroupSync( writer ); - auto temp = writer.declLocale( "temp", 0_u ); - - // 2. We then scan (prefix sum) this buffer. This is the enumerate operation; - // each false sort key now contains its destination address in the scan - // output, which we will call f. These first two steps are equivalent to - // a stream compaction operation on all false sort keys. - for ( u32 i = 1; i < NumThreads; i <<= 1u ) - { - temp = gsF[groupIndex]; - - IF( writer, groupIndex > i ) - { - temp += gsF[groupIndex - i]; - } - FI - - // Sync group shared memory reads before writes. - shader::groupMemoryBarrierWithGroupSync( writer ); - - gsF[groupIndex] = temp; - - // Sync group shared memory writes. - shader::groupMemoryBarrierWithGroupSync( writer ); - } - - // 3. The last element in the scan's output now contains the total - // number of false sort keys. 
We write this value to a shared - // variable, gs_TotalFalses. - IF ( writer, groupIndex == 0_u ) - { - gsTotalFalses = gsE[NumThreads - 1u] + gsF[NumThreads - 1u]; - } - FI - - // Sync group shared memory writes. - shader::groupMemoryBarrierWithGroupSync( writer ); - - // 4. Now we compute the destination address for the true sort keys. For - // a sort key at index i, this address is t = i - f + totalFalses. We - // then select between t and f depending on the value of b to get the - // destination address d of each fragment. - gsD[groupIndex] = writer.ternary( gsE[groupIndex] == 1u - , gsF[groupIndex] - , groupIndex - gsF[groupIndex] + gsTotalFalses ); - - // 5. Finally, we scatter the original sort keys to destination address - // d. The scatter pattern is a perfect permutation of the input, so - // we see no write conflicts with this scatter. - auto key = writer.declLocale( "key", gsKeys[groupIndex] ); - auto value = writer.declLocale( "value", gsValues[groupIndex] ); - - // Sync group shared memory reads before writes. - shader::groupMemoryBarrierWithGroupSync( writer ); - - gsKeys[gsD[groupIndex]] = key; - gsValues[gsD[groupIndex]] = value; - - // Sync group shared memory writes. - shader::groupMemoryBarrierWithGroupSync( writer ); - } - ROF - - // Now commit the results to global memory. 
- c3d_outputKeys[threadIndex] = gsKeys[groupIndex]; - c3d_outputValues[threadIndex] = gsValues[groupIndex]; - } ); - return writer.getBuilder().releaseShader(); - } - - class FramePass - : public crg::RunnablePass - { - public: - FramePass( crg::FramePass const & framePass - , crg::GraphContext & context - , crg::RunnableGraph & graph - , RenderDevice const & device - , FrustumClusters & clusters - , LightType lightType ) - : crg::RunnablePass{ framePass - , context - , graph - , { [this]( uint32_t index ){ doInitialise( index ); } - , GetPipelineStateCallback( [](){ return crg::getPipelineState( VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ); } ) - , [this]( crg::RecordContext & recContext, VkCommandBuffer cb, uint32_t i ){ doRecordInto( recContext, cb, i ); } - , GetPassIndexCallback( [](){ return 0u; } ) - , IsEnabledCallback( [this](){ return doIsEnabled(); } ) - , IsComputePassCallback( [](){ return true; } ) } - , crg::ru::Config{ 1u, true /* resettable */ } } - , m_clusters{ clusters } - , m_lightCache{ clusters.getCamera().getScene()->getLightCache() } - , m_lightType{ lightType } - , m_pipeline{ framePass, context, graph, device, this, m_lightType } - { - } - - void resetPipeline( uint32_t index ) - { - resetCommandBuffer( index ); - m_pipeline.pipeline.resetPipeline( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( m_pipeline.createInfo ), index ); - doCreatePipeline( index, m_pipeline ); - reRecordCurrent(); - } - - private: - struct Pipeline - { - ShaderModule shader; - ashes::PipelineShaderStageCreateInfoArray createInfo; - crg::cp::ConfigData cpConfig; - crg::PipelineHolder pipeline; - - Pipeline( crg::FramePass const & framePass - , crg::GraphContext & context - , crg::RunnableGraph & graph - , RenderDevice const & device - , FramePass const * parent - , LightType lightType ) - : shader{ VK_SHADER_STAGE_COMPUTE_BIT, cuT( "RadixSort/" ) + getName( lightType ), createShader( device ) } - , createInfo{ ashes::PipelineShaderStageCreateInfoArray{ 
makeShaderState( device, shader ) } } - , cpConfig{ crg::getDefaultV< InitialiseCallback >() - , nullptr - , IsEnabledCallback( [parent]() { return parent->doIsEnabled(); } ) - , GetPassIndexCallback( []() { return 0u; } ) - , crg::getDefaultV< RecordCallback >() - , crg::getDefaultV< RecordCallback >() - , 1u - , 1u - , 1u } - , pipeline{ framePass - , context - , graph - , crg::pp::Config{} - .program( ashes::makeVkArray< VkPipelineShaderStageCreateInfo >( createInfo ) ) - .pushConstants( VkPushConstantRange{ VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u } ) - , VK_PIPELINE_BIND_POINT_COMPUTE - , 1u } - { - } - }; - - private: - struct DispatchData - { - u32 numElements; - u32 chunkSize; - }; - - FrustumClusters & m_clusters; - LightCache const & m_lightCache; - LightType m_lightType; - Pipeline m_pipeline; - - private: - void doInitialise( uint32_t index ) - { - m_pipeline.pipeline.initialise(); - doCreatePipeline( index, m_pipeline ); - } - - bool doIsEnabled()const - { - return m_clusters.getConfig().sortLights - && m_clusters.needsLightsUpdate() - && m_lightCache.getLightsBufferCount( m_lightType ) > 0; - } - - void doRecordInto( crg::RecordContext & context - , VkCommandBuffer commandBuffer - , uint32_t index ) - { - // Build bottom level of the BVH. 
- auto lightsCount = m_lightCache.getLightsBufferCount( m_lightType ); - auto numThreadGroups = castor::divRoundUp( lightsCount, NumThreadsPerThreadGroup ); - DispatchData data{ lightsCount, 0u }; - m_pipeline.pipeline.recordInto( context, commandBuffer, index ); - m_context.vkCmdPushConstants( commandBuffer, m_pipeline.pipeline.getPipelineLayout(), VK_SHADER_STAGE_COMPUTE_BIT, 0u, 8u, &data ); - m_context.vkCmdDispatch( commandBuffer, numThreadGroups, 1u, 1u ); - } - - void doCreatePipeline( uint32_t index - , Pipeline & pipeline )const - { - auto & program = pipeline.pipeline.getProgram( index ); - VkComputePipelineCreateInfo createInfo{ VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO - , nullptr - , 0u - , program.front() - , pipeline.pipeline.getPipelineLayout() - , VkPipeline{} - , 0u }; - pipeline.pipeline.createPipeline( index, createInfo ); - } - }; - } - - //********************************************************************************************* - - crg::FramePassArray createRadixSortLightsPass( crg::FramePassGroup & graph - , crg::FramePass const * previousPass - , RenderDevice const & device - , FrustumClusters & clusters ) - { - // Point lights - auto & point = graph.createPass( "RadixSort/Point" - , [&clusters, &device]( crg::FramePass const & framePass - , crg::GraphContext & context - , crg::RunnableGraph & graph ) - { - auto result = castor::make_unique< radix::FramePass >( framePass - , context - , graph - , device - , clusters - , LightType::ePoint ); - device.renderSystem.getEngine()->registerTimer( castor::makeString( framePass.getFullName() ) - , result->getTimer() ); - return result; - } ); - point.addDependency( *previousPass ); - createInputStoragePassBinding( point, uint32_t( radix::eInputKeys ), cuT( "C3D_InLightMortonCodes" ), clusters.getOutputPointLightMortonCodesBuffer(), 0u, ashes::WholeSize ); - createInputStoragePassBinding( point, uint32_t( radix::eInputValues ), cuT( "C3D_InLightIndices" ), 
clusters.getOutputPointLightIndicesBuffer(), 0u, ashes::WholeSize ); - createClearableOutputStorageBinding( point, uint32_t( radix::eOutputKeys ), cuT( "C3D_OutLightMortonCodes" ), clusters.getInputPointLightMortonCodesBuffer(), 0u, ashes::WholeSize ); - createClearableOutputStorageBinding( point, uint32_t( radix::eOutputValues ), cuT( "C3D_OutLightIndices" ), clusters.getInputPointLightIndicesBuffer(), 0u, ashes::WholeSize ); - - // Spot lights - auto & spot = graph.createPass( "RadixSort/Spot" - , [&clusters, &device]( crg::FramePass const & framePass - , crg::GraphContext & context - , crg::RunnableGraph & graph ) - { - auto result = castor::make_unique< radix::FramePass >( framePass - , context - , graph - , device - , clusters - , LightType::eSpot ); - device.renderSystem.getEngine()->registerTimer( castor::makeString( framePass.getFullName() ) - , result->getTimer() ); - return result; - } ); - spot.addDependency( *previousPass ); - createInputStoragePassBinding( spot, uint32_t( radix::eInputKeys ), cuT( "C3D_InLightMortonCodes" ), clusters.getOutputSpotLightMortonCodesBuffer(), 0u, ashes::WholeSize ); - createInputStoragePassBinding( spot, uint32_t( radix::eInputValues ), cuT( "C3D_InLightIndices" ), clusters.getOutputSpotLightIndicesBuffer(), 0u, ashes::WholeSize ); - createClearableOutputStorageBinding( spot, uint32_t( radix::eOutputKeys ), cuT( "C3D_OutLightMortonCodes" ), clusters.getInputSpotLightMortonCodesBuffer(), 0u, ashes::WholeSize ); - createClearableOutputStorageBinding( spot, uint32_t( radix::eOutputValues ), cuT( "C3D_OutLightIndices" ), clusters.getInputSpotLightIndicesBuffer(), 0u, ashes::WholeSize ); - - return { &point, &spot }; - } - - //********************************************************************************************* -} diff --git a/source/Core/Castor3D/Render/Clustered/ReduceLightsAABB.cpp b/source/Core/Castor3D/Render/Clustered/ReduceLightsAABB.cpp index f1cb7ac2b7..06c27b8be7 100644 --- 
a/source/Core/Castor3D/Render/Clustered/ReduceLightsAABB.cpp +++ b/source/Core/Castor3D/Render/Clustered/ReduceLightsAABB.cpp @@ -35,7 +35,8 @@ namespace castor3d { enum BindingPoints { - eCamera, + eMainCamera, + eClustersCamera, eClusters, eAllLightsAABB, eReducedLightsAABB, @@ -51,8 +52,13 @@ namespace castor3d sdw::ComputeWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; // Inputs - C3D_Camera( writer - , eCamera + C3D_CameraNamed( writer + , Main + , eMainCamera + , 0u ); + C3D_CameraNamed( writer + , Clusters + , eClustersCamera , 0u ); C3D_Clusters( writer , eClusters @@ -78,7 +84,7 @@ namespace castor3d auto logStepReduction = [&]( sdw::UInt const & groupIndex , sdw::UInt const & groupID ) { - // If we can assume that NUM_THREADS is a power of 2, we can compute + // If we can assume that NumThreads is a power of 2, we can compute // the reduction index by performing a bit shift. This is equivalent to // halving the number of values (and threads) that must perform the reduction // operation. 
@@ -250,8 +256,8 @@ namespace castor3d auto lightsAABBRange = writer.declLocale< sdw::Vec4 >( "lightsAABBRange" ); c3d_clustersData.computeGlobalLightsData( lightsMin , lightsMax - , c3d_cameraData.nearPlane() - , c3d_cameraData.farPlane() + , c3d_cameraDataClusters.nearPlane() + , c3d_cameraDataClusters.farPlane() , clustersLightsData , lightsAABBRange ); c3d_clustersLightsData = clustersLightsData; @@ -490,7 +496,8 @@ namespace castor3d crg::FramePass const & createReduceLightsAABBPass( crg::FramePassGroup & graph , crg::FramePass const * previousPass , RenderDevice const & device - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , FrustumClusters & clusters ) { auto & first = graph.createPass( "ReduceLightsAABB/First" @@ -509,7 +516,8 @@ namespace castor3d return result; } ); first.addDependency( *previousPass ); - cameraUbo.createPassBinding( first, rdclgb::eCamera ); + mainCameraUbo.createPassBinding( first, rdclgb::eMainCamera ); + clustersCameraUbo.createPassBinding( first, rdclgb::eClustersCamera ); clusters.getClustersUbo().createPassBinding( first, rdclgb::eClusters ); createInputStoragePassBinding( first, uint32_t( rdclgb::eAllLightsAABB ), cuT( "C3D_AllLightsAABB" ), clusters.getAllLightsAABBBuffer(), 0u, ashes::WholeSize ); createClearableOutputStorageBinding( first, uint32_t( rdclgb::eReducedLightsAABB ), cuT( "C3D_ReducedLightsAABB" ), clusters.getReducedLightsAABBBuffer(), 0u, ashes::WholeSize ); @@ -530,7 +538,8 @@ namespace castor3d return result; } ); second.addDependency( first ); - cameraUbo.createPassBinding( second, rdclgb::eCamera ); + mainCameraUbo.createPassBinding( second, rdclgb::eMainCamera ); + clustersCameraUbo.createPassBinding( second, rdclgb::eClustersCamera ); clusters.getClustersUbo().createPassBinding( second, rdclgb::eClusters ); createInOutStoragePassBinding( second, uint32_t( rdclgb::eReducedLightsAABB ), cuT( "C3D_ReducedLightsAABB" ), 
clusters.getReducedLightsAABBBuffer(), 0u, ashes::WholeSize ); diff --git a/source/Core/Castor3D/Render/Clustered/SortAssignedLights.cpp b/source/Core/Castor3D/Render/Clustered/SortAssignedLights.cpp index a23b9aab9a..e07c19307e 100644 --- a/source/Core/Castor3D/Render/Clustered/SortAssignedLights.cpp +++ b/source/Core/Castor3D/Render/Clustered/SortAssignedLights.cpp @@ -12,6 +12,7 @@ #include "Castor3D/Shader/Program.hpp" #include "Castor3D/Shader/Shaders/GlslAABB.hpp" #include "Castor3D/Shader/Shaders/GlslAppendBuffer.hpp" +#include "Castor3D/Shader/Shaders/GlslBitonicSort.hpp" #include "Castor3D/Shader/Shaders/GlslClusteredLights.hpp" #include "Castor3D/Shader/Shaders/GlslLight.hpp" #include "Castor3D/Shader/Shaders/GlslUtils.hpp" @@ -31,12 +32,6 @@ namespace castor3d namespace sort { - static uint32_t constexpr BlockSize = 1024u; - static uint32_t constexpr BatchesPerPass = 8u; - static uint32_t constexpr BatchSize = BlockSize / BatchesPerPass; - static uint32_t constexpr ValuesPerThread = BatchesPerPass << 1u; - static uint32_t constexpr NumThreads = BlockSize / ValuesPerThread; - enum class BindingPoints { eClusters, @@ -48,6 +43,7 @@ namespace castor3d , ClustersConfig const & config ) { sdw::ComputeWriter writer{ &device.renderSystem.getEngine()->getShaderAllocator() }; + shader::BitonicSortT< 4u > bitonic{ writer, 8u }; C3D_Clusters( writer , BindingPoints::eClusters @@ -60,103 +56,7 @@ namespace castor3d , BindingPoints::eClusterGrid , 0u ); - auto gBatchSize = writer.declConstant( "gBatchSize", sdw::UInt{ BatchSize } ); - auto gBatchSizeLog = writer.declGlobal( "gBatchSizeLog", writer.cast< sdw::UInt >( findMSB( gBatchSize ) ) ); - auto gNumThreads = writer.declConstant( "gNumThreads", sdw::UInt{ NumThreads } ); - auto gMaxUInt = writer.declConstant( "gMaxInt", 0xFFFFFFFF_u ); - - auto gsKeys = writer.declSharedVariable< sdw::UInt >( "gsKeys", BlockSize ); - - auto bitInsert0 = writer.implementFunction< sdw::UInt >( "bitInsert0" - , [&]( sdw::UInt const 
& value - , sdw::UInt const & bit ) - { - writer.returnStmt( ( ( ( gMaxUInt << bit ) & value ) << 1u ) | ( ~( gMaxUInt << bit ) & value ) ); - } - , sdw::InUInt{ writer, "value" } - , sdw::InUInt{ writer, "bit" } ); - - auto sortLights = writer.implementFunction< sdw::Void >( "c3d_sortLights" - , [&]( sdw::UInt const & groupIndex - , sdw::UInt const & offset - , sdw::UInt const & numElements ) - { - // start with simple version, do everything in group shared memory - - // we process a power of two number of elements, - auto passCount = writer.declLocale( "passCount", 1u + writer.cast< sdw::UInt >( findMSB( numElements - 1u ) ) ); - auto roundedElementCount = writer.declLocale( "roundedElementCount", 1u << passCount ); - auto batchCount = writer.declLocale( "batchCount", ( roundedElementCount + gBatchSize - 1u ) >> gBatchSizeLog ); - // Load data into shared memory. Pad missing values with max ints. - - FOR( writer, sdw::UInt, batch, 0_u, batch < batchCount, ++batch ) - { - // each thread loads a pair of values per batch. - auto i1 = writer.declLocale( "i1", groupIndex + batch * gBatchSize ); - auto i2 = writer.declLocale( "i2", i1 + ( gBatchSize >> 1u ) ); - gsKeys[i1] = writer.ternary( i1 < numElements, c3d_lightClusterIndex[offset + i1], gMaxUInt ); - gsKeys[i2] = writer.ternary( i2 < numElements, c3d_lightClusterIndex[offset + i2], gMaxUInt ); - } - ROF - - shader::groupMemoryBarrierWithGroupSync( writer ); - - // Each loop iteration produces blocks of size k that are monotonic (alternatively increasing and decreasing) - // thus, producing blocks of size 2*k that are bitonic. 
- // as a result, the last pass produces a single block sorted in ascending order - FOR( writer, sdw::UInt, pass, 0_u, pass < passCount, ++pass ) - { - auto k = writer.declLocale( "k", 1_u << ( pass + 1u ) ); - // Each iteration compares and optionally swap elements in pairs exactly once for each element - FOR( writer, sdw::UInt, subPass, 0_u, subPass <= pass, ++subPass ) - { - FOR( writer, sdw::UInt, batch, 0_u, batch < batchCount, ++batch ) - { - auto indexFirst = writer.declLocale( "indexFirst", bitInsert0( groupIndex + batch * gNumThreads, ( pass - subPass ) ) ); - auto indexSecond = writer.declLocale( "indexSecond", indexFirst | ( 1u << ( pass - subPass ) ) ); - auto valFirst = writer.declLocale( "valFirst", gsKeys[indexFirst] ); - auto valSecond = writer.declLocale( "valSecond", gsKeys[indexSecond] ); - shader::groupMemoryBarrierWithGroupSync( writer ); - - IF( writer, writer.ternary( ( indexFirst & k ) == 0_u, 1_u, 0_u ) ^ writer.ternary( valFirst <= valSecond, 1_u, 0_u ) ) - { - gsKeys[indexFirst] = valSecond; - gsKeys[indexSecond] = valFirst; - } - FI - - shader::groupMemoryBarrierWithGroupSync( writer ); - } - ROF - } - ROF - } - ROF - - // Now commit the results to global memory. 
- FOR( writer, sdw::UInt, batch, 0_u, batch < batchCount, ++batch ) - { - auto i1 = writer.declLocale( "i1", groupIndex + batch * gBatchSize ); - auto i2 = writer.declLocale( "i2", i1 + ( gBatchSize >> 1u ) ); - - IF( writer, i1 < numElements ) - { - c3d_lightClusterIndex[offset + i1] = gsKeys[i1]; - } - FI - IF( writer, i2 < numElements ) - { - c3d_lightClusterIndex[offset + i2] = gsKeys[i2]; - } - FI - } - ROF - } - , sdw::InUInt{ writer, "groupIndex" } - , sdw::InUInt{ writer, "offset" } - , sdw::InUInt{ writer, "numElements" } ); - - writer.implementMainT< sdw::VoidT >( NumThreads + writer.implementMainT< sdw::VoidT >( bitonic.threadsCount , [&]( sdw::ComputeIn const & in ) { auto clusterIndex3D = writer.declLocale( "clusterIndex3D" @@ -168,13 +68,13 @@ namespace castor3d auto startOffset = writer.declLocale( "startOffset" , clusterLights.x() ); auto lightCount = writer.declLocale( "lightCount" - , min( sdw::UInt{ BlockSize }, clusterLights.y() ) ); + , min( sdw::UInt{ shader::BitonicSortT< 4u >::bucketSize }, clusterLights.y() ) ); - IF( writer, lightCount > 1_u ) - { - sortLights( in.localInvocationIndex, startOffset, lightCount ); - } - FI + bitonic.sortT( writer + , startOffset, lightCount + , in.localInvocationIndex, in.globalInvocationID.x() + , c3d_lightClusterIndex, c3d_lightClusterIndex + , sdw::UInt{ 0xFFFFFFFFU } ); } ); return writer.getBuilder().releaseShader(); } @@ -229,7 +129,7 @@ namespace castor3d .groupCountX( clusters.getDimensions()->x ) .groupCountY( clusters.getDimensions()->y ) .groupCountZ( clusters.getDimensions()->z ) - .isEnabled( crg::RunnablePass::IsEnabledCallback( [&clusters](){ return clusters.getCamera().getScene()->getLightCache().hasClusteredLights() && clusters.getConfig().enablePostAssignSort; } ) ) + .isEnabled( crg::RunnablePass::IsEnabledCallback( [&clusters](){ return clusters.getConfig().enablePostAssignSort && !clusters.getCamera().getScene()->getLightCache().getLights( LightType::ePoint ).empty(); } ) ) , clusters , 
LightType::ePoint ); device.renderSystem.getEngine()->registerTimer( castor::makeString( framePass.getFullName() ) @@ -255,7 +155,7 @@ namespace castor3d .groupCountX( clusters.getDimensions()->x ) .groupCountY( clusters.getDimensions()->y ) .groupCountZ( clusters.getDimensions()->z ) - .isEnabled( crg::RunnablePass::IsEnabledCallback( [&clusters](){ return clusters.getCamera().getScene()->getLightCache().hasClusteredLights() && clusters.getConfig().enablePostAssignSort; } ) ) + .isEnabled( crg::RunnablePass::IsEnabledCallback( [&clusters](){ return clusters.getConfig().enablePostAssignSort && !clusters.getCamera().getScene()->getLightCache().getLights( LightType::eSpot ).empty(); } ) ) , clusters , LightType::eSpot ); device.renderSystem.getEngine()->registerTimer( castor::makeString( framePass.getFullName() ) diff --git a/source/Core/Castor3D/Render/Debug/DebugDrawer.cpp b/source/Core/Castor3D/Render/Debug/DebugDrawer.cpp index e74b9ad451..eab2334a74 100644 --- a/source/Core/Castor3D/Render/Debug/DebugDrawer.cpp +++ b/source/Core/Castor3D/Render/Debug/DebugDrawer.cpp @@ -5,9 +5,12 @@ #include "Castor3D/Buffer/InstantUploadData.hpp" #include "Castor3D/Buffer/ObjectBufferPool.hpp" #include "Castor3D/Miscellaneous/makeVkType.hpp" +#include #include "Castor3D/Render/RenderDevice.hpp" #include "Castor3D/Render/RenderQueue.hpp" +#include "Castor3D/Render/RenderSystem.hpp" #include "Castor3D/Render/RenderTarget.hpp" +#include "Castor3D/Shader/Ubos/CameraUbo.hpp" #include @@ -25,91 +28,74 @@ namespace castor3d { namespace dbgdrw { - static ashes::RenderPassPtr createRenderPass( RenderDevice const & device - , castor::String const & name - , Texture const & colour - , Texture const & depth ) + static size_t hash( VkPipelineShaderStageCreateInfo const & shader ) { - ashes::VkAttachmentDescriptionArray attaches{ VkAttachmentDescription{ 0u - , depth.getFormat() - , VK_SAMPLE_COUNT_1_BIT - , VK_ATTACHMENT_LOAD_OP_LOAD - , VK_ATTACHMENT_STORE_OP_STORE - , 
VK_ATTACHMENT_LOAD_OP_DONT_CARE - , VK_ATTACHMENT_STORE_OP_DONT_CARE - , VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL - , VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL } - , VkAttachmentDescription{ 1u - , colour.getFormat() - , VK_SAMPLE_COUNT_1_BIT - , VK_ATTACHMENT_LOAD_OP_LOAD - , VK_ATTACHMENT_STORE_OP_STORE - , VK_ATTACHMENT_LOAD_OP_DONT_CARE - , VK_ATTACHMENT_STORE_OP_DONT_CARE - , VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - , VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL } }; - ashes::SubpassDescription subpassesDesc{ 0u - , VK_PIPELINE_BIND_POINT_GRAPHICS - , {} - , { { 1u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL } } - , {} - , VkAttachmentReference{ 0u, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL } - , {} }; - ashes::SubpassDescriptionArray subpasses; - subpasses.push_back( castor::move( subpassesDesc ) ); - ashes::VkSubpassDependencyArray dependencies{ { VK_SUBPASS_EXTERNAL - , 0u - , VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - , VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - , VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - , VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - , VK_DEPENDENCY_BY_REGION_BIT } - , { 0u - , VK_SUBPASS_EXTERNAL - , VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - , VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - , VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - , VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT - , VK_DEPENDENCY_BY_REGION_BIT } }; - ashes::RenderPassCreateInfo createInfo{ 0u - , castor::move( attaches ) - , castor::move( subpasses ) - , castor::move( dependencies ) }; - return device->createRenderPass( castor::toUtf8( name ) + "/Debug" - , castor::move( createInfo ) ); + size_t result = std::hash< VkFlags >{}( shader.flags ); + castor::hashCombine( result, shader.module ); + castor::hashCombine( result, shader.stage ); + return result; } - static ashes::FrameBufferPtr createFrameBuffer( ashes::RenderPass const & renderPass - , castor::String const & name - , Texture const & colour - , Texture const & depth ) + static size_t hash( 
VkDescriptorImageInfo const & write ) { - ashes::VkImageViewArray fbAttaches; - auto extent = colour.getExtent(); - fbAttaches.emplace_back( depth.targetView ); - fbAttaches.emplace_back( colour.targetView ); - return renderPass.createFrameBuffer( castor::toUtf8( name ) + "/Debug" - , makeVkStruct< VkFramebufferCreateInfo >( 0u - , renderPass - , uint32_t( fbAttaches.size() ) - , fbAttaches.data() - , extent.width - , extent.height - , 1u ) ); + size_t result = std::hash< VkSampler >{}( write.sampler ); + castor::hashCombine( result, write.imageView ); + castor::hashCombine( result, write.imageLayout ); + return result; } - static size_t hash( VkPipelineShaderStageCreateInfo const & shader ) + static size_t hash( VkDescriptorBufferInfo const & write ) { - size_t result = std::hash< VkFlags >{}( shader.flags ); - castor::hashCombine( result, shader.module ); - castor::hashCombine( result, shader.stage ); + size_t result = std::hash< VkBuffer >{}( write.buffer ); + castor::hashCombine( result, write.offset ); + castor::hashCombine( result, write.range ); + return result; + } + + static size_t hash( VkWriteDescriptorSet const & write ) + { + size_t result = std::hash< uint32_t >{}( write.dstBinding ); + castor::hashCombine( result, write.dstArrayElement ); + castor::hashCombine( result, write.descriptorCount ); + castor::hashCombine( result, write.descriptorType ); + + if ( write.pImageInfo ) + { + for ( auto & imageInfo : castor::makeArrayView( write.pImageInfo, write.descriptorCount ) ) + { + castor::hashCombine( result, hash( imageInfo ) ); + } + } + + if ( write.pBufferInfo ) + { + for ( auto & bufferInfo : castor::makeArrayView( write.pBufferInfo, write.descriptorCount ) ) + { + castor::hashCombine( result, hash( bufferInfo ) ); + } + } + + if ( write.pTexelBufferView ) + { +
for ( auto & texelBufferView : castor::makeArrayView( write.pTexelBufferView, write.descriptorCount ) ) + { + castor::hashCombine( result, texelBufferView ); + } + } + return result; } - static size_t hash( VkBuffer buffer - , VkDeviceSize offset - , VkDeviceSize size - , ashes::PipelineShaderStageCreateInfoArray const & shaders ) + static size_t hash( ashes::PipelineShaderStageCreateInfoArray const & shaders + , bool enableDepthTest ) { size_t result{}; @@ -117,140 +103,251 @@ namespace castor3d { castor::hashCombine( result, hash( shader ) ); } - - castor::hashCombine( result, buffer ); - castor::hashCombine( result, offset ); - castor::hashCombine( result, size ); + + castor::hashCombine( result, enableDepthTest ); + return result; + } + + static size_t hash( ashes::WriteDescriptorSetArray const & writes ) + { + size_t result{}; + + for ( auto & write : writes ) + { + castor::hashCombine( result, hash( write ) ); + } + return result; } } //********************************************************************************************* - DebugDrawer::DebugDrawer( RenderTarget & parent + void addDebugAabbs( DebugDrawer & drawer + , ashes::VkDescriptorSetLayoutBindingArray const & bindings + , ashes::WriteDescriptorSetArray const & writes + , VkDeviceSize count + , ashes::PipelineShaderStageCreateInfoArray const & shader + , bool enableDepthTest ) + { + drawer.addAabbs( bindings, writes, count, shader, enableDepthTest ); + } + + //********************************************************************************************* + + DebugDrawer::AABBBuffer::AABBBuffer( VkDeviceSize pcount + , Pipeline * ppipeline + , ashes::DescriptorSet * pdescriptorSet + , ashes::WriteDescriptorSetArray pwrites ) + : count{ pcount } + , pipeline{ ppipeline } + , descriptorSet{ pdescriptorSet } + , writes{ std::move( pwrites ) } + { + } + + //********************************************************************************************* + + DebugDrawer::FramePass::FramePass( crg::FramePass 
const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & graph , RenderDevice const & device - , Texture const & colour - , Texture const & depth ) - : castor::OwnedBy< RenderTarget >{ parent } + , VkExtent2D dimensions + , uint32_t const * passIndex ) + : crg::RenderPass{ framePass + , context + , graph + , { crg::defaultV< InitialiseCallback > + , [this]( crg::RecordContext & recContext, VkCommandBuffer cb, uint32_t ){ doSubRecordInto( recContext, cb ); } + , crg::defaultV< GetSubpassContentsCallback > + , GetPassIndexCallback( [passIndex](){ return *passIndex; } ) + , IsEnabledCallback( [this](){ return doIsEnabled(); } ) } + , dimensions + , crg::ru::Config{ 2u } } , m_device{ device } - , m_renderPass{ dbgdrw::createRenderPass( m_device, parent.getName(), colour, depth ) } - , m_framebuffer{ dbgdrw::createFrameBuffer( *m_renderPass, parent.getName(), colour, depth ) } - , m_commandPool{ m_device->createCommandPool( castor::toUtf8( parent.getName() ) + "/Debug", m_device.getGraphicsQueueFamilyIndex(), 0u ) } - , m_commandBuffers{ CommandsSemaphore{ m_commandPool->createCommandBuffer( castor::toUtf8( parent.getName() ) + "/Debug/0" ) - , m_device->createSemaphore( castor::toUtf8( parent.getName() ) + "/Debug/0" ) } - , CommandsSemaphore{ m_commandPool->createCommandBuffer( castor::toUtf8( parent.getName() ) + "/Debug/1" ) - , m_device->createSemaphore( castor::toUtf8( parent.getName() ) + "/Debug/1" ) } } { + m_aabb.vertices = m_device.vertexPools->getBuffer< castor::Point4f >( 8u ); + m_aabb.indices = m_device.indexPools->getBuffer< uint32_t >( 24u ); + { + static castor::Array< castor::Point4f, 8u > const vertexData{ castor::Point4f{ 0.0f, 0.0f, 0.0f, 1.0f } + , castor::Point4f{ 1.0f, 0.0f, 0.0f, 1.0f } + , castor::Point4f{ 0.0f, 1.0f, 0.0f, 1.0f } + , castor::Point4f{ 1.0f, 1.0f, 0.0f, 1.0f } + , castor::Point4f{ 0.0f, 0.0f, 1.0f, 1.0f } + , castor::Point4f{ 1.0f, 0.0f, 1.0f, 1.0f } + , castor::Point4f{ 0.0f, 1.0f, 1.0f, 1.0f } + , 
castor::Point4f{ 1.0f, 1.0f, 1.0f, 1.0f } }; + static castor::Array< castor::Point2ui, 12u > const indexData{ castor::Point2ui{ 0u, 1u } + , castor::Point2ui{ 1u, 3u } + , castor::Point2ui{ 3u, 2u } + , castor::Point2ui{ 2u, 0u } + , castor::Point2ui{ 4u, 5u } + , castor::Point2ui{ 5u, 7u } + , castor::Point2ui{ 7u, 6u } + , castor::Point2ui{ 6u, 4u } + , castor::Point2ui{ 0u, 4u } + , castor::Point2ui{ 1u, 5u } + , castor::Point2ui{ 2u, 6u } + , castor::Point2ui{ 3u, 7u } }; + auto queue = m_device.graphicsData(); + InstantDirectUploadData uploader{ *queue->queue + , m_device + , cuT( "RenderCube" ) + , *queue->commandPool }; + uploader->pushUpload( vertexData.data()->constPtr() + , vertexData.size() * sizeof( castor::Point4f ) + , m_aabb.vertices.getBuffer( SubmeshData::ePositions ) + , m_aabb.vertices.getOffset( SubmeshData::ePositions ) + , VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + , VK_PIPELINE_STAGE_VERTEX_INPUT_BIT ); + uploader->pushUpload( indexData.data()->constPtr() + , indexData.size() * sizeof( castor::Point2ui ) + , m_aabb.indices.getBuffer( SubmeshData::eIndex ) + , m_aabb.indices.getOffset( SubmeshData::eIndex ) + , VK_ACCESS_INDEX_READ_BIT + , VK_PIPELINE_STAGE_VERTEX_INPUT_BIT ); + } + } + + DebugDrawer::FramePass::~FramePass()noexcept + { + m_device.indexPools->putBuffer( m_aabb.indices ); + m_device.vertexPools->putBuffer( m_aabb.vertices ); } - void DebugDrawer::addAabbs( VkBuffer buffer - , VkDeviceSize offset - , VkDeviceSize size + void DebugDrawer::FramePass::addAabbs( ashes::VkDescriptorSetLayoutBindingArray const & bindings + , ashes::WriteDescriptorSetArray const & writes , VkDeviceSize count - , ashes::PipelineShaderStageCreateInfoArray shader ) + , ashes::PipelineShaderStageCreateInfoArray const & shader + , bool enableDepthTest ) { - auto hash = dbgdrw::hash( buffer, offset, size, shader ); + auto renderPass = getRenderPass( 0u ); + m_pending = true; + + if ( !renderPass ) + { + return; + } + + auto hash = dbgdrw::hash( shader,
enableDepthTest ); auto [it, res] = m_pipelines.try_emplace( hash ); if ( res ) { - auto name = castor::toUtf8( getOwner()->getName() ) + "/Debug/AABB/" + castor::string::toMbString( hash ); - auto & extent = m_framebuffer->getDimensions(); + auto name = castor::toUtf8( m_pass.getName() ) + "/AABB/" + castor::string::toMbString( hash ); + auto & extent = doGetHolder().getRenderSize(); ashes::PipelineVertexInputStateCreateInfo vertexState{ 0u , ashes::VkVertexInputBindingDescriptionArray{ VkVertexInputBindingDescription{ 0u, 16u, VK_VERTEX_INPUT_RATE_VERTEX } } , ashes::VkVertexInputAttributeDescriptionArray{ VkVertexInputAttributeDescription{ 0u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT, 0u } } }; ashes::PipelineViewportStateCreateInfo viewportState{ 0u , { makeViewport( castor::Point2ui{ extent.width, extent.height } ) } , { makeScissor( castor::Point2ui{ extent.width, extent.height } ) } }; - it->second->descriptorLayout = m_device->createDescriptorSetLayout( name - , ashes::VkDescriptorSetLayoutBindingArray{ VkDescriptorSetLayoutBinding{ 0u, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr } } ); + it->second = std::make_unique< Pipeline >(); + it->second->descriptorLayout = m_device->createDescriptorSetLayout( name, bindings ); it->second->pipelineLayout = m_device->createPipelineLayout( name , *it->second->descriptorLayout ); ashes::GraphicsPipelineCreateInfo graphics{ 0u - , castor::move( shader ) + , shader , castor::move( vertexState ) - , ashes::PipelineInputAssemblyStateCreateInfo{ 0u, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST } + , ashes::PipelineInputAssemblyStateCreateInfo{ 0u, VK_PRIMITIVE_TOPOLOGY_LINE_LIST } , ashes::nullopt , castor::move( viewportState ) - , ashes::PipelineRasterizationStateCreateInfo{ 0u, VK_FALSE, VK_FALSE, VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE } + , ashes::PipelineRasterizationStateCreateInfo{ 0u, VK_FALSE, VK_FALSE, VK_POLYGON_MODE_LINE, VK_CULL_MODE_NONE } , ashes::PipelineMultisampleStateCreateInfo{} - , 
ashes::PipelineDepthStencilStateCreateInfo{ 0u, VK_TRUE, VK_FALSE, VK_COMPARE_OP_GREATER } + , ashes::PipelineDepthStencilStateCreateInfo{ 0u, enableDepthTest, VK_FALSE, VK_COMPARE_OP_GREATER } , ashes::PipelineColorBlendStateCreateInfo{} , ashes::nullopt , static_cast< VkPipelineLayout >( *it->second->pipelineLayout ) - , static_cast< VkRenderPass >( *m_renderPass ) }; + , getRenderPass( 0u ) }; it->second->pipeline = m_device->createPipeline( name, castor::move( graphics ) ); - - it->second->descriptorPool = it->second->descriptorLayout->createPool( name, 1u ); - it->second->descriptorSet = it->second->descriptorPool->createDescriptorSet( name ); - it->second->vertices = m_device.vertexPools->getBuffer< castor::Point4f >( 36u ); - { - castor::Vector< castor::Point4f > vertexData - { - castor::Point4f{ -1, +1, -1, +1 }, castor::Point4f{ +1, -1, -1, +1 }, castor::Point4f{ -1, -1, -1, +1 }, castor::Point4f{ +1, -1, -1, +1 }, castor::Point4f{ -1, +1, -1, +1 }, castor::Point4f{ +1, +1, -1, +1 },// Back - castor::Point4f{ -1, -1, +1, +1 }, castor::Point4f{ -1, +1, -1, +1 }, castor::Point4f{ -1, -1, -1, +1 }, castor::Point4f{ -1, +1, -1, +1 }, castor::Point4f{ -1, -1, +1, +1 }, castor::Point4f{ -1, +1, +1, +1 },// Left - castor::Point4f{ +1, -1, -1, +1 }, castor::Point4f{ +1, +1, +1, +1 }, castor::Point4f{ +1, -1, +1, +1 }, castor::Point4f{ +1, +1, +1, +1 }, castor::Point4f{ +1, -1, -1, +1 }, castor::Point4f{ +1, +1, -1, +1 },// Right - castor::Point4f{ -1, -1, +1, +1 }, castor::Point4f{ +1, +1, +1, +1 }, castor::Point4f{ -1, +1, +1, +1 }, castor::Point4f{ +1, +1, +1, +1 }, castor::Point4f{ -1, -1, +1, +1 }, castor::Point4f{ +1, -1, +1, +1 },// Front - castor::Point4f{ -1, +1, -1, +1 }, castor::Point4f{ +1, +1, +1, +1 }, castor::Point4f{ +1, +1, -1, +1 }, castor::Point4f{ +1, +1, +1, +1 }, castor::Point4f{ -1, +1, -1, +1 }, castor::Point4f{ -1, +1, +1, +1 },// Top - castor::Point4f{ -1, -1, -1, +1 }, castor::Point4f{ +1, -1, -1, +1 }, castor::Point4f{ -1, -1, +1, +1 }, 
castor::Point4f{ +1, -1, -1, +1 }, castor::Point4f{ +1, -1, +1, +1 }, castor::Point4f{ -1, -1, +1, +1 },// Bottom - }; - auto queue = m_device.graphicsData(); - InstantDirectUploadData uploader{ *queue->queue - , m_device - , cuT( "RenderCube" ) - , *m_commandPool }; - uploader->pushUpload( vertexData.data() - , it->second->vertices.getAskedSize( SubmeshData::ePositions ) - , it->second->vertices.getBuffer( SubmeshData::ePositions ) - , it->second->vertices.getOffset( SubmeshData::ePositions ) - , VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT - , VK_PIPELINE_STAGE_VERTEX_INPUT_BIT ); - } } - - m_aabbs.emplace_back( buffer, offset, size, count, it->second.get() ); - } - crg::SemaphoreWaitArray DebugDrawer::render( ashes::Queue const & queue - , crg::SemaphoreWaitArray toWait ) - { - if ( m_aabbs.empty() ) + auto ihash = dbgdrw::hash( writes ); + auto [iit, ires] = it->second->instances.try_emplace( ihash ); + + if ( ires ) { - return toWait; + auto name = castor::toUtf8( m_pass.getName() ) + "/AABB/" + castor::string::toMbString( hash ) + "/" + castor::string::toMbString( ihash ); + iit->second.descriptorPool = it->second->descriptorLayout->createPool( name, 1U ); + iit->second.descriptorSet = iit->second.descriptorPool->createDescriptorSet( name ); + iit->second.descriptorSet->setBindings( writes ); + iit->second.descriptorSet->update(); } - auto const & commands = m_commandBuffers[m_index]; - commands.commandBuffer->begin(); - commands.commandBuffer->beginDebugBlock( { "Staging Texture Upload" - , makeFloatArray( getOwner()->getEngine()->getNextRainbowColour() ) } ); - commands.commandBuffer->beginRenderPass( *m_renderPass - , *m_framebuffer - , { defaultClearDepthStencil, transparentBlackClearColor } - , VK_SUBPASS_CONTENTS_INLINE); + m_aabbs.emplace_back( count, it->second.get(), iit->second.descriptorSet.get(), writes ); + } - for ( auto const & aabb : m_aabbs ) + void DebugDrawer::FramePass::doSubRecordInto( crg::RecordContext const & context + , VkCommandBuffer 
commandBuffer ) + { + if ( !m_aabbs.empty() ) { - commands.commandBuffer->bindPipeline( *aabb.pipeline->pipeline ); - commands.commandBuffer->bindDescriptorSet( *aabb.pipeline->descriptorSet, *aabb.pipeline->pipelineLayout ); - commands.commandBuffer->bindVertexBuffer( 0u, aabb.pipeline->vertices.getBuffer( SubmeshData::ePositions ), aabb.pipeline->vertices.getOffset( SubmeshData::ePositions ) ); + VkBuffer vertexBuffer = m_aabb.vertices.getBuffer( SubmeshData::ePositions ); + VkDeviceSize vertexOffset = m_aabb.vertices.getOffset( SubmeshData::ePositions ); + VkBuffer indexBuffer = m_aabb.indices.getBuffer( SubmeshData::eIndex ); + VkDeviceSize indexOffset = m_aabb.indices.getOffset( SubmeshData::eIndex ); - if ( aabb.pipeline->indices ) - { - commands.commandBuffer->bindIndexBuffer( aabb.pipeline->indices.getBuffer( SubmeshData::eIndex ), aabb.pipeline->indices.getOffset( SubmeshData::eIndex ), VK_INDEX_TYPE_UINT16 ); - commands.commandBuffer->drawIndexed( aabb.pipeline->indices.getAskedSize( SubmeshData::eIndex ) / sizeof( uint16_t ), uint32_t( aabb.count ), 0u, 0u, 0u ); - } - else + for ( auto const & aabb : m_aabbs ) { - commands.commandBuffer->draw( aabb.pipeline->indices.getAskedSize( SubmeshData::ePositions ) / sizeof( castor::Point4f ), uint32_t( aabb.count ), 0u, 0u ); + VkDescriptorSet descriptorSet = *aabb.descriptorSet; + + context->vkCmdBindPipeline( commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *aabb.pipeline->pipeline ); + context->vkCmdBindDescriptorSets( commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *aabb.pipeline->pipelineLayout, 0u, 1u, &descriptorSet, 0u, nullptr ); + context->vkCmdBindVertexBuffers( commandBuffer, 0u, 1u, &vertexBuffer, &vertexOffset ); + context->vkCmdBindIndexBuffer( commandBuffer, indexBuffer, indexOffset, VK_INDEX_TYPE_UINT32 ); + context->vkCmdDrawIndexed( commandBuffer, 24u, uint32_t( aabb.count ), 0u, 0u, 0u ); } + + m_aabbs.clear(); } - commands.commandBuffer->endRenderPass(); - 
commands.commandBuffer->endDebugBlock(); - commands.commandBuffer->end(); + m_pending = false; + } + + //********************************************************************************************* - m_index = 1u - m_index; - return { crg::SemaphoreWait{ commands.submit( queue, toWait ), VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT } }; + DebugDrawer::DebugDrawer( crg::FramePassGroup & graph + , crg::FramePass const * previous + , RenderDevice const & device + , RenderTarget & parent + , crg::ImageViewIdArray colour + , Texture const & depth + , uint32_t const * passIndex ) + : castor::OwnedBy< RenderTarget >{ parent } + { + auto extent = makeExtent2D( depth.getExtent() ); + auto & pass = graph.createPass( "DebugDraw" + , [this, &device, extent, passIndex]( crg::FramePass const & framePass + , crg::GraphContext & context + , crg::RunnableGraph & runnableGraph ) + { + auto result = castor::make_unique< FramePass >( framePass + , context + , runnableGraph + , device + , extent + , passIndex ); + m_framePass = result.get(); + device.renderSystem.getEngine()->registerTimer( castor::makeString( framePass.getFullName() ) + , result->getTimer() ); + return result; + } ); + pass.addDependency( *previous ); + pass.addInOutColourView( std::move( colour ) ); + pass.addInputDepthStencilView( depth.targetViewId ); + m_lastPass = &pass; + } + + void DebugDrawer::addAabbs( ashes::VkDescriptorSetLayoutBindingArray const & bindings + , ashes::WriteDescriptorSetArray const & writes + , VkDeviceSize count + , ashes::PipelineShaderStageCreateInfoArray const & shader + , bool enableDepthTest ) + { + if ( m_framePass && count > 0u ) + { + m_framePass->addAabbs( bindings, writes, count, shader, enableDepthTest ); + } } //********************************************************************************************* diff --git a/source/Core/Castor3D/Render/Opaque/OpaqueRendering.cpp b/source/Core/Castor3D/Render/Opaque/OpaqueRendering.cpp index 7327c5d8a8..32fc51980d 100644 --- 
a/source/Core/Castor3D/Render/Opaque/OpaqueRendering.cpp +++ b/source/Core/Castor3D/Render/Opaque/OpaqueRendering.cpp @@ -260,7 +260,7 @@ namespace castor3d auto targetResult = getOwner()->getTargetResult(); auto targetDepth = getOwner()->getTargetDepth(); - auto & result = m_graph.createPass( isDeferredLighting ? castor::MbString{ "DeferredVisibility" } : castor::MbString{ "Visibility" } + auto & result = m_graph.createPass( isDeferredLighting ? castor::MbString{ "DeferredVisibilityResolve" } : castor::MbString{ "VisibilityResolve" } , [this, targetResult, targetDepth, progress, isDeferredLighting, &previous]( crg::FramePass const & framePass , crg::GraphContext & context , crg::RunnableGraph & runnableGraph ) diff --git a/source/Core/Castor3D/Render/Opaque/VisibilityResolvePass.cpp b/source/Core/Castor3D/Render/Opaque/VisibilityResolvePass.cpp index 847d9a19d8..868eb35221 100644 --- a/source/Core/Castor3D/Render/Opaque/VisibilityResolvePass.cpp +++ b/source/Core/Castor3D/Render/Opaque/VisibilityResolvePass.cpp @@ -14,6 +14,7 @@ #include "Castor3D/Render/RenderNodesPass.hpp" #include "Castor3D/Render/RenderTechnique.hpp" #include "Castor3D/Render/Clustered/ClustersConfig.hpp" +#include "Castor3D/Render/Clustered/FrustumClusters.hpp" #include "Castor3D/Render/Culling/PipelineNodes.hpp" #include "Castor3D/Render/EnvironmentMap/EnvironmentMap.hpp" #include "Castor3D/Render/Node/BillboardRenderNode.hpp" @@ -77,7 +78,8 @@ namespace castor3d enum InOutBindings : uint32_t { - eCamera, + eMainCamera, + eClustersCamera, eScene, eModels, eBillboards, @@ -1110,8 +1112,13 @@ namespace castor3d shader::CookTorranceBRDF cookTorrance{ writer, brdf }; auto index = uint32_t( InOutBindings::eCount ); - C3D_Camera( writer - , InOutBindings::eCamera + C3D_CameraNamed( writer + , Main + , InOutBindings::eMainCamera + , Sets::eInOuts ); + C3D_CameraNamed( writer + , Clusters + , InOutBindings::eClustersCamera , Sets::eInOuts ); C3D_Scene( writer , InOutBindings::eScene @@ -1214,7 
+1221,7 @@ namespace castor3d { shader::DebugOutput output{ debugConfig , cuT( "Opaque" ) - , c3d_cameraData.debugIndex() + , c3d_cameraDataMain.debugIndex() , outResult , areDebugTargetsEnabled }; @@ -1247,7 +1254,7 @@ namespace castor3d , modelData , material , depth - , c3d_cameraData + , c3d_cameraDataMain , c3d_billboardData , baseSurface ); auto components = writer.declLocale( "components" @@ -1276,7 +1283,7 @@ namespace castor3d } auto incident = writer.declLocale( "incident" - , reflections.computeIncident( shader::getXYZ( baseSurface.worldPosition ), c3d_cameraData.position() ) ); + , reflections.computeIncident( shader::getXYZ( baseSurface.worldPosition ), c3d_cameraDataMain.position() ) ); if ( components.transmission ) { @@ -1301,11 +1308,11 @@ namespace castor3d lightingModel->finish( passShaders , surface , utils - , c3d_cameraData.position() + , c3d_cameraDataClusters.position() , components ); auto lightSurface = shader::LightSurface::create( writer , "lightSurface" - , c3d_cameraData.position() + , c3d_cameraDataClusters.position() , surface.worldPosition , getXYZ( surface.viewPosition ) , surface.clipPosition @@ -1404,7 +1411,7 @@ namespace castor3d , components , lightSurface , *backgroundModel - , c3d_cameraData + , c3d_cameraDataMain , directLighting , indirectLighting , vec2( ipixel ) @@ -1468,36 +1475,36 @@ namespace castor3d { if ( flags.hasFog() ) { - outResult = fog.apply( c3d_sceneData.getBackgroundColour( utils, c3d_cameraData.gamma() ) + outResult = fog.apply( c3d_sceneData.getBackgroundColour( utils, c3d_cameraDataMain.gamma() ) , outResult , shader::getRawXYZ( baseSurface.worldPosition ) - , c3d_cameraData.position() + , c3d_cameraDataMain.position() , c3d_sceneData ); if ( outputScattering ) { - outScattering = fog.apply( c3d_sceneData.getBackgroundColour( utils, c3d_cameraData.gamma() ) + outScattering = fog.apply( c3d_sceneData.getBackgroundColour( utils, c3d_cameraDataMain.gamma() ) , outScattering , shader::getRawXYZ( 
baseSurface.worldPosition ) - , c3d_cameraData.position() + , c3d_cameraDataMain.position() , c3d_sceneData ); } } auto linearDepth = writer.declLocale( "linearDepth" - , utils.lineariseDepth( depth, c3d_cameraData.nearPlane(), c3d_cameraData.farPlane() ) ); + , utils.lineariseDepth( depth, c3d_cameraDataMain.nearPlane(), c3d_cameraDataMain.farPlane() ) ); backgroundModel->applyVolume( vec2( ipixel ) , linearDepth - , vec2( c3d_cameraData.renderSize() ) - , c3d_cameraData.depthPlanes() + , vec2( c3d_cameraDataMain.renderSize() ) + , c3d_cameraDataMain.depthPlanes() , outResult ); if ( outputScattering ) { backgroundModel->applyVolume( vec2( ipixel ) , linearDepth - , vec2( c3d_cameraData.renderSize() ) - , c3d_cameraData.depthPlanes() + , vec2( c3d_cameraDataMain.renderSize() ) + , c3d_cameraDataMain.depthPlanes() , outScattering ); } } @@ -1543,7 +1550,10 @@ namespace castor3d auto stages = VkShaderStageFlags( VisibilityResolvePass::useCompute() ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_FRAGMENT_BIT ); - bindings.emplace_back( makeDescriptorSetLayoutBinding( InOutBindings::eCamera + bindings.emplace_back( makeDescriptorSetLayoutBinding( InOutBindings::eMainCamera + , VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER + , stages ) ); + bindings.emplace_back( makeDescriptorSetLayoutBinding( InOutBindings::eClustersCamera , VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , stages ) ); bindings.emplace_back( makeDescriptorSetLayoutBinding( InOutBindings::eScene @@ -1651,7 +1661,8 @@ namespace castor3d static ashes::DescriptorSetPtr createInDescriptorSet( castor::String const & name , ashes::DescriptorSetPool const & pool , crg::RunnableGraph & graph - , CameraUbo const & cameraUbo + , CameraUbo const & mainCameraUbo + , CameraUbo const & clustersCameraUbo , SceneUbo const & sceneUbo , RenderTechnique const & technique , Scene const & scene @@ -1664,7 +1675,8 @@ namespace castor3d auto const & engine = *scene.getOwner(); auto const & matCache = engine.getMaterialCache(); 
ashes::WriteDescriptorSetArray writes; - writes.push_back( cameraUbo.getDescriptorWrite( InOutBindings::eCamera ) ); + writes.push_back( mainCameraUbo.getDescriptorWrite( InOutBindings::eMainCamera ) ); + writes.push_back( clustersCameraUbo.getDescriptorWrite( InOutBindings::eClustersCamera ) ); writes.push_back( sceneUbo.getDescriptorWrite( InOutBindings::eScene ) ); writes.push_back( makeDescriptorWrite( scene.getModelBuffer() , InOutBindings::eModels @@ -2718,7 +2730,8 @@ namespace castor3d result->vtxDescriptorPool = result->vtxDescriptorLayout->createPool( MaxPipelines ); result->ioDescriptorPool = result->ioDescriptorLayout->createPool( 1u ); - result->ioDescriptorSet = visres::createInDescriptorSet( getName(), *result->ioDescriptorPool, m_graph, m_cameraUbo, m_sceneUbo, *m_parent, getScene() + result->ioDescriptorSet = visres::createInDescriptorSet( getName(), *result->ioDescriptorPool, m_graph + , m_cameraUbo, m_parent->getRenderTarget().getFrustumClusters()->getCameraUbo(), m_sceneUbo, *m_parent, getScene() , getClustersConfig()->enabled, m_targetImage, hasSsao() ? 
m_ssao : nullptr, &getIndirectLighting(), m_deferredLightingFilter ); pipelines.push_back( castor::move( result ) ); it = std::next( pipelines.begin(), ptrdiff_t( pipelines.size() - 1u ) ); diff --git a/source/Core/Castor3D/Render/RenderTarget.cpp b/source/Core/Castor3D/Render/RenderTarget.cpp index 2696acf21f..bf1379a6f1 100644 --- a/source/Core/Castor3D/Render/RenderTarget.cpp +++ b/source/Core/Castor3D/Render/RenderTarget.cpp @@ -617,6 +617,7 @@ namespace castor3d auto & camera = *getCamera(); auto & scene = *getScene(); + updater.jitter = m_jitter; updater.scene = &scene; updater.camera = &camera; camera.resize( m_size ); @@ -640,6 +641,11 @@ namespace castor3d if ( m_frustumClusters ) { m_frustumClusters->update( updater ); + + if ( m_debugDrawer ) + { + m_frustumClusters->updateDebug( *m_debugDrawer ); + } } m_hdrConfigUbo->cpuUpdate( getHdrConfig() ); @@ -720,11 +726,6 @@ namespace castor3d && getCamera() ) { result = doRender( queue, signalsToWait ); - - if ( m_debugDrawer ) - { - result = m_debugDrawer->render( queue, castor::move( result ) ); - } } return result; @@ -1035,6 +1036,14 @@ namespace castor3d if ( result ) { m_combinePassSource = srgbSource; + m_debugDrawer = castor::makeUnique< DebugDrawer >( m_graph.getDefaultGroup() + , previousPass + , device + , *this + , crg::ImageViewIdArray{ srgbSource->sampledViewId, srgbTarget->sampledViewId } + , m_renderTechnique->getDepth() + , &m_combinePassIndex ); + previousPass = &m_debugDrawer->getLastPass(); m_combinePass = &doCreateCombinePass( progress , crg::ImageViewIdArray{ srgbSource->sampledViewId, srgbTarget->sampledViewId } ); m_combinePass->addDependency( *previousPass ); @@ -1065,7 +1074,6 @@ namespace castor3d runnable->record(); m_initialised = result; } ) ); - m_debugDrawer = castor::makeUnique< DebugDrawer >(*this, device, m_combined, m_renderTechnique->getDepth() ); } auto mbName = castor::toUtf8( getName() ); @@ -1361,4 +1369,14 @@ namespace castor3d ? 
context.window->root : context.texture->root; } + + void addDebugAabbs( RenderTarget const & target + , ashes::VkDescriptorSetLayoutBindingArray const & bindings + , ashes::WriteDescriptorSetArray const & writes + , VkDeviceSize count + , ashes::PipelineShaderStageCreateInfoArray const & shader + , bool enableDepthTest ) + { + addDebugAabbs( target.getDebugDrawer(), bindings, writes, count, shader, enableDepthTest ); + } } diff --git a/source/Core/Castor3D/Render/RenderTechnique.cpp b/source/Core/Castor3D/Render/RenderTechnique.cpp index ace8c9f625..6c24e5e68e 100644 --- a/source/Core/Castor3D/Render/RenderTechnique.cpp +++ b/source/Core/Castor3D/Render/RenderTechnique.cpp @@ -611,7 +611,7 @@ namespace castor3d renderPass.update( updater ); } ); - auto jitter = m_renderTarget.getJitter(); + auto jitter = updater.jitter; auto jitterProjSpace = jitter * 2.0f; jitterProjSpace[0] /= float( camera.getWidth() ); jitterProjSpace[1] /= float( camera.getHeight() ); diff --git a/source/Core/Castor3D/Scene/Light/Light.cpp b/source/Core/Castor3D/Scene/Light/Light.cpp index f668dcde06..21b8beac08 100644 --- a/source/Core/Castor3D/Scene/Light/Light.cpp +++ b/source/Core/Castor3D/Scene/Light/Light.cpp @@ -18,6 +18,7 @@ namespace castor3d , LightFactory & factory , LightType lightType ) : MovableObject{ name, scene, MovableType::eLight, node } + , m_enabled{ m_dirty, true, [this](){ markDirty(); } } { m_category = factory.create( lightType, castor::ref( *this ) ); } diff --git a/source/Core/Castor3D/Scene/Light/LightCategory.cpp b/source/Core/Castor3D/Scene/Light/LightCategory.cpp index 214053b16d..7ee79a3bdd 100644 --- a/source/Core/Castor3D/Scene/Light/LightCategory.cpp +++ b/source/Core/Castor3D/Scene/Light/LightCategory.cpp @@ -16,10 +16,13 @@ namespace castor3d , Light & light , uint32_t lightComponentCount , uint32_t shadowComponentCount ) - : m_lightType{ lightType } + : m_dirty{ light.doGetDirty() } + , m_lightType{ lightType } , m_light{ light } , m_lightComponentCount{ 
lightComponentCount } , m_shadowComponentCount{ shadowComponentCount } + , m_colour{ m_dirty, { 1.0, 1.0, 1.0 }, [this](){ getLight().markDirty(); } } + , m_intensity{ m_dirty, { 1.0, 1.0 }, [this](){ getLight().markDirty(); } } { } @@ -31,14 +34,16 @@ namespace castor3d base.intensity = getIntensity(); base.shadowMapIndex = float( m_light.getShadowMapIndex() ); + base.enabled = ( ( m_light.isEnabled() && m_light.getParent()->isVisible() ) + ? 1.0f : 0.0f ); + doFillLightBuffer( data ); } void LightCategory::accept( ConfigurationVisitorBase & vis ) { vis.visit( cuT( "Colour" ), m_colour ); - vis.visit( cuT( "Diffuse Intensity" ), m_intensity->x ); - vis.visit( cuT( "Specular Intensity" ), m_intensity->y ); + vis.visit( cuT( "Intensity" ), m_intensity ); doAccept( vis ); } @@ -156,13 +161,13 @@ namespace castor3d void LightCategory::setDiffuseIntensity( float value ) { - m_intensity[0] = value; + ( *m_intensity )->x = value; getLight().markDirty(); } void LightCategory::setSpecularIntensity( float value ) { - m_intensity[1] = value; + ( *m_intensity )->y = value; getLight().markDirty(); } diff --git a/source/Core/Castor3D/Scene/Light/PointLight.cpp b/source/Core/Castor3D/Scene/Light/PointLight.cpp index 362c4d4762..3e27735a4d 100644 --- a/source/Core/Castor3D/Scene/Light/PointLight.cpp +++ b/source/Core/Castor3D/Scene/Light/PointLight.cpp @@ -27,9 +27,8 @@ namespace castor3d PointLight::PointLight( Light & light ) : LightCategory{ LightType::ePoint, light, LightDataComponents, ShadowDataComponents } - , m_dirtyData{ true } - , m_range{ m_dirtyData, 10.0f } - , m_position{ m_dirtyData } + , m_range{ m_dirty, 10.0f, [this](){ getLight().markDirty(); } } + , m_position{ m_dirty, [this](){ getLight().markDirty(); } } { } @@ -129,7 +128,7 @@ namespace castor3d auto range = computeRange( getIntensity(), m_range.value() ); m_cubeBox.load( castor::Point3f{ -range, -range, -range } , castor::Point3f{ range, range, range } ); - m_farPlane = m_range; + m_farPlane = 
m_range.value(); } void PointLight::updateShadow( int32_t index ) @@ -154,21 +153,11 @@ namespace castor3d void PointLight::setAttenuation( castor::Point3f const & attenuation ) { m_range = getMaxDistance( *this, attenuation ); - - if ( m_dirtyData ) - { - getLight().markDirty(); - } } void PointLight::setRange( float value ) { m_range = value; - - if ( m_dirtyData ) - { - getLight().markDirty(); - } } void PointLight::doFillLightBuffer( castor::Point4f * data )const diff --git a/source/Core/Castor3D/Scene/Light/SpotLight.cpp b/source/Core/Castor3D/Scene/Light/SpotLight.cpp index c85971333f..470366892d 100644 --- a/source/Core/Castor3D/Scene/Light/SpotLight.cpp +++ b/source/Core/Castor3D/Scene/Light/SpotLight.cpp @@ -38,11 +38,10 @@ namespace castor3d SpotLight::SpotLight( Light & light ) : LightCategory{ LightType::eSpot, light, LightDataComponents, ShadowDataComponents } - , m_dirtyData{ true } - , m_range{ m_dirtyData, 10.0f } - , m_exponent{ m_dirtyData, 1.0f } - , m_innerCutOff{ m_dirtyData, 22.5_degrees } - , m_outerCutOff{ m_dirtyData, 45.0_degrees } + , m_range{ m_dirty, 10.0f, [this](){ getLight().markDirty(); } } + , m_exponent{ m_dirty, 1.0f, [this](){ getLight().markDirty(); } } + , m_innerCutOff{ m_dirty, 22.5_degrees, [this](){ getLight().markDirty(); } } + , m_outerCutOff{ m_dirty, 45.0_degrees, [this](){ getLight().markDirty(); } } , m_lightView{ m_dirtyShadow } , m_lightProj{ m_dirtyShadow } { @@ -174,7 +173,6 @@ namespace castor3d m_cubeBox.load( aabb.getMin() * range , aabb.getMax() * range ); m_farPlane = range; - m_dirtyData = false; } void SpotLight::updateShadow( Camera & lightCamera @@ -212,51 +210,26 @@ namespace castor3d void SpotLight::setAttenuation( castor::Point3f const & attenuation ) { m_range = getMaxDistance( *this, attenuation ); - - if ( m_dirtyData ) - { - getLight().markDirty(); - } } void SpotLight::setRange( float range ) { m_range = range; - - if ( m_dirtyData ) - { - getLight().markDirty(); - } } void 
SpotLight::setExponent( float exponent ) { m_exponent = exponent; - - if ( m_dirtyData ) - { - getLight().markDirty(); - } } void SpotLight::setInnerCutOff( castor::Angle const & cutOff ) { m_innerCutOff = cutOff; - - if ( m_dirtyData ) - { - getLight().markDirty(); - } } void SpotLight::setOuterCutOff( castor::Angle const & cutOff ) { m_outerCutOff = cutOff; - - if ( m_dirtyData ) - { - getLight().markDirty(); - } } void SpotLight::doFillLightBuffer( castor::Point4f * data )const diff --git a/source/Core/Castor3D/Shader/ShaderBuffers/LightBuffer.cpp b/source/Core/Castor3D/Shader/ShaderBuffers/LightBuffer.cpp index 2a801a4a31..7ea84e73e8 100644 --- a/source/Core/Castor3D/Shader/ShaderBuffers/LightBuffer.cpp +++ b/source/Core/Castor3D/Shader/ShaderBuffers/LightBuffer.cpp @@ -198,6 +198,7 @@ namespace castor3d for ( auto i = uint32_t( type ); i < uint32_t( LightType::eMax ); ++i ) { auto begin = m_typeSortedLights[i].begin() + index; + index = 0u; for ( auto it : castor::makeArrayView( begin, m_typeSortedLights[i].end() ) ) { diff --git a/source/Core/Castor3D/Shader/Shaders/GlslLight.cpp b/source/Core/Castor3D/Shader/Shaders/GlslLight.cpp index 5f4a9bf43c..8bf6e09ac4 100644 --- a/source/Core/Castor3D/Shader/Shaders/GlslLight.cpp +++ b/source/Core/Castor3D/Shader/Shaders/GlslLight.cpp @@ -154,17 +154,18 @@ namespace castor3d::shader lightData = getLightData( offset ); result.direction() = normalize( lightData.xyz() ); - result.outerCutOffCos() = lightData.w(); + result.exponent() = lightData.w(); lightData = getLightData( offset ); - result.innerCutOff() = lightData.x(); - result.outerCutOff() = lightData.y(); - result.innerCutOffSin() = lightData.z(); - result.outerCutOffSin() = lightData.w(); + result.outerCutOffCos() = lightData.x(); + result.innerCutOff() = lightData.y(); + result.outerCutOff() = lightData.z(); + result.innerCutOffSin() = lightData.w(); lightData = getLightData( offset ); - result.innerCutOffCos() = lightData.x(); - result.outerCutOffTan() = 
lightData.y(); + result.outerCutOffSin() = lightData.x(); + result.innerCutOffCos() = lightData.y(); + result.outerCutOffTan() = lightData.z(); m_writer.returnStmt( result ); } @@ -195,7 +196,7 @@ namespace castor3d::shader lightData = getLightData( offset ); light.posDir() = lightData.xyz(); - light.exponent() = lightData.w(); + light.enabled() = m_writer.cast< sdw::UInt >( lightData.w() ); } //********************************************************************************************* @@ -324,13 +325,17 @@ namespace castor3d::shader { auto directionalLight = m_writer.declLocale( "directionalLight" , getDirectionalLight( cur ) ); - lightingModel->compute( debugOutput - , directionalLight - , components - , backgroundModel - , lightSurface - , receivesShadows - , parentOutput ); + IF( m_writer, directionalLight.enabled() ) + { + lightingModel->compute( debugOutput + , directionalLight + , components + , backgroundModel + , lightSurface + , receivesShadows + , parentOutput ); + } + FI cur += castor3d::DirectionalLight::LightDataComponents; } ELIHW @@ -388,13 +393,17 @@ namespace castor3d::shader { auto directionalLight = m_writer.declLocale( "directionalLight" , getDirectionalLight( cur ) ); - lightingModel->computeAllButDiffuse( debugOutput - , directionalLight - , components - , backgroundModel - , lightSurface - , receivesShadows - , parentOutput ); + IF( m_writer, directionalLight.enabled() ) + { + lightingModel->computeAllButDiffuse( debugOutput + , directionalLight + , components + , backgroundModel + , lightSurface + , receivesShadows + , parentOutput ); + } + FI cur += castor3d::DirectionalLight::LightDataComponents; } ELIHW @@ -451,12 +460,16 @@ namespace castor3d::shader { auto directionalLight = m_writer.declLocale( "directionalLight" , getDirectionalLight( cur ) ); - output += lightingModel->computeDiffuse( debugOutput - , directionalLight - , components - , backgroundModel - , lightSurface - , receivesShadows ); + IF( m_writer, 
directionalLight.enabled() ) + { + output += lightingModel->computeDiffuse( debugOutput + , directionalLight + , components + , backgroundModel + , lightSurface + , receivesShadows ); + } + FI cur += castor3d::DirectionalLight::LightDataComponents; } ELIHW @@ -503,13 +516,17 @@ namespace castor3d::shader { auto directionalLight = m_writer.declLocale( "directionalLight" , getDirectionalLight( cur ) ); - lightingModel->compute( debugOutput - , directionalLight - , components - , backgroundModel - , lightSurface - , receivesShadows - , output ); + IF( m_writer, directionalLight.enabled() ) + { + lightingModel->compute( debugOutput + , directionalLight + , components + , backgroundModel + , lightSurface + , receivesShadows + , output ); + } + FI cur += castor3d::DirectionalLight::LightDataComponents; } ELIHW @@ -520,12 +537,16 @@ namespace castor3d::shader { auto pointLight = m_writer.declLocale( "pointLight" , getPointLight( cur ) ); - lightingModel->compute( debugOutput - , pointLight - , components - , lightSurface - , receivesShadows - , output ); + IF( m_writer, pointLight.enabled() ) + { + lightingModel->compute( debugOutput + , pointLight + , components + , lightSurface + , receivesShadows + , output ); + } + FI cur += castor3d::PointLight::LightDataComponents; } ELIHW @@ -536,12 +557,16 @@ namespace castor3d::shader { auto spotLight = m_writer.declLocale( "spotLight" , getSpotLight( cur ) ); - lightingModel->compute( debugOutput - , spotLight - , components - , lightSurface - , receivesShadows - , output ); + IF( m_writer, spotLight.enabled() ) + { + lightingModel->compute( debugOutput + , spotLight + , components + , lightSurface + , receivesShadows + , output ); + } + FI cur += castor3d::SpotLight::LightDataComponents; } ELIHW @@ -568,13 +593,17 @@ namespace castor3d::shader { auto directionalLight = m_writer.declLocale( "directionalLight" , getDirectionalLight( cur ) ); - lightingModel->computeAllButDiffuse( debugOutput - , directionalLight - , components - 
, backgroundModel - , lightSurface - , receivesShadows - , output ); + IF( m_writer, directionalLight.enabled() ) + { + lightingModel->computeAllButDiffuse( debugOutput + , directionalLight + , components + , backgroundModel + , lightSurface + , receivesShadows + , output ); + } + FI cur += castor3d::DirectionalLight::LightDataComponents; } ELIHW @@ -585,12 +614,16 @@ namespace castor3d::shader { auto pointLight = m_writer.declLocale( "pointLight" , getPointLight( cur ) ); - lightingModel->computeAllButDiffuse( debugOutput - , pointLight - , components - , lightSurface - , receivesShadows - , output ); + IF( m_writer, pointLight.enabled() ) + { + lightingModel->computeAllButDiffuse( debugOutput + , pointLight + , components + , lightSurface + , receivesShadows + , output ); + } + FI cur += castor3d::PointLight::LightDataComponents; } ELIHW @@ -601,12 +634,16 @@ namespace castor3d::shader { auto spotLight = m_writer.declLocale( "spotLight" , getSpotLight( cur ) ); - lightingModel->computeAllButDiffuse( debugOutput - , spotLight - , components - , lightSurface - , receivesShadows - , output ); + IF( m_writer, spotLight.enabled() ) + { + lightingModel->computeAllButDiffuse( debugOutput + , spotLight + , components + , lightSurface + , receivesShadows + , output ); + } + FI cur += castor3d::SpotLight::LightDataComponents; } ELIHW @@ -631,12 +668,16 @@ namespace castor3d::shader { auto directionalLight = m_writer.declLocale( "directionalLight" , getDirectionalLight( cur ) ); - output += lightingModel->computeDiffuse( debugOutput - , directionalLight - , components - , backgroundModel - , lightSurface - , receivesShadows ); + IF( m_writer, directionalLight.enabled() ) + { + output += lightingModel->computeDiffuse( debugOutput + , directionalLight + , components + , backgroundModel + , lightSurface + , receivesShadows ); + } + FI cur += castor3d::DirectionalLight::LightDataComponents; } ELIHW @@ -647,11 +688,15 @@ namespace castor3d::shader { auto pointLight = 
m_writer.declLocale( "pointLight" , getPointLight( cur ) ); - output += lightingModel->computeDiffuse( debugOutput - , pointLight - , components - , lightSurface - , receivesShadows ); + IF( m_writer, pointLight.enabled() ) + { + output += lightingModel->computeDiffuse( debugOutput + , pointLight + , components + , lightSurface + , receivesShadows ); + } + FI cur += castor3d::PointLight::LightDataComponents; } ELIHW @@ -662,11 +707,15 @@ namespace castor3d::shader { auto spotLight = m_writer.declLocale( "spotLight" , getSpotLight( cur ) ); - output += lightingModel->computeDiffuse( debugOutput - , spotLight - , components - , lightSurface - , receivesShadows ); + IF( m_writer, spotLight.enabled() ) + { + output += lightingModel->computeDiffuse( debugOutput + , spotLight + , components + , lightSurface + , receivesShadows ); + } + FI cur += castor3d::SpotLight::LightDataComponents; } ELIHW diff --git a/source/Core/Castor3D/Shader/Ubos/ClustersUbo.cpp b/source/Core/Castor3D/Shader/Ubos/ClustersUbo.cpp index 83cc4f6026..6a6487bd71 100644 --- a/source/Core/Castor3D/Shader/Ubos/ClustersUbo.cpp +++ b/source/Core/Castor3D/Shader/Ubos/ClustersUbo.cpp @@ -50,45 +50,38 @@ namespace castor3d , sdw::Float const & viewZ , sdw::Vec4 const & clustersLightsData ) { + auto nearZ = clustersLightsData.x(); + auto farZ = clustersLightsData.y(); + auto i = writer.declLocale( "i" , screenPos.x() / writer.cast< sdw::Float >( clusterSize().x() ) ); auto j = writer.declLocale( "j" , screenPos.y() / writer.cast< sdw::Float >( clusterSize().y() ) ); - sdw::UInt const ExponentialBase = 0_u; - sdw::UInt const ExponentialBiased = 1_u; - sdw::UInt const Linear = 2_u; - sdw::UInt const ExponentialLinearHybrid = 3_u; + sdw::UInt const Exponential= 0_u; + sdw::UInt const Linear = 1_u; + sdw::UInt const Hybrid = 2_u; auto k = writer.declLocale( "k", 0.0_f ); - IF( writer, splitScheme() == ExponentialBase ) - { - k = floor( sdw::log( -viewZ ) * clustersLightsData.z() - clustersLightsData.w() ); - } - 
ELSEIF( splitScheme() == ExponentialBiased ) + IF( writer, splitScheme() == Exponential ) { - auto nearZ = writer.declLocale( "nearZ" - , clustersLightsData.x() ); - auto farZ = writer.declLocale( "farZ" - , clustersLightsData.y() ); - k = max( 0.0_f, floor( sdw::log( -viewZ / nearZ ) * clustersLightsData.w() - writer.cast< sdw::Float >( dimensions().z() ) * bias() ) ); + auto multiply = clustersLightsData.z(); + auto add = clustersLightsData.w(); + k = floor( sdw::log( -viewZ ) * multiply - add ); } ELSEIF( splitScheme() == Linear ) { - k = floor( writer.cast< sdw::Float >( dimensions().z() ) * ( -viewZ - clustersLightsData.z() ) / ( clustersLightsData.w() - clustersLightsData.z() ) ); + k = floor( writer.cast< sdw::Float >( dimensions().z() ) * ( -viewZ - nearZ ) / ( farZ - nearZ ) ); } ELSE { - auto nearZ = writer.declLocale( "nearZ" - , clustersLightsData.x() ); - auto farZ = writer.declLocale( "farZ" - , clustersLightsData.y() ); + auto d = clustersLightsData.z(); auto limZ = writer.declLocale( "limZ" - , max( bias(), nearZ ) ); + , max( minDistance(), nearZ ) ); auto depthBias = writer.declLocale( "depthBias" , sdw::log( limZ / nearZ ) / sdw::log( farZ / limZ ) ); - k = max( 0.0_f, floor( sdw::log( -viewZ / nearZ ) * clustersLightsData.z() - writer.cast< sdw::Float >( dimensions().z() ) * depthBias ) ); + k = max( 0.0_f, floor( sdw::log( -viewZ / nearZ ) * d - writer.cast< sdw::Float >( dimensions().z() ) * depthBias ) ); } FI @@ -104,20 +97,9 @@ namespace castor3d , pclustersLightsData ); } - sdw::RetUInt32 ClustersData::computeClusterIndex1D( sdw::U32Vec3 const pclusterIndex3D ) + sdw::UInt32 ClustersData::computeClusterIndex1D( sdw::U32Vec3 const clusterIndex3D ) { - if ( !m_computeClusterIndex1D ) - { - auto & writer = *getWriter(); - m_computeClusterIndex1D = writer.implementFunction< sdw::UInt32 >( "c3d_computeClusterIndex1D" - , [this, &writer]( sdw::U32Vec3 const & clusterIndex3D ) - { - writer.returnStmt( clusterIndex3D.x() + ( dimensions().x() * ( 
clusterIndex3D.y() + dimensions().y() * clusterIndex3D.z() ) ) ); - } - , sdw::InU32Vec3{ writer, "clusterIndex3D" } ); - } - - return m_computeClusterIndex1D( pclusterIndex3D ); + return clusterIndex3D.x() + ( dimensions().x() * ( clusterIndex3D.y() + dimensions().y() * clusterIndex3D.z() ) ); } sdw::RetVec2 ClustersData::getClusterDepthBounds( sdw::U32Vec3 const pclusterIndex3D @@ -132,54 +114,47 @@ namespace castor3d , sdw::Vec4 const & clustersLightsData , sdw::Vec4 const & ) { - auto nearZ = writer.declLocale( "nearZ" - , clustersLightsData.x() ); - auto farZ = writer.declLocale( "farZ" - , clustersLightsData.y() ); + auto nearZ = clustersLightsData.x(); + auto farZ = clustersLightsData.y(); + auto clustersZ = writer.cast< sdw::Float >( dimensions().z() ); - sdw::UInt const ExponentialBase = 0_u; - sdw::UInt const ExponentialBiased = 1_u; - sdw::UInt const Linear = 2_u; - sdw::UInt const ExponentialLinearHybrid = 3_u; + sdw::UInt const Exponential = 0_u; + sdw::UInt const Linear = 1_u; + sdw::UInt const Hybrid = 2_u; - IF( writer, splitScheme() == ExponentialBase ) - { - auto nearTile = writer.declLocale( "nearTile" - , -nearZ * pow( farZ / nearZ, writer.cast< sdw::Float >( clusterIndex3D.z() ) / writer.cast< sdw::Float >( dimensions().z() ) ) ); - auto farTile = writer.declLocale( "farTile" - , -nearZ * pow( farZ / nearZ, writer.cast< sdw::Float >( clusterIndex3D.z() + 1_u ) / writer.cast< sdw::Float >( dimensions().z() ) ) ); - writer.returnStmt( vec2( nearTile, farTile ) ); - } - ELSEIF( splitScheme() == ExponentialBiased ) + IF( writer, splitScheme() == Exponential ) { auto nearTile = writer.declLocale( "nearTile" - , -nearZ * pow( farZ / nearZ, ( writer.cast< sdw::Float >( clusterIndex3D.z() ) + writer.cast< sdw::Float >( dimensions().z() ) * bias() ) / clustersLightsData.z() ) ); + , -nearZ * pow( farZ / nearZ, writer.cast< sdw::Float >( clusterIndex3D.z() ) / clustersZ ) ); auto farTile = writer.declLocale( "farTile" - , -nearZ * pow( farZ / nearZ, ( 
writer.cast< sdw::Float >( clusterIndex3D.z() + 1_u ) + writer.cast< sdw::Float >( dimensions().z() ) * bias() ) / clustersLightsData.z() ) ); + , -nearZ * pow( farZ / nearZ, writer.cast< sdw::Float >( clusterIndex3D.z() + 1_u ) / clustersZ ) ); writer.returnStmt( vec2( nearTile, farTile ) ); } ELSEIF( splitScheme() == Linear ) { auto nearTile = writer.declLocale( "nearTile" - , -nearZ - writer.cast< sdw::Float >( clusterIndex3D.z() ) * ( farZ - nearZ ) / writer.cast< sdw::Float >( dimensions().z() ) ); + , -nearZ - writer.cast< sdw::Float >( clusterIndex3D.z() ) * ( farZ - nearZ ) / clustersZ ); auto farTile = writer.declLocale( "farTile" - , -nearZ - writer.cast< sdw::Float >( clusterIndex3D.z() + 1_u ) * ( farZ - nearZ ) / writer.cast< sdw::Float >( dimensions().z() ) ); + , -nearZ - writer.cast< sdw::Float >( clusterIndex3D.z() + 1_u ) * ( farZ - nearZ ) / clustersZ ); writer.returnStmt( vec2( nearTile, farTile ) ); } ELSE { + auto e = clustersLightsData.w(); auto limZ = writer.declLocale( "limZ" - , max( bias(), nearZ ) ); + , max( minDistance(), nearZ ) ); auto depthBias = writer.declLocale( "depthBias" , sdw::log( limZ / nearZ ) / sdw::log( farZ / limZ ) ); + auto curSlice = clusterIndex3D.z(); + auto nxtSlice = clusterIndex3D.z() + 1_u; auto nearTile = writer.declLocale( "nearTile" - , writer.ternary( clusterIndex3D.z() == 0_u + , writer.ternary( curSlice == 0_u , -nearZ - , -clustersLightsData.w() * pow( farZ / nearZ, writer.cast< sdw::Float >( clusterIndex3D.z() ) / ( writer.cast< sdw::Float >( dimensions().z() ) * ( 1.0_f + depthBias ) ) ) ) ); + , -e * pow( farZ / nearZ, writer.cast< sdw::Float >( curSlice ) / ( clustersZ * ( 1.0_f + depthBias ) ) ) ) ); auto farTile = writer.declLocale( "farTile" - , writer.ternary( clusterIndex3D.z() == 0_u + , writer.ternary( nxtSlice == 0_u , -nearZ - , -clustersLightsData.w() * pow( farZ / nearZ, writer.cast< sdw::Float >( clusterIndex3D.z() + 1_u ) / ( writer.cast< sdw::Float >( dimensions().z() ) * ( 1.0_f + 
depthBias ) ) ) ) ); + , -e * pow( farZ / nearZ, writer.cast< sdw::Float >( nxtSlice ) / ( clustersZ * ( 1.0_f + depthBias ) ) ) ) ); writer.returnStmt( vec2( nearTile, farTile ) ); } FI @@ -233,12 +208,11 @@ namespace castor3d auto nearZ = writer.getVariable < sdw::Float >( "nearZ" ); auto farZ = writer.getVariable < sdw::Float >( "farZ" ); - sdw::UInt const ExponentialBase = 0_u; - sdw::UInt const ExponentialBiased = 1_u; - sdw::UInt const Linear = 2_u; - sdw::UInt const ExponentialLinearHybrid = 3_u; + sdw::UInt const Exponential = 0_u; + sdw::UInt const Linear = 1_u; + sdw::UInt const Hybrid = 2_u; - IF( writer, splitScheme() == ExponentialBase ) + IF( writer, splitScheme() == Exponential ) { auto multiply = writer.declLocale( "multiply" , writer.cast< sdw::Float >( dimensions().z() ) / sdw::log( farZ / nearZ ) ); @@ -246,22 +220,14 @@ namespace castor3d , multiply * sdw::log( nearZ ) ); clustersLightsData = vec4( nearZ, farZ, multiply, add ); } - ELSEIF( splitScheme() == ExponentialBiased ) - { - auto d = writer.declLocale( "d" - , writer.cast< sdw::Float >( dimensions().z() ) * ( 1.0_f + bias() ) ); - auto e = writer.declLocale( "e" - , d / sdw::log( farZ / nearZ ) ); - clustersLightsData = vec4( nearZ, farZ, d, e ); - } ELSEIF( splitScheme() == Linear ) { - clustersLightsData = vec4( nearZ, farZ, nearZ, farZ ); + clustersLightsData = vec4( nearZ, farZ, 0.0_f, 0.0_f ); } ELSE { auto limZ = writer.declLocale( "limZ" - , max( bias(), nearZ ) ); + , max( minDistance(), nearZ ) ); auto depthBias = writer.declLocale( "depthBias" , sdw::log( limZ / nearZ ) / sdw::log( farZ / limZ ) ); auto nTimesOnePlusB = writer.declLocale( "nTimesOnePlusB" @@ -310,7 +276,7 @@ namespace castor3d , uint32_t pointLightsCount , uint32_t spotLightsCount , ClusterSplitScheme splitScheme - , float bias + , float minDistance , bool enableWaveIntrinsics ) { CU_Require( m_ubo ); @@ -323,7 +289,7 @@ namespace castor3d configuration.pointLightLevelsCount = FrustumClusters::getNumLevels( 
pointLightsCount ); configuration.spotLightLevelsCount = FrustumClusters::getNumLevels( spotLightsCount ); configuration.splitScheme = uint32_t( splitScheme ); - configuration.bias = bias; + configuration.minDistance = minDistance; configuration.enableWaveIntrinsics = enableWaveIntrinsics ? 1u : 0u; } diff --git a/source/Core/CastorUtils/Miscellaneous/CpuInformations.cpp b/source/Core/CastorUtils/Miscellaneous/CpuInformations.cpp index ca35492363..48539ebf56 100644 --- a/source/Core/CastorUtils/Miscellaneous/CpuInformations.cpp +++ b/source/Core/CastorUtils/Miscellaneous/CpuInformations.cpp @@ -5,147 +5,37 @@ namespace castor { - namespace cpuinf - { - static String makeString( int32_t v ) - { - MbString result; - auto c = char( ( v >> 0 ) & 0xff ); - - if ( c ) - { - result += c; - c = char( ( v >> 8 ) & 0xff ); - } - - if ( c ) - { - result += c; - c = char( ( v >> 16 ) & 0xff ); - } - - if ( c ) - { - result += c; - c = char( ( v >> 24 ) & 0xff ); - } - - if ( c ) - { - result += c; - } - - return castor::makeString( result ); - }; - } - namespace platform { - void callCpuid( uint32_t func, Array< int32_t, 4 > & data ); uint32_t getCoreCount(); String getCPUModel(); + String getCPUVendor(); } CpuInformations::CpuInformationsInternal::CpuInformationsInternal() { - Vector< Array< int32_t, 4 > > datas{}; - Array< int32_t, 4 > data; - platform::callCpuid( 0u, data ); - auto ids = data[0]; - - for ( int32_t i = 0; i < ids; ++i ) - { - platform::callCpuid( uint32_t( i ), data ); - datas.push_back( data ); - } - - if ( !datas.empty() ) - { - m_vendor = cpuinf::makeString( datas[0][1] ); - m_vendor += cpuinf::makeString( datas[0][3] ); - m_vendor += cpuinf::makeString( datas[0][2] ); - - if ( m_vendor == cuT( "GenuineIntel" ) ) - { - m_isIntel = true; - } - else if ( m_vendor == cuT( "AuthenticAMD" ) ) - { - m_isAMD = true; - } - } + m_coreCount = platform::getCoreCount(); + m_model = platform::getCPUModel(); + m_vendor = platform::getCPUVendor(); - // load bitset with 
flags for function 0x00000001 - if ( ids >= 1 ) + if ( m_vendor == cuT( "GenuineIntel" ) ) { - m_f_1_ECX = uint64_t( datas[1][2] ); - m_f_1_EDX = uint64_t( datas[1][3] ); + m_isIntel = true; } - - // load bitset with flags for function 0x00000007 - if ( ids >= 7 ) + else if ( m_vendor == cuT( "AuthenticAMD" ) ) { - m_f_7_EBX = uint64_t( datas[7][1] ); - m_f_7_ECX = uint64_t( datas[7][2] ); + m_isAMD = true; } - - m_coreCount = platform::getCoreCount(); - m_model = platform::getCPUModel(); } CpuInformations::CpuInformationsInternal const CpuInformations::m_internal; OutputStream & operator<<( OutputStream & stream, CpuInformations const & object ) { - auto support = []( bool supported ) - { - return ( supported ? "supported" : "not supported" ); - }; - stream << "CPU informations:" << std::endl; stream << " Vendor: " << object.getVendor() << std::endl; stream << " Model: " << object.getModel() << std::endl; - stream << " Core count: " << object.getCoreCount() << std::endl; - stream << " ADX: " << support( object.ADX() ) << std::endl; - stream << " AES: " << support( object.AES() ) << std::endl; - stream << " AVX: " << support( object.AVX() ) << std::endl; - stream << " AVX2: " << support( object.AVX2() ) << std::endl; - stream << " AVX512CD: " << support( object.AVX512CD() ) << std::endl; - stream << " AVX512ER: " << support( object.AVX512ER() ) << std::endl; - stream << " AVX512F: " << support( object.AVX512F() ) << std::endl; - stream << " AVX512PF: " << support( object.AVX512PF() ) << std::endl; - stream << " BMI1: " << support( object.BMI1() ) << std::endl; - stream << " BMI2: " << support( object.BMI2() ) << std::endl; - stream << " CLFSH: " << support( object.CLFSH() ) << std::endl; - stream << " CMPXCHG16B: " << support( object.CMPXCHG16B() ) << std::endl; - stream << " CX8: " << support( object.CX8() ) << std::endl; - stream << " ERMS: " << support( object.ERMS() ) << std::endl; - stream << " F16C: " << support( object.F16C() ) << std::endl; - stream << " 
FMA: " << support( object.FMA() ) << std::endl; - stream << " FSGSBASE: " << support( object.FSGSBASE() ) << std::endl; - stream << " FXSR: " << support( object.FXSR() ) << std::endl; - stream << " HLE: " << support( object.HLE() ) << std::endl; - stream << " INVPCID: " << support( object.INVPCID() ) << std::endl; - stream << " MMX: " << support( object.MMX() ) << std::endl; - stream << " MONITOR: " << support( object.MONITOR() ) << std::endl; - stream << " MOVBE: " << support( object.MOVBE() ) << std::endl; - stream << " MSR: " << support( object.MSR() ) << std::endl; - stream << " OSXSAVE: " << support( object.OSXSAVE() ) << std::endl; - stream << " PCLMULQDQ: " << support( object.PCLMULQDQ() ) << std::endl; - stream << " POPCNT: " << support( object.POPCNT() ) << std::endl; - stream << " PREFETCHWT1: " << support( object.PREFETCHWT1() ) << std::endl; - stream << " RDRAND: " << support( object.RDRAND() ) << std::endl; - stream << " RDSEED: " << support( object.RDSEED() ) << std::endl; - stream << " RTM: " << support( object.RTM() ) << std::endl; - stream << " SEP: " << support( object.SEP() ) << std::endl; - stream << " SHA: " << support( object.SHA() ) << std::endl; - stream << " SSE: " << support( object.SSE() ) << std::endl; - stream << " SSE2: " << support( object.SSE2() ) << std::endl; - stream << " SSE3: " << support( object.SSE3() ) << std::endl; - stream << " SSE4.1: " << support( object.SSE41() ) << std::endl; - stream << " SSE4.2: " << support( object.SSE42() ) << std::endl; - stream << " SSSE3: " << support( object.SSSE3() ) << std::endl; - stream << " XSAVE: " << support( object.XSAVE() ); + stream << " Core count: " << object.getCoreCount(); return stream; } } diff --git a/source/Core/CastorUtils/Multithreading/SpinMutex.cpp b/source/Core/CastorUtils/Multithreading/SpinMutex.cpp index 4fc0846f46..0db35caf20 100644 --- a/source/Core/CastorUtils/Multithreading/SpinMutex.cpp +++ b/source/Core/CastorUtils/Multithreading/SpinMutex.cpp @@ -7,6 +7,10 @@ See 
LICENSE file in root folder # include #endif +#if defined( __ARM_ACLE ) +# include +#endif + namespace castor { void SpinMutex::lock()noexcept @@ -22,9 +26,10 @@ namespace castor { #if defined( CU_CompilerMSVC ) _mm_pause(); -#elif defined( CU_PlatformAndroid ) -#else +#elif defined( CU_ArchX86_64 ) || defined( CU_ArchX86_32 ) __builtin_ia32_pause(); +#elif defined( __ARM_ACLE ) + __yield(); #endif } } diff --git a/source/Core/CastorUtils/Platform/Android/AndroidCpuInformations.cpp b/source/Core/CastorUtils/Platform/Android/AndroidCpuInformations.cpp index 79c031b7b4..8d362f9db0 100644 --- a/source/Core/CastorUtils/Platform/Android/AndroidCpuInformations.cpp +++ b/source/Core/CastorUtils/Platform/Android/AndroidCpuInformations.cpp @@ -12,9 +12,6 @@ namespace castor::platform #pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-declarations" - void callCpuid( uint32_t func, Array< int32_t, 4 > & p_data ) - { - } uint32_t getCoreCount() { @@ -25,6 +22,12 @@ namespace castor::platform { return String{}; } + + String getCPUVendor() + { + return String{}; + } + #pragma GCC diagnostic pop #pragma clang diagnostic pop } diff --git a/source/Core/CastorUtils/Platform/Linux/LinuxCpuInformations.cpp b/source/Core/CastorUtils/Platform/Linux/LinuxCpuInformations.cpp index f8ed07e1a3..1f324d0976 100644 --- a/source/Core/CastorUtils/Platform/Linux/LinuxCpuInformations.cpp +++ b/source/Core/CastorUtils/Platform/Linux/LinuxCpuInformations.cpp @@ -11,25 +11,13 @@ #include #include -namespace castor::platform -{ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-declarations" - void callCpuid( uint32_t func, Array< int32_t, 4 > & p_data ) - { - uint32_t a{}; - uint32_t b{}; - uint32_t c{}; - uint32_t d{}; - __get_cpuid( func, &a, &b, &c, &d ); - p_data[0] = int32_t( a ); - p_data[1] = int32_t( b ); - 
p_data[2] = int32_t( c ); - p_data[3] = int32_t( d ); - } +namespace castor::platform +{ uint32_t getCoreCount() { char res[128]; @@ -68,8 +56,99 @@ namespace castor::platform return makeString( result ); } +} + +#if defined( CU_ArchX86_64 ) || defined( CU_ArchX86_32 ) +# include + +namespace castor::platform +{ + namespace linux_x86 + { + static String makeString( int32_t v ) + { + MbString result; + auto c = char( ( v >> 0 ) & 0xff ); + + if ( c ) + { + result += c; + c = char( ( v >> 8 ) & 0xff ); + } + + if ( c ) + { + result += c; + c = char( ( v >> 16 ) & 0xff ); + } + + if ( c ) + { + result += c; + c = char( ( v >> 24 ) & 0xff ); + } + + if ( c ) + { + result += c; + } + + return castor::makeString( result ); + }; + + static void callCpuid( uint32_t func, Array< int32_t, 4 > & p_data ) + { + uint32_t a{}; + uint32_t b{}; + uint32_t c{}; + uint32_t d{}; + __get_cpuid( func, &a, &b, &c, &d ); + p_data[0] = int32_t( a ); + p_data[1] = int32_t( b ); + p_data[2] = int32_t( c ); + p_data[3] = int32_t( d ); + } + } + + String getCPUVendor() + { + Vector< Array< int32_t, 4 > > datas{}; + Array< int32_t, 4 > data; + linux_x86::callCpuid( 0u, data ); + auto ids = data[0]; + + for ( int32_t i = 0; i < ids; ++i ) + { + linux_x86::callCpuid( uint32_t( i ), data ); + datas.push_back( data ); + } + + String result; + + if ( !datas.empty() ) + { + result = linux_x86::makeString( datas[0][1] ); + result += linux_x86::makeString( datas[0][3] ); + result += linux_x86::makeString( datas[0][2] ); + } + + return string::trim( result ); + } +} + +#else + +namespace castor::platform +{ + String getCPUVendor() + { + return String{}; + } +} + +#endif + #pragma GCC diagnostic pop #pragma clang diagnostic pop -} #endif diff --git a/source/Core/CastorUtils/Platform/MacOS/MacOSCpuInformations.cpp b/source/Core/CastorUtils/Platform/MacOS/MacOSCpuInformations.cpp index 4446112d05..cbd203c1c6 100644 --- a/source/Core/CastorUtils/Platform/MacOS/MacOSCpuInformations.cpp +++ 
b/source/Core/CastorUtils/Platform/MacOS/MacOSCpuInformations.cpp @@ -6,29 +6,16 @@ #include "CastorUtils/Miscellaneous/CpuInformations.hpp" #include "CastorUtils/Miscellaneous/StringUtils.hpp" -#include #include -namespace castor::platform -{ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-declarations" - void callCpuid( uint32_t func, Array< int32_t, 4 > & p_data ) - { - uint32_t a{}; - uint32_t b{}; - uint32_t c{}; - uint32_t d{}; - __get_cpuid( func, &a, &b, &c, &d ); - p_data[0] = int32_t( a ); - p_data[1] = int32_t( b ); - p_data[2] = int32_t( c ); - p_data[3] = int32_t( d ); - } +namespace castor::platform +{ uint32_t getCoreCount() { Array< int, 4u > mib; @@ -69,8 +56,99 @@ namespace castor::platform return makeString( result ); } +} + +#if defined( CU_ArchX86_64 ) || defined( CU_ArchX86_32 ) +# include + +namespace castor::platform +{ + namespace macos_x86 + { + static String makeString( int32_t v ) + { + MbString result; + auto c = char( ( v >> 0 ) & 0xff ); + + if ( c ) + { + result += c; + c = char( ( v >> 8 ) & 0xff ); + } + + if ( c ) + { + result += c; + c = char( ( v >> 16 ) & 0xff ); + } + + if ( c ) + { + result += c; + c = char( ( v >> 24 ) & 0xff ); + } + + if ( c ) + { + result += c; + } + + return castor::makeString( result ); + }; + + static void callCpuid( uint32_t func, Array< int32_t, 4 > & p_data ) + { + uint32_t a{}; + uint32_t b{}; + uint32_t c{}; + uint32_t d{}; + __get_cpuid( func, &a, &b, &c, &d ); + p_data[0] = int32_t( a ); + p_data[1] = int32_t( b ); + p_data[2] = int32_t( c ); + p_data[3] = int32_t( d ); + } + } + + String getCPUVendor() + { + Vector< Array< int32_t, 4 > > datas{}; + Array< int32_t, 4 > data; + macos_x86::callCpuid( 0u, data ); + auto ids = data[0]; + + for ( int32_t i = 0; i < ids; ++i ) + { + macos_x86::callCpuid( uint32_t( i ), data ); + 
datas.push_back( data ); + } + + String result; + + if ( !datas.empty() ) + { + result = macos_x86::makeString( datas[0][1] ); + result += macos_x86::makeString( datas[0][3] ); + result += macos_x86::makeString( datas[0][2] ); + } + + return string::trim( result ); + } +} + +#else + +namespace castor::platform +{ + String getCPUVendor() + { + return String{}; + } +} + +#endif + #pragma GCC diagnostic pop #pragma clang diagnostic pop -} #endif diff --git a/source/Core/CastorUtils/Platform/Win32/Win32CpuInformations.cpp b/source/Core/CastorUtils/Platform/Win32/Win32CpuInformations.cpp index 54878b9805..4806daaead 100644 --- a/source/Core/CastorUtils/Platform/Win32/Win32CpuInformations.cpp +++ b/source/Core/CastorUtils/Platform/Win32/Win32CpuInformations.cpp @@ -7,11 +7,23 @@ #include "CastorUtils/Miscellaneous/StringUtils.hpp" #include -#include namespace castor::platform { - namespace win32 + uint32_t getCoreCount() + { + SYSTEM_INFO sysinfo = { 0 }; + ::GetSystemInfo( &sysinfo ); + return uint32_t( sysinfo.dwNumberOfProcessors ); + } +} + +#if defined( CU_ArchX86_64 ) || defined( CU_ArchX86_32 ) +# include + +namespace castor::platform +{ + namespace win32_x86 { static String makeString( int32_t v ) { @@ -43,59 +55,94 @@ namespace castor::platform return castor::makeString( result ); }; - } -#if defined( CU_CompilerMSVC ) +# if defined( CU_CompilerMSVC ) - void callCpuid( uint32_t func, Array< int32_t, 4 > & data ) - { - __cpuid( data.data(), int( func ) ); + static void callCpuid( uint32_t func, Array< int32_t, 4 > & data ) + { + __cpuid( data.data(), int( func ) ); + } + +# else + + static void callCpuid( uint32_t func, Array< int32_t, 4 > & data ) + { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + __get_cpuid( func, &a, &b, &c, &d ); + data[0] = int32_t( a ); + data[1] = int32_t( b ); + data[2] = int32_t( c ); + data[3] = int32_t( d ); + } + +# endif } -#else + String getCPUModel() + { + Array< Array< int32_t, 4 >, 3 > brand{}; + win32_x86::callCpuid( 
0x80000002, brand[0] ); + win32_x86::callCpuid( 0x80000003, brand[1] ); + win32_x86::callCpuid( 0x80000004, brand[2] ); + auto result = win32_x86::makeString( brand[0][0] ); + result += win32_x86::makeString( brand[0][1] ); + result += win32_x86::makeString( brand[0][2] ); + result += win32_x86::makeString( brand[0][3] ); + result += win32_x86::makeString( brand[1][0] ); + result += win32_x86::makeString( brand[1][1] ); + result += win32_x86::makeString( brand[1][2] ); + result += win32_x86::makeString( brand[1][3] ); + result += win32_x86::makeString( brand[2][0] ); + result += win32_x86::makeString( brand[2][1] ); + result += win32_x86::makeString( brand[2][2] ); + result += win32_x86::makeString( brand[2][3] ); + return string::trim( result ); + } - void callCpuid( uint32_t func, Array< int32_t, 4 > & data ) + String getCPUVendor() { - uint32_t a; - uint32_t b; - uint32_t c; - uint32_t d; - __get_cpuid( func, &a, &b, &c, &d ); - data[0] = int32_t( a ); - data[1] = int32_t( b ); - data[2] = int32_t( c ); - data[3] = int32_t( d ); + Vector< Array< int32_t, 4 > > datas{}; + Array< int32_t, 4 > data; + win32_x86::callCpuid( 0u, data ); + auto ids = data[0]; + + for ( int32_t i = 0; i < ids; ++i ) + { + win32_x86::callCpuid( uint32_t( i ), data ); + datas.push_back( data ); + } + + String result; + + if ( !datas.empty() ) + { + result = win32_x86::makeString( datas[0][1] ); + result += win32_x86::makeString( datas[0][3] ); + result += win32_x86::makeString( datas[0][2] ); + } + + return string::trim( result ); } +} -#endif +#else - uint32_t getCoreCount() +namespace castor::platform +{ + String getCPUModel() { - SYSTEM_INFO sysinfo = { 0 }; - ::GetSystemInfo( &sysinfo ); - return uint32_t( sysinfo.dwNumberOfProcessors ); + return String{}; } - String getCPUModel() + String getCPUVendor() { - Array< Array< int32_t, 4 >, 3 > brand{}; - callCpuid( 0x80000002, brand[0] ); - callCpuid( 0x80000003, brand[1] ); - callCpuid( 0x80000004, brand[2] ); - auto result = 
win32::makeString( brand[0][0] ); - result += win32::makeString( brand[0][1] ); - result += win32::makeString( brand[0][2] ); - result += win32::makeString( brand[0][3] ); - result += win32::makeString( brand[1][0] ); - result += win32::makeString( brand[1][1] ); - result += win32::makeString( brand[1][2] ); - result += win32::makeString( brand[1][3] ); - result += win32::makeString( brand[2][0] ); - result += win32::makeString( brand[2][1] ); - result += win32::makeString( brand[2][2] ); - result += win32::makeString( brand[2][3] ); - return string::trim( result ); + return String{}; } } #endif + +#endif diff --git a/source/Core/SceneExporter/Text/TextClustersConfig.cpp b/source/Core/SceneExporter/Text/TextClustersConfig.cpp index 4079a73eab..a6ba57e9a0 100644 --- a/source/Core/SceneExporter/Text/TextClustersConfig.cpp +++ b/source/Core/SceneExporter/Text/TextClustersConfig.cpp @@ -17,7 +17,6 @@ namespace castor static const String splitSchemes[uint32_t( castor3d::ClusterSplitScheme::eCount )] = { getName( castor3d::ClusterSplitScheme::eExponentialBase ), - getName( castor3d::ClusterSplitScheme::eExponentialBiased ), getName( castor3d::ClusterSplitScheme::eLinear ), getName( castor3d::ClusterSplitScheme::eExponentialLinearHybrid ), }; @@ -37,7 +36,7 @@ namespace castor && writeOpt( file, cuT( "enable_reduce_warp_optimisation" ), object.enableReduceWarpOptimisation.value(), false ) && writeOpt( file, cuT( "enable_bvh_warp_optimisation" ), object.enableBVHWarpOptimisation.value(), true ) && writeOpt( file, cuT( "split_scheme" ), splitSchemes[uint32_t( object.splitScheme.value() )], splitSchemes[uint32_t( castor3d::ClusterSplitScheme::eExponentialLinearHybrid )] ) - && writeOpt( file, cuT( "bias" ), object.bias.value(), 1.0f ); + && writeOpt( file, cuT( "min_distance" ), object.minDistance.value(), 1.0f ); } return result;