Skip to content

Commit

Permalink
v4.5.0:
Browse files Browse the repository at this point in the history
HIGHLIGHTS:

- NRD: "ResourceDesc::stateNeeded" renamed to "ResourceDesc::descriptorType" to match "ResourceRangeDesc::descriptorType" because the former is a concatenation of the latter
- NRD: extended API for comfortable shader "printf" support (VK only)
- REBLUR: minor bug fixes

DETAILS:

- NRD: added shader "printf" support (VK only)
- NRD: "ResourceDesc::stateNeeded" renamed to "ResourceDesc::descriptorType" to match "ResourceRangeDesc::descriptorType"
- REBLUR: added missing "materialID" test for "vmb" specular motion
- REBLUR: improved fast history accumulation, which improves color clamping behavior
- NRD INTEGRATION: updated NRI
- Updated comments
- Updated README
- Updated UPDATE
- Updated deps
  • Loading branch information
dzhdanNV committed Feb 20, 2024
1 parent 26d8b79 commit 3426f9e
Show file tree
Hide file tree
Showing 21 changed files with 137 additions and 80 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ if (NOT NRD_DISABLE_SHADER_COMPILATION)

if (NRD_EMBEDS_SPIRV_SHADERS)
set (SHADERMAKE_COMMANDS ${SHADERMAKE_COMMANDS} COMMAND ShaderMake -p SPIRV --compiler "${DXC_SPIRV_PATH}" ${SHADERMAKE_GENERAL_ARGS}
-D VULKAN
--sRegShift 100
--tRegShift 200
--bRegShift 300
Expand Down
2 changes: 1 addition & 1 deletion External/MathLib
2 changes: 1 addition & 1 deletion External/ShaderMake
6 changes: 3 additions & 3 deletions Include/NRD.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#include <cstddef>

#define NRD_VERSION_MAJOR 4
#define NRD_VERSION_MINOR 4
#define NRD_VERSION_BUILD 3
#define NRD_VERSION_DATE "2 January 2024"
#define NRD_VERSION_MINOR 5
#define NRD_VERSION_BUILD 0
#define NRD_VERSION_DATE "20 February 2024"

#if defined(_MSC_VER)
#define NRD_CALL __fastcall
Expand Down
8 changes: 4 additions & 4 deletions Include/NRDDescs.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ namespace nrd

struct ResourceDesc
{
DescriptorType stateNeeded;
DescriptorType descriptorType;
ResourceType type;
uint16_t indexInPool;
};
Expand All @@ -477,7 +477,7 @@ namespace nrd
ComputeShaderDesc computeShaderSPIRV;
const char* shaderFileName;
const char* shaderEntryPointName;
const ResourceRangeDesc* resourceRanges;
const ResourceRangeDesc* resourceRanges; // up to 2 ranges: "TEXTURE" inputs (optional) and "TEXTURE_STORAGE" outputs
uint32_t resourceRangesNum;

// Hint that pipeline has a constant buffer with shared parameters from "InstanceDesc"
Expand Down Expand Up @@ -528,9 +528,9 @@ namespace nrd
{
// ( Optional )
const char* name;
Identifier identifier; // which denoiser this dispatch belongs to
Identifier identifier; // denoiser this dispatch belongs to

// Concatenated resources for all "resourceRanges" descriptions in DenoiserDesc::pipelines[ pipelineIndex ]
// Concatenated resources for all "resourceRanges" in "DenoiserDesc::pipelines[ pipelineIndex ]"
const ResourceDesc* resources;
uint32_t resourcesNum;

Expand Down
1 change: 1 addition & 0 deletions Include/NRDSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ namespace nrd
float splitScreen = 0.0f;

// For internal needs
uint16_t printfAt[2] = {9999, 9999}; // thread (pixel) position
float debug = 0.0f;

// (pixels) - viewport origin
Expand Down
4 changes: 2 additions & 2 deletions Integration/NRDIntegration.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
// "state->texture" represents the resource, the rest represents the state.
struct NrdIntegrationTexture
{
nri::TextureTransitionBarrierDesc* state;
nri::TextureBarrierDesc* state;
nri::Format format;
};

Expand Down Expand Up @@ -112,7 +112,7 @@ class NrdIntegration
std::vector<NrdIntegrationTexture> m_TexturePool;
std::map<uint64_t, nri::Descriptor*> m_CachedDescriptors;
std::vector<std::vector<nri::Descriptor*>> m_DescriptorsInFlight;
std::vector<nri::TextureTransitionBarrierDesc> m_ResourceState;
std::vector<nri::TextureBarrierDesc> m_ResourceState;
std::vector<nri::PipelineLayout*> m_PipelineLayouts;
std::vector<nri::Pipeline*> m_Pipelines;
std::vector<nri::Memory*> m_MemoryAllocations;
Expand Down
34 changes: 17 additions & 17 deletions Integration/NRDIntegration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#include "NRDIntegration.h"

static_assert(NRD_VERSION_MAJOR >= 4 && NRD_VERSION_MINOR >= 4, "Unsupported NRD version!");
static_assert(NRI_VERSION_MAJOR >= 1 && NRI_VERSION_MINOR >= 110, "Unsupported NRI version!");
static_assert(NRI_VERSION_MAJOR >= 1 && NRI_VERSION_MINOR >= 118, "Unsupported NRI version!");

#ifdef _WIN32
#define alloca _alloca
Expand Down Expand Up @@ -175,14 +175,14 @@ void NrdIntegration::CreatePipelines()
nri::DescriptorRangeDesc* resourcesRanges = descriptorRanges + 1;

// Constant buffer
const nri::DynamicConstantBufferDesc dynamicConstantBufferDesc = {constantBufferOffset + instanceDesc.constantBufferRegisterIndex, nri::ShaderStage::COMPUTE};
const nri::DynamicConstantBufferDesc dynamicConstantBufferDesc = {constantBufferOffset + instanceDesc.constantBufferRegisterIndex, nri::StageBits::COMPUTE_SHADER};
descriptorSetConstantBuffer.dynamicConstantBuffers = &dynamicConstantBufferDesc;

// Samplers
samplersRange->descriptorType = nri::DescriptorType::SAMPLER;
samplersRange->baseRegisterIndex = samplerOffset + instanceDesc.samplersBaseRegisterIndex;
samplersRange->descriptorNum = instanceDesc.samplersNum;
samplersRange->visibility = nri::ShaderStage::COMPUTE;
samplersRange->shaderStages = nri::StageBits::COMPUTE_SHADER;

// Pipelines
for (uint32_t i = 0; i < instanceDesc.pipelinesNum; i++)
Expand All @@ -207,7 +207,7 @@ void NrdIntegration::CreatePipelines()
}

resourcesRanges[j].descriptorNum = nrdResourceRange.descriptorsNum;
resourcesRanges[j].visibility = nri::ShaderStage::COMPUTE;
resourcesRanges[j].shaderStages = nri::StageBits::COMPUTE_SHADER;
}

// Descriptor sets
Expand All @@ -232,7 +232,7 @@ void NrdIntegration::CreatePipelines()
pipelineLayoutDesc.descriptorSetNum = descriptorSetNum;
pipelineLayoutDesc.descriptorSets = descriptorSetDescs;
pipelineLayoutDesc.ignoreGlobalSPIRVOffsets = true;
pipelineLayoutDesc.stageMask = nri::PipelineLayoutShaderStageBits::COMPUTE;
pipelineLayoutDesc.shaderStages = nri::StageBits::COMPUTE_SHADER;

nri::PipelineLayout* pipelineLayout = nullptr;
NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->CreatePipelineLayout(*m_Device, pipelineLayoutDesc, pipelineLayout));
Expand All @@ -247,7 +247,7 @@ void NrdIntegration::CreatePipelines()
computeShader.bytecode = nrdComputeShader.bytecode;
computeShader.size = nrdComputeShader.size;
computeShader.entryPointName = nrdPipelineDesc.shaderEntryPointName;
computeShader.stage = nri::ShaderStage::COMPUTE;
computeShader.stage = nri::StageBits::COMPUTE_SHADER;
#ifdef PROJECT_NAME
}
else
Expand All @@ -256,7 +256,7 @@ void NrdIntegration::CreatePipelines()

nri::ComputePipelineDesc pipelineDesc = {};
pipelineDesc.pipelineLayout = pipelineLayout;
pipelineDesc.computeShader = computeShader;
pipelineDesc.shader = computeShader;

nri::Pipeline* pipeline = nullptr;
NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->CreateComputePipeline(*m_Device, pipelineDesc, pipeline));
Expand Down Expand Up @@ -300,7 +300,7 @@ void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHe
nrdTexture.format = format;
m_TexturePool[i] = nrdTexture;

nrdTexture.state[0] = nri::TextureTransitionFromUnknown(texture, {nri::AccessBits::UNKNOWN, nri::TextureLayout::UNKNOWN}, 0, 1);
nrdTexture.state[0] = nri::TextureBarrierFromUnknown(texture, {nri::AccessBits::UNKNOWN, nri::Layout::UNKNOWN}, 0, 1);

// Adjust memory usage
nri::MemoryDesc memoryDesc = {};
Expand Down Expand Up @@ -481,10 +481,10 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor
nri::DescriptorRangeUpdateDesc* resourceRanges = (nri::DescriptorRangeUpdateDesc*)alloca(sizeof(nri::DescriptorRangeUpdateDesc) * pipelineDesc.resourceRangesNum);
memset(resourceRanges, 0, sizeof(nri::DescriptorRangeUpdateDesc) * pipelineDesc.resourceRangesNum);

nri::TextureTransitionBarrierDesc* transitions = (nri::TextureTransitionBarrierDesc*)alloca(sizeof(nri::TextureTransitionBarrierDesc) * dispatchDesc.resourcesNum);
memset(transitions, 0, sizeof(nri::TextureTransitionBarrierDesc) * dispatchDesc.resourcesNum);
nri::TextureBarrierDesc* transitions = (nri::TextureBarrierDesc*)alloca(sizeof(nri::TextureBarrierDesc) * dispatchDesc.resourcesNum);
memset(transitions, 0, sizeof(nri::TextureBarrierDesc) * dispatchDesc.resourcesNum);

nri::TransitionBarrierDesc transitionBarriers = {};
nri::BarrierGroupDesc transitionBarriers = {};
transitionBarriers.textures = transitions;

uint32_t n = 0;
Expand Down Expand Up @@ -513,12 +513,12 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor
NRD_INTEGRATION_ASSERT(nrdTexture->format != nri::Format::UNKNOWN, "Format must be valid!");
}

const nri::AccessBits nextAccess = nrdResource.stateNeeded == nrd::DescriptorType::TEXTURE ? nri::AccessBits::SHADER_RESOURCE : nri::AccessBits::SHADER_RESOURCE_STORAGE;
const nri::TextureLayout nextLayout = nrdResource.stateNeeded == nrd::DescriptorType::TEXTURE ? nri::TextureLayout::SHADER_RESOURCE : nri::TextureLayout::GENERAL;
bool isStateChanged = nextAccess != nrdTexture->state->nextState.acessBits || nextLayout != nrdTexture->state->nextState.layout;
bool isStorageBarrier = nextAccess == nri::AccessBits::SHADER_RESOURCE_STORAGE && nrdTexture->state->nextState.acessBits == nri::AccessBits::SHADER_RESOURCE_STORAGE;
const nri::AccessBits nextAccess = nrdResource.descriptorType == nrd::DescriptorType::TEXTURE ? nri::AccessBits::SHADER_RESOURCE : nri::AccessBits::SHADER_RESOURCE_STORAGE;
const nri::Layout nextLayout = nrdResource.descriptorType == nrd::DescriptorType::TEXTURE ? nri::Layout::SHADER_RESOURCE : nri::Layout::SHADER_RESOURCE_STORAGE;
bool isStateChanged = nextAccess != nrdTexture->state->after.access || nextLayout != nrdTexture->state->after.layout;
bool isStorageBarrier = nextAccess == nri::AccessBits::SHADER_RESOURCE_STORAGE && nrdTexture->state->after.access == nri::AccessBits::SHADER_RESOURCE_STORAGE;
if (isStateChanged || isStorageBarrier)
transitions[transitionBarriers.textureNum++] = nri::TextureTransitionFromState(*nrdTexture->state, {nextAccess, nextLayout}, 0, 1);
transitions[transitionBarriers.textureNum++] = nri::TextureBarrierFromState(*nrdTexture->state, {nextAccess, nextLayout}, 0, 1);

uint64_t resource = m_NRI->GetTextureNativeObject(*nrdTexture->state->texture, 0);
uint64_t key = NRD_CreateDescriptorKey(resource, isStorage);
Expand Down Expand Up @@ -595,7 +595,7 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor
m_NRI->UpdateDescriptorRanges(*descriptorSets[descriptorSetResourcesIndex], nri::ALL_NODES, instanceDesc.samplersSpaceIndex == instanceDesc.resourcesSpaceIndex ? 1 : 0, pipelineDesc.resourceRangesNum, resourceRanges);

// Rendering
m_NRI->CmdPipelineBarrier(commandBuffer, &transitionBarriers, nullptr, nri::BarrierDependency::ALL_STAGES);
m_NRI->CmdBarrier(commandBuffer, transitionBarriers);
m_NRI->CmdSetPipelineLayout(commandBuffer, *pipelineLayout);

nri::Pipeline* pipeline = m_Pipelines[dispatchDesc.pipelineIndex];
Expand Down
43 changes: 30 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# NVIDIA REAL-TIME DENOISERS v4.4.3 (NRD)
# NVIDIA REAL-TIME DENOISERS v4.5.0 (NRD)

[![Build NRD SDK](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml/badge.svg)](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml)

Expand All @@ -18,7 +18,7 @@ For quick starting see *[NRD sample](https://github.com/NVIDIAGameWorks/NRDSampl
Performance on RTX 4080 @ 1440p (native resolution, default denoiser settings):
- `REBLUR_DIFFUSE_SPECULAR` - 2.45 ms
- `RELAX_DIFFUSE_SPECULAR` - 2.90 ms
- `SIGMA_DIFFUSE_SPECULAR` - 0.30 ms
- `SIGMA_SHADOW` - 0.30 ms

Supported signal types:
- *RELAX*:
Expand Down Expand Up @@ -58,10 +58,10 @@ CMake options:
- `NRD_DXC_CUSTOM_PATH` - custom DXC to use if Vulkan SDK is not installed
- `NRD_NORMAL_ENCODING` - *normal* encoding for the entire library
- `NRD_ROUGHNESS_ENCODING` - *roughness* encoding for the entire library
- `NRD_EMBEDS_DXBC_SHADERS` - NRD compiles and embeds DXBC shaders (ON by default on Windows)
- `NRD_EMBEDS_DXIL_SHADERS` - NRD compiles and embeds DXIL shaders (ON by default on Windows)
- `NRD_EMBEDS_SPIRV_SHADERS` - NRD compiles and embeds SPIRV shaders (ON by default)
- `NRD_DISABLE_SHADER_COMPILATION` - disable shader compilation on the NRD side, NRD assumes that shaders are already compiled externally and have been put into `NRD_SHADERS_PATH` folder
- `NRD_EMBEDS_DXBC_SHADERS` - *NRD* compiles and embeds DXBC shaders (ON by default on Windows)
- `NRD_EMBEDS_DXIL_SHADERS` - *NRD* compiles and embeds DXIL shaders (ON by default on Windows)
- `NRD_EMBEDS_SPIRV_SHADERS` - *NRD* compiles and embeds SPIRV shaders (ON by default)
- `NRD_DISABLE_SHADER_COMPILATION` - disable shader compilation on the *NRD* side, *NRD* assumes that shaders are already compiled externally and have been put into `NRD_SHADERS_PATH` folder

`NRD_NORMAL_ENCODING` and `NRD_ROUGHNESS_ENCODING` can be defined only *once* during project deployment. These settings are dumped into the `NRDEncoding.hlsli` file, which needs to be included on the application side prior to `NRD.hlsli` inclusion to deliver encoding settings matching *NRD* settings. `LibraryDesc` includes encoding settings too. It can be used to verify that the library meets the application's expectations.

Expand Down Expand Up @@ -452,8 +452,9 @@ nrd::InstanceCreationDesc instanceCreationDesc = {};
instanceCreationDesc.denoisers = denoiserDescs;
instanceCreationDesc.denoisersNum = GetCountOf(denoiserDescs);

// NRD itself is flexible and supports any kind of DRS, but NRD INTEGRATION pre-allocate resources with
// statically defines dimensions. DRS works only by adjusting the viewport: "CommonSettings::rectSize"
// NRD itself is flexible and supports any kind of dynamic resolution scaling, but NRD INTEGRATION pre-
// allocates resources with statically defined dimensions. DRS is only supported by adjusting the viewport
// via "CommonSettings::rectSize"
bool result = NRD.Initialize(resourceWidth, resourceHeight, instanceCreationDesc, *nriDevice, NRI, NRI);

//=======================================================================================================
Expand Down Expand Up @@ -488,8 +489,8 @@ for (uint32_t i = 0; i < N; i++)
// Useful information:
// SRV = nri::AccessBits::SHADER_RESOURCE, nri::Layout::SHADER_RESOURCE
// UAV = nri::AccessBits::SHADER_RESOURCE_STORAGE, nri::Layout::SHADER_RESOURCE_STORAGE
entryDesc.nextAccess = ConvertResourceStateToAccessBits( myResource->GetCurrentState() );
entryDesc.nextLayout = ConvertResourceStateToLayout( myResource->GetCurrentState() );
entryDesc.nextState.accessBits = ConvertResourceStateToAccessBits( myResource->GetCurrentState() );
entryDesc.nextState.layout = ConvertResourceStateToLayout( myResource->GetCurrentState() );
}

//=======================================================================================================
Expand Down Expand Up @@ -644,15 +645,31 @@ IN_MV = GetMotionAt( B );
## INTERACTION WITH `INFs` AND `NANs`
- NRD doesn't touch pixels outside of viewport: `INFs / NANs` are allowed
- NRD doesn't touch pixels outside of denoising range: `INFs / NANs` are allowed
- *NRD* doesn't touch pixels outside of viewport: `INFs / NANs` are allowed
- *NRD* doesn't touch pixels outside of denoising range: `INFs / NANs` are allowed
- `INFs / NANs` are not allowed for pixels inside the viewport and denoising range
- `INFs` can be used in `IN_VIEWZ`, but not recommended
## INTERACTION WITH FRAME GENERATION TECHNIQUES
Frame generation (FG) techniques boost FPS by interpolating between the 2 last available frames. *NRD* works better when the framerate increases, because it gets more data per second. This is not the case for FG, because all underlying rendering pipeline passes (like denoising) continue to work at the original non-boosted framerate.
## HAIR DENOISING TIPS
*NRD* tries to preserve jittering at least on geometrical edges, which is essential for upscalers, usually applied at the end of the rendering pipeline. This naturally moves the problem of anti-aliasing to the application side. In turn, it implies the following obvious suggestions:
- trace at higher resolution, denoise, apply AA and downscale
- apply a high-quality upscaler in "AA-only" mode, i.e. without reducing the tracing resolution (for example, *DLSS* in *DLAA mode*)
Sub-pixel thin geometry of strand-based hair transforms the "normals guide" into a jittering & flickering pixel mess, i.e. the guide itself becomes noisy. It worsens denoising IQ. At least for *NRD*, it's better to replace geometry normals in the "normals guide" with a vector `= normalize( cross( T, B ) )`, where:
- `T` - hair strand tangent vector
- `B` - is not a classic binormal, it's more an averaged direction to a bunch of closest hair strands (in many cases it's a binormal vector of underlying head / body mesh)
- `B` can be simplified to `normalize( cross( V, T ) )`, where `V` is the view vector
- in other words, `B` must follow the following rules:
- `cross( T, B ) != 0`
- `B` must not follow hair strand "tube"
Hair strand tangent vectors *can't* be used as a "normals guide" for *NRD*, because BRDF and curvature related calculations require a vector that can be considered a "normal" vector.
# RECOMMENDATIONS AND BEST PRACTICES: LESSER TIPS
**[NRD]** The *NRD API* has been designed to support integration into native VULKAN apps. If the RHI you work with is DX11-like, not all provided data will be needed.
Expand All @@ -675,7 +692,7 @@ Frame generation (FG) techniques boost FPS by interpolating between 2 last avail
**[NRD]** *NRD* can track camera motion internally. To simplify the initial integration, pass all MVs set to 0 (you can use `CommonSettings::motionVectorScale = {0}` for this) and set `CommonSettings::isMotionVectorInWorldSpace = true`. Enable application-provided MVs after getting denoising working on static objects.
**[NRD]** Using 2D MVs can lead to massive history reset on moving objects, because 2D motion provides information only about pixel screen position but not about real 3D world position. Consider using 2.5D or 3D MVs instead. 2.5D motion, which is 2D motion with additionally provided `viewZ` delta (i.e. `viewZprev = viewZ + MV.z`), is even better, because it has the same benefits as 3D motion, but doesn't suffer from imprecision problems caused by world-space delta rounding to FP16 during MV patching on the NRD side.
**[NRD]** Using 2D MVs can lead to massive history reset on moving objects, because 2D motion provides information only about pixel screen position but not about real 3D world position. Consider using 2.5D or 3D MVs instead. 2.5D motion, which is 2D motion with additionally provided `viewZ` delta (i.e. `viewZprev = viewZ + MV.z`), is even better, because it has the same benefits as 3D motion, but doesn't suffer from imprecision problems caused by world-space delta rounding to FP16 during MV patching on the *NRD* side.
**[NRD]** Firstly, try to get a working reprojection on a diffuse signal for camera rotations only (without camera motion).
Expand Down
4 changes: 2 additions & 2 deletions Resources/Version.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Versioning rules:
*/

#define VERSION_MAJOR 4
#define VERSION_MINOR 4
#define VERSION_BUILD 3
#define VERSION_MINOR 5
#define VERSION_BUILD 0

#define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD encoding=NRD_NORMAL_ENCODING.NRD_ROUGHNESS_ENCODING)
14 changes: 14 additions & 0 deletions Shaders/Include/Common.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,20 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
/* Not an elegant way to solve loop variables declaration duplication problem */ \
int i, j

// Printf
/*
Usage:
#ifdef PRINTF_AVAILABLE
PrintfAt( "a = %f, b = %f, c = %u", a, b, c );
#endif
*/
#if( defined( __hlsl_dx_compiler ) && defined( VULKAN ) )
#define PRINTF_AVAILABLE
#define PrintfAt(...) \
if( uint( pixelPos.x ) == gPrintfAt.x && uint( pixelPos.y ) == gPrintfAt.y ) \
printf(__VA_ARGS__)
#endif

//==================================================================================================================
// SHARED FUNCTIONS
//==================================================================================================================
Expand Down
Loading

0 comments on commit 3426f9e

Please sign in to comment.