diff --git a/src/core/src/render_techniques/migi/migi.comp b/src/core/src/render_techniques/migi/migi.comp index 6c19d58..290ebb1 100644 --- a/src/core/src/render_techniques/migi/migi.comp +++ b/src/core/src/render_techniques/migi/migi.comp @@ -146,6 +146,9 @@ void SSRC_ClearCounters() { // Allocate fixed uniform probes for current frame [numthreads(WAVE_SIZE, 1, 1)] void SSRC_AllocateUniformProbes (int DispatchID : SV_DispatchThreadID) { + if(DispatchID >= MI.UniformScreenProbeCount) { // probe indices are zero-based: [0, UniformScreenProbeCount) + return; + } ProbeHeader Header; Header.ScreenPosition = GetUniformScreenProbeScreenPosition(DispatchID); float Depth = g_DepthTexture.Load(int3(Header.ScreenPosition, 0)).x; @@ -160,6 +163,11 @@ void SSRC_AllocateUniformProbes (int DispatchID : SV_DispatchThreadID) { } BasisGroupOffset = WaveReadLaneFirst(BasisGroupOffset); Header.BasisOffset = BasisGroupOffset + BasisOffset; + // Clip over the maximum basis count + if(Header.BasisOffset + BasisCount > MI.MaxBasisCount) { + Header.Class = 0; + } + // Negative depths stands for invalid probes Header.LinearDepth = (bValid ? 1 : -1) * GetLinearDepth(Depth); int2 TileCoords = int2(DispatchID % MI.TileDimensions.x, DispatchID / MI.TileDimensions.x); @@ -185,7 +193,7 @@ void CalculateSSRCSampleWeightsForUniformScreenProbes ( out float4 Weights, bool bPrevious = false ) { - int2 ScreenCoordsProbeGrid = clamp(ScreenCoords - GetTileJitter(SSRC_TILE_SIZE, bPrevious), 0, MI.ScreenDimensions - 1.xx); + int2 ScreenCoordsProbeGrid = clamp(ScreenCoords - GetTileJitter(bPrevious), 0, MI.ScreenDimensions - 1.xx); int2 TileCoordsX00 = min(ScreenCoordsProbeGrid / SSRC_TILE_SIZE, MI.TileDimensions - 2); // Pad the bilinear filtering weights int BilinearExpand = 1; @@ -259,7 +267,7 @@ void CalculateSSRCSampleWeights ( int NumAdaptiveProbes = bPrevious ? 
g_RWPreviousTileAdaptiveProbeCountTexture[TileCoords] : g_RWTileAdaptiveProbeCountTexture[TileCoords]; - + [unroll(1 << (SSRC_MAX_ADAPTIVE_PROBE_LAYERS * 2))] for (uint AdaptiveProbeListIndex = 0; AdaptiveProbeListIndex < NumAdaptiveProbes; AdaptiveProbeListIndex++) { // TODO reconstruct probe data from G-Buffer to reduce VRAM bandwidth when shading @@ -304,40 +312,49 @@ void SSRC_AllocateAdaptiveProbes (int DispatchID : SV_DispatchThreadID, int Loca LocalNumProbesToAllocate = 0; } GroupMemoryBarrierWithGroupSync(); +// This macro is passed in by compiler arguments +#ifndef SSRC_ADAPTIVE_PROBE_LAYER +#define SSRC_ADAPTIVE_PROBE_LAYER 0 +#endif + const int AdaptiveProbeDownsampleFactor = SSRC_TILE_SIZE / (2 << SSRC_ADAPTIVE_PROBE_LAYER); + { - int2 DownsampledTileDimensions = MI.TileDimensions / g_AdaptiveProbeDownsampleFactor; + int2 DownsampledTileDimensions = MI.ScreenDimensions / AdaptiveProbeDownsampleFactor; int2 TileCoords = int2( DispatchID % DownsampledTileDimensions.x, DispatchID / DownsampledTileDimensions.x ); - // Compute the screen coords for current adaptive probe - int2 AdaptiveProbeScreenPosition = TileCoords * g_AdaptiveProbeDownsampleFactor + GetTileJitter(g_AdaptiveProbeDownsampleFactor); - float Depth = g_DepthTexture.Load(int3(AdaptiveProbeScreenPosition, 0)).x; - bool bValid = Depth < 1.f; - if(bValid) { - float3 WorldPosition = RecoverWorldPositionHiRes(AdaptiveProbeScreenPosition); - float LinearDepth = GetLinearDepth(Depth); - float3 GeometryNormal = normalize(g_GeometryNormalTexture.Load(int3(AdaptiveProbeScreenPosition, 0)).xyz * 2.f - 1.f); - SSRC_SampleData Sample; - - CalculateSSRCSampleWeights( - AdaptiveProbeScreenPosition, - WorldPosition, - LinearDepth, - GeometryNormal, - Sample - ); - - float Epsilon = .01f; - Sample.Weights /= max(dot(Sample.Weights, 1), Epsilon); - - float LightingIsValid = (dot(Sample.Weights, 1) < 1.0f - Epsilon) ? 
0.0f : 1.0f; - - if (!LightingIsValid) - { - int ListIndex; - InterlockedAdd(LocalNumProbesToAllocate, 1, ListIndex); - LocalProbeScreenPositionsToAllocate[ListIndex] = AdaptiveProbeScreenPosition; + // Omit the adaptive probe that overlaps with the previous layer + if(any((TileCoords&1) != 0)) { + // Compute the screen coords for current adaptive probe + int2 AdaptiveProbeScreenPosition = TileCoords * AdaptiveProbeDownsampleFactor + GetTileJitter(); + float Depth = g_DepthTexture.Load(int3(AdaptiveProbeScreenPosition, 0)).x; + bool bValid = Depth < 1.f; + if(bValid) { + float3 WorldPosition = RecoverWorldPositionHiRes(AdaptiveProbeScreenPosition); + float LinearDepth = GetLinearDepth(Depth); + float3 GeometryNormal = normalize(g_GeometryNormalTexture.Load(int3(AdaptiveProbeScreenPosition, 0)).xyz * 2.f - 1.f); + SSRC_SampleData Sample; + + CalculateSSRCSampleWeights( + AdaptiveProbeScreenPosition, + WorldPosition, + LinearDepth, + GeometryNormal, + Sample + ); + + float Epsilon = .01f; + Sample.Weights /= max(dot(Sample.Weights, 1), Epsilon); + + float LightingIsValid = (dot(Sample.Weights, 1) < 1.0f - Epsilon) ? 0.0f : 1.0f; + + if (!LightingIsValid) + { + int ListIndex; + InterlockedAdd(LocalNumProbesToAllocate, 1, ListIndex); + LocalProbeScreenPositionsToAllocate[ListIndex] = AdaptiveProbeScreenPosition; + } } } } @@ -352,7 +369,7 @@ void SSRC_AllocateAdaptiveProbes (int DispatchID : SV_DispatchThreadID, int Loca int AdaptiveProbeIndex = LocalID + LocalAdaptiveProbeOffset; - if(LocalID < LocalNumProbesToAllocate && AdaptiveProbeIndex < MI.MaxAdaptiveProbeCount) { + if(!MI.NoAdaptiveProbes && LocalID < LocalNumProbesToAllocate && AdaptiveProbeIndex < MI.MaxAdaptiveProbeCount) { // The probe must be valid upon allocation. 
int ScreenProbeIndex1 = AdaptiveProbeIndex + MI.UniformScreenProbeCount; ProbeHeader Header; @@ -368,6 +385,10 @@ void SSRC_AllocateAdaptiveProbes (int DispatchID : SV_DispatchThreadID, int Loca } BasisGroupOffset = WaveReadLaneFirst(BasisGroupOffset); Header.BasisOffset = BasisGroupOffset + BasisOffset; + // Clip over the maximum basis count + if(Header.BasisOffset + BasisCount > MI.MaxBasisCount) { + Header.Class = 0; + } Header.LinearDepth = GetLinearDepth(Depth); Header.Position = RecoverWorldPositionHiRes(Header.ScreenPosition); Header.Normal = normalize(g_GeometryNormalTexture.Load(int3(Header.ScreenPosition, 0)).xyz * 2.f - 1.f); @@ -396,6 +417,8 @@ int LocalSGNewIndex[SSRC_MAX_NUM_BASIS_PER_PROBE * 4]; void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : SV_GroupID) { int2 ProbeIndex = int2(GroupID % MI.TileDimensions.x, GroupID / MI.TileDimensions.x); ProbeHeader Header = GetScreenProbeHeader(ProbeIndex); + // FIXME: reprojection and clamping are still missing here. + // Until they are implemented, the sample weights below are evaluated at Header.ScreenPosition as-is. SSRC_SampleData Sample; CalculateSSRCSampleWeights( Header.ScreenPosition, @@ -428,6 +451,7 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S #if SSRC_MAX_NUM_BASIS_PER_PROBE <= 8 int BasisRankBase = 0; #else + [unroll((SSRC_MAX_NUM_BASIS_PER_PROBE * 4 + WAVE_SIZE - 1) / WAVE_SIZE)] for(int BasisRankBase = 0; BasisRankBase < NumBasis; BasisRankBase += WAVE_SIZE) { #endif int BasisRank = BasisRankBase + LocalID; @@ -441,7 +465,7 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S BasisOffset = BasisOffsets[BasisRank < NumBasis0 + BasisCount[2] ? 2 : 3] + BasisRank - NumBasis0; Weight = Sample.Weights[BasisRank < NumBasis0 + BasisCount[2] ? 
2 : 3]; } - SGData SG = FetchBasisData(BasisOffset + BasisRank); + SGData SG = FetchBasisData(BasisOffset + BasisRank, true); // FIXME directional reprojection is missing // Scale the color by the weight SG.Color = SG.Color * Weight; @@ -460,6 +484,7 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S #if SSRC_MAX_NUM_BASIS_PER_PROBE <= 8 int BasisRankBase = 0; #else + [unroll((SSRC_MAX_NUM_BASIS_PER_PROBE * 4 + WAVE_SIZE - 1) / WAVE_SIZE)] for(int BasisRankBase = 0; BasisRankBase < NumBasis; BasisRankBase += WAVE_SIZE) { #endif int BasisRank = BasisRankBase + LocalID; @@ -476,6 +501,7 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S #if SSRC_MAX_NUM_BASIS_PER_PROBE > 8 } #endif + [unroll((SSRC_MAX_NUM_BASIS_PER_PROBE * 4 + WAVE_SIZE - 1) / WAVE_SIZE)] for(int i = 0; i * WAVE_SIZE < NumBasis; i++) { int BasisRank = i * WAVE_SIZE + LocalID; if(BasisRank < NumBasis) { @@ -483,6 +509,7 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S } } GroupMemoryBarrierWithGroupSync(); + [unroll((SSRC_MAX_NUM_BASIS_PER_PROBE * 4 + WAVE_SIZE - 1) / WAVE_SIZE)] for(int i = 0; i * WAVE_SIZE < NumBasis; i++) { int BasisRank = i * WAVE_SIZE + LocalID; if(BasisRank < NumBasis) { @@ -499,19 +526,21 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S // Progressively merges the candidate SGs. 
{ const int REPEAT = (SSRC_MAX_NUM_BASIS_PER_PROBE * 4 + WAVE_SIZE-1) / WAVE_SIZE; - + [unroll(4)] for(int Iteration = 0; MergeCount > 0 && Iteration < 4; Iteration ++) { // Compute the distance between each pair of SGs { #if SSRC_MAX_NUM_BASIS_PER_PROBE <= 8 int BasisRankBase = 0; #else + [unroll((SSRC_MAX_NUM_BASIS_PER_PROBE * 4 + WAVE_SIZE - 1) / WAVE_SIZE)] for(int BasisRankBase = 0; BasisRankBase < NumBasis; BasisRankBase += WAVE_SIZE) { #endif int BasisRank = BasisRankBase + LocalID; int MaxIndex = -1; float MaxSimilarity = 0.f; SGData CurrentSG = LocalSGData4[BasisRank]; + [unroll(SSRC_MAX_NUM_BASIS_PER_PROBE * 4)] for (int i = 0; i= 0) { @@ -551,6 +581,7 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S #if SSRC_MAX_NUM_BASIS_PER_PROBE <= 8 int BasisRankBase = 0; #else + [unroll((SSRC_MAX_NUM_BASIS_PER_PROBE * 4 + WAVE_SIZE - 1) / WAVE_SIZE)] for(int BasisRankBase = 0; BasisRankBase < NumBasis; BasisRankBase += WAVE_SIZE) { #endif int BasisRank = BasisRankBase + LocalID; @@ -567,6 +598,7 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S } // Swap the data { + [unroll((SSRC_MAX_NUM_BASIS_PER_PROBE * 4 + WAVE_SIZE - 1) / WAVE_SIZE)] for(int i = 0; i * WAVE_SIZE < NumNewBasis; i++) { int BasisRank = i * WAVE_SIZE + LocalID; if(BasisRank < NumNewBasis) { @@ -614,9 +646,8 @@ void SSRC_AllocateUpdateRays (int DispatchID : SV_DispatchThreadID) { #define MIN_PDF_TO_TRACE 1e-4f //1e-1f // A scan sum is performed to accumulate raycount -SGData LocalSGData[SSRC_MAX_NUM_BASIS_PER_PROBE + 1]; -float LocalSGSize[SSRC_MAX_NUM_BASIS_PER_PROBE + 1]; -float LocalSGSizePrefixSum[SSRC_MAX_NUM_BASIS_PER_PROBE + 1]; +SGData LocalSGData[SSRC_MAX_NUM_BASIS_PER_PROBE]; +float LocalSGSize[SSRC_MAX_NUM_BASIS_PER_PROBE]; [numthreads(WAVE_SIZE, 1, 1)] void SSRC_SampleUpdateRays (int LocalID : SV_GroupThreadID, int GroupID : SV_GroupID) { @@ -639,39 +670,45 @@ void SSRC_SampleUpdateRays (int LocalID : SV_GroupThreadID, 
int GroupID : SV_Gro LocalSGData[BasisRank] = SG; float SGSize = SGIntegrate(SG.Lambda) * dot(SG.Color, 1.f.xxx); LocalSGSize[BasisRank] = SGSize; - LocalSGSizePrefixSum[LocalID] = WaveActiveSum(SGSize); + } else if(BasisRank < SSRC_MAX_NUM_BASIS_PER_PROBE) { + LocalSGSize[BasisRank] = 0; } } GroupMemoryBarrierWithGroupSync(); - if(WaveIsFirstLane()) { - LocalSGSizePrefixSum[BasisCount] = LocalSGSizePrefixSum[BasisCount - 1] + LocalSGSize[BasisCount - 1]; - } - GroupMemoryBarrierWithGroupSync(); float IrradianceSize = FOUR_PI * dot(g_RWProbeIrradianceTexture[ProbeIndex].xyz, 1.f.xxx); - float SumSize = LocalSGSizePrefixSum[BasisCount] + IrradianceSize; + float ThreadSizeSums[SSRC_MAX_NUM_BASIS_PER_PROBE]; + [unroll(SSRC_MAX_NUM_BASIS_PER_PROBE)] + for(int i = 0; i < BasisCount; i++) { + ThreadSizeSums[i] = i == 0 ? 0 : ThreadSizeSums[i-1]; + ThreadSizeSums[i] += LocalSGSize[i]; + } + float SumSizeBasis = BasisCount > 0 ? ThreadSizeSums[BasisCount - 1] : 0.f; // avoid reading ThreadSizeSums[-1] when the probe has no basis + float SumSize = SumSizeBasis + IrradianceSize; + Random rng = MakeRandom(GroupID * WAVE_SIZE + LocalID, MI.FrameSeed); // Sample ray SG - +#if SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE % WAVE_SIZE != 0 +#error "SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE must be a multiple of WAVE_SIZE" +#endif + [unroll(SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE / WAVE_SIZE)] for(int RayRankBase = 0; RayRankBase < RayCount; RayRankBase += WAVE_SIZE) { // We assume that ray count is always a multiple of WAVE_SIZE int RayRank = RayRankBase + LocalID; float u = rng.rand(); float U = u * SumSize; - int L = 0, R = BasisCount; - while(L < R) { - int M = (L + R + 1) / 2; - if(LocalSGSizePrefixSum[M] < U) { - L = M; - } else { - R = M - 1; + int BasisRank = BasisCount; + [unroll(SSRC_MAX_NUM_BASIS_PER_PROBE)] + for(int i = 0; i= MI.OutputDimensions)) { + if(any(DispatchID >= MI.ScreenDimensions)) { return; } @@ -990,7 +1031,7 @@ void SSRC_ReprojectPreviousUpdateError (int2 DispatchID : SV_DispatchThreadID) { float2 UV = 0.5f * float2(homogeneous.x, -homogeneous.y) + 0.5f; float 
Depth = homogeneous.z; - // Severe (precision?) error here + // Severe precision loss here if we use the following reprojection method // float2 UV = (float2(DispatchID) + 0.5f) * MI.OutputDimensionsInv; // float Depth = g_DepthTexture.Load(int3(UV, 0)).x; // float3 Normal = normalize(2.f * g_GeometryNormalTexture.Load(int3(UV, 0)).xyz - 1.f); @@ -1002,7 +1043,7 @@ void SSRC_ReprojectPreviousUpdateError (int2 DispatchID : SV_DispatchThreadID) { if (all(previous_uv > 0.0f) && all(previous_uv < 1.0f)) { - float3 homogeneous2 = transformPointProjection(homogeneous, g_Reprojection); + float3 homogeneous2 = transformPointProjection(homogeneous, MI.Reprojection); homogeneous2.z = GetLinearDepth(homogeneous2.z); float previous_depth = GetLinearDepth(g_PreviousDepthTexture.SampleLevel(g_NearestSampler, previous_uv, 0.0f).x); @@ -1021,7 +1062,6 @@ void SSRC_ReprojectPreviousUpdateError (int2 DispatchID : SV_DispatchThreadID) { // TODO allocate a initial weight for the update error // FIXME g_RWUpdateErrorSplatTexture[DispatchID] = 0.f; - } // UpdateErrorSplatTexture is later mipmapped. 
@@ -1650,10 +1690,17 @@ void SSRC_UpdateProbes (int LocalID : SV_GroupThreadID, int GroupID : SV_GroupID float SumSampleWeight = 0.f; float3 SumWeightedDiffRadiance = 0.f.xxx; float3 SumWeightedRadiance = 0.f.xxx; +#if SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE % WAVE_SIZE != 0 +#error "SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE must be a multiple of WAVE_SIZE" +#endif + [unroll(SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE / WAVE_SIZE)] for(int RayRankBase = 0; RayRankBase < ProbeRayCount; RayRankBase += WAVE_SIZE) { int RayRank = RayRankBase + LocalID; int RayIndex = ProbeRayOffset + RayRank; float3 RayDirection = OctahedronToUnitVector(unpackUnorm2x16(g_RWUpdateRayDirectionBuffer[RayIndex]) * 2 - 1); + float4 RayRadianceInvPdf = UnpackFp16x4(g_RWUpdateRayRadianceInvPdfBuffer[RayIndex]); + float3 RayRadiance = RayRadianceInvPdf.xyz; + float InvPdf = RayRadianceInvPdf.w; if(InvPdf > 0) { float3 EvaluatedRadiance = 0.f.xxx; [unroll(SSRC_MAX_NUM_BASIS_PER_PROBE)] @@ -1686,6 +1733,7 @@ void SSRC_UpdateProbes (int LocalID : SV_GroupThreadID, int GroupID : SV_GroupID // TODO classifying update ray count & basis count into different levels running different kernels, // so we can completely unroll the loops + [unroll(SSRC_MAX_NUM_BASIS_PER_PROBE * SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE / WAVE_SIZE)] for(int RayGroupOffset = 0; RayGroupOffset < ProbeRayCount; RayGroupOffset += ThreadPerBasis) { int RayRank = RayGroupOffset + BasisThread; if(RayRank < ProbeRayCount) { diff --git a/src/core/src/render_techniques/migi/migi.cpp b/src/core/src/render_techniques/migi/migi.cpp index 42bfa0b..ae16a8d 100644 --- a/src/core/src/render_techniques/migi/migi.cpp +++ b/src/core/src/render_techniques/migi/migi.cpp @@ -4,12 +4,13 @@ * This program uses MulanPSL2. See LICENSE for more. 
*/ -#include "capsaicin_internal.h" #include "migi.h" +#include "capsaicin_internal.h" #include "components/blue_noise_sampler/blue_noise_sampler.h" #include "components/light_sampler_grid_stream/light_sampler_grid_stream.h" #include "components/stratified_sampler/stratified_sampler.h" +#include "migi_internal.h" // Special hacking for manipulating the draw topology withing gfx extern bool __override_gfx_null_render_target; @@ -29,13 +30,14 @@ MIGI::MIGI() MIGI::~MIGI() {terminate();} -void MIGI::render(CapsaicinInternal &capsaicin) noexcept { +void MIGI::render(CapsaicinInternal &capsaicin) noexcept +{ // Prepar settings updateRenderOptions(capsaicin); - auto light_sampler = capsaicin.getComponent(); - auto blue_noise_sampler = capsaicin.getComponent(); - auto stratified_sampler = capsaicin.getComponent(); + auto light_sampler = capsaicin.getComponent(); + auto blue_noise_sampler = capsaicin.getComponent(); + auto stratified_sampler = capsaicin.getComponent(); // Prepare for settings changes { @@ -46,26 +48,6 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept { need_reload_kernel_ = false; } - if (need_reload_hash_grid_cache_debug_view_) - { - gfxDestroyKernel(gfx_, kernels_.debug_hash_grid_cells); - - GfxDrawState debug_screen_probes_draw_state; - gfxDrawStateSetColorTarget(debug_screen_probes_draw_state, 0, capsaicin.getAOVBuffer("Debug")); - - GfxDrawState debug_hash_grid_cells_draw_state; - gfxDrawStateSetColorTarget(debug_hash_grid_cells_draw_state, 0, capsaicin.getAOVBuffer("Debug")); - gfxDrawStateSetDepthStencilTarget(debug_hash_grid_cells_draw_state, tex_.depth); - gfxDrawStateSetCullMode(debug_hash_grid_cells_draw_state, D3D12_CULL_MODE_NONE); - - GfxDrawState debug_material_draw_state; - gfxDrawStateSetColorTarget(debug_material_draw_state, 0, capsaicin.getAOVBuffer("Debug")); - - kernels_.debug_hash_grid_cells = gfxCreateGraphicsKernel( - gfx_, kernels_.program, debug_hash_grid_cells_draw_state, "DebugHashGridCells"); - 
need_reload_hash_grid_cache_debug_view_ = false; - } - // Clear the hash-grid cache if user's changed the cell size if (need_reset_hash_grid_cache_) { @@ -74,7 +56,8 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept { } // The world space reservoir size relates to the camera's field of view / resolution / restir configuration - if(need_reset_world_space_reservoirs_) { + if (need_reset_world_space_reservoirs_) + { clearReservoirs(); need_reset_world_space_reservoirs_ = false; } @@ -88,18 +71,14 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept { // * Register the program parameters * // *********************************************************** -light_sampler->addProgramParameters(capsaicin, kernels_.program); + light_sampler->addProgramParameters(capsaicin, kernels_.program); stratified_sampler->addProgramParameters(capsaicin, kernels_.program); blue_noise_sampler->addProgramParameters(capsaicin, kernels_.program); - GfxTexture shading_normal_texture; - if(options_.shading_with_geometry_normal) - shading_normal_texture = capsaicin.getAOVBuffer("GeometryNormal"); - else shading_normal_texture = capsaicin.getAOVBuffer("ShadingNormal"); - - // Global read-only textures + // Global read-only gfxProgramSetParameter(gfx_, kernels_.program, "g_EnvironmentBuffer", capsaicin.getEnvironmentBuffer()); - gfxProgramSetParameter(gfx_, kernels_.program, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount()); + gfxProgramSetParameter( + gfx_, kernels_.program, "g_TextureMaps", capsaicin.getTextures(), capsaicin.getTextureCount()); gfxProgramSetParameter(gfx_, kernels_.program, "g_TextureSampler", capsaicin.getLinearSampler()); gfxProgramSetParameter(gfx_, kernels_.program, "g_NearestSampler", capsaicin.getNearestSampler()); gfxProgramSetParameter(gfx_, kernels_.program, "g_LinearSampler", capsaicin.getLinearSampler()); @@ -115,143 +94,154 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); // Acceleration structure 
gfxProgramSetParameter(gfx_, kernels_.program, "g_Scene", capsaicin.getAccelerationStructure()); - // Camera - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraPosition", capsaicin.getCamera().eye); - // THIS IS NOT NORMALIZED SOMETIMES!!!! - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraDirection", normalize(capsaicin.getCamera().center - capsaicin.getCamera().eye)); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraFoVY", capsaicin.getCamera().fovY); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraFoVY2", capsaicin.getCamera().fovY * 0.5f); - gfxProgramSetParameter(gfx_, kernels_.program, "g_AspectRatio", capsaicin.getCamera().aspect); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraNear", capsaicin.getCamera().nearZ); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraFar", capsaicin.getCamera().farZ); - const auto& camera = capsaicin.getCamera(); - auto camera_forward = glm::normalize(camera.center - camera.eye); - auto camera_up = camera.up; - auto camera_right = glm::cross(camera_forward, camera_up); - camera_up = normalize(cross(camera_right, camera_forward)); - // Half the height of the standard camera plane - float scale = tanf(camera.fovY / 2.f); - float aspect = capsaicin.getCamera().aspect; - camera_right *= scale * aspect; - camera_up *= scale; - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraUp", camera_up); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraRight", camera_right); - bool taa_enable = false; - if(capsaicin.getOptions().find("taa_enable") != capsaicin.getOptions().end()) - taa_enable = std::get(capsaicin.getOptions()["taa_enable"]); - auto const &camera_matrices = capsaicin.getCameraMatrices(taa_enable); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraView", camera_matrices.view); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraProjection", camera_matrices.projection); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraProjView", 
camera_matrices.view_projection); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraViewInv", camera_matrices.inv_view); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraProjViewInv", camera_matrices.inv_view_projection); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_CameraPixelScale", 2.f * scale / float(options_.height)); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_Reprojection", camera_matrices.reprojection); - gfxProgramSetParameter(gfx_, kernels_.program, "g_ForwardReprojection", - glm::dmat4(camera_matrices.view_projection) * glm::inverse(glm::dmat4(camera_matrices.view_projection_prev))); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_PreviousCameraPosition", previous_camera_.eye); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_FrameIndex", capsaicin.getFrameIndex()); - gfxProgramSetParameter(gfx_, kernels_.program, "g_FrameSeed", capsaicin.getFrameIndex()); - // G-Buffers - gfxProgramSetParameter(gfx_, kernels_.program, "g_DepthTexture", capsaicin.getAOVBuffer("VisibilityDepth")); - gfxProgramSetParameter(gfx_, kernels_.program, "g_VisibilityTexture", capsaicin.getAOVBuffer("Visibility")); - gfxProgramSetParameter(gfx_, kernels_.program, "g_GeometryNormalTexture", capsaicin.getAOVBuffer("GeometryNormal")); - gfxProgramSetParameter(gfx_, kernels_.program, "g_ShadingNormalTexture", shading_normal_texture); + gfxProgramSetParameter( + gfx_, kernels_.program, "g_DepthTexture", capsaicin.getAOVBuffer("VisibilityDepth")); + gfxProgramSetParameter( + gfx_, kernels_.program, "g_VisibilityTexture", capsaicin.getAOVBuffer("Visibility")); + gfxProgramSetParameter( + gfx_, kernels_.program, "g_GeometryNormalTexture", capsaicin.getAOVBuffer("GeometryNormal")); + gfxProgramSetParameter( + gfx_, kernels_.program, "g_ShadingNormalTexture", capsaicin.getAOVBuffer("ShadingNormal")); gfxProgramSetParameter(gfx_, kernels_.program, "g_VelocityTexture", capsaicin.getAOVBuffer("Velocity")); - gfxProgramSetParameter(gfx_, 
kernels_.program, "g_PreviousDepthTexture", capsaicin.getAOVBuffer("PrevVisibilityDepth")); - gfxProgramSetParameter(gfx_, kernels_.program, "g_PreviousGeometryNormalTexture", capsaicin.getAOVBuffer("PrevGeometryNormal")); - gfxProgramSetParameter(gfx_, kernels_.program, "g_PreviousShadingNormalTexture", capsaicin.getAOVBuffer("PrevShadingNormal")); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_PrevCombinedIlluminationTexture",capsaicin.getAOVBuffer("PrevCombinedIllumination")); + gfxProgramSetParameter( + gfx_, kernels_.program, "g_PreviousDepthTexture", capsaicin.getAOVBuffer("PrevVisibilityDepth")); + gfxProgramSetParameter(gfx_, kernels_.program, "g_PreviousGeometryNormalTexture", + capsaicin.getAOVBuffer("PrevGeometryNormal")); + gfxProgramSetParameter(gfx_, kernels_.program, "g_PreviousShadingNormalTexture", + capsaicin.getAOVBuffer("PrevShadingNormal")); - // Indirect - // Group size is set upon invocation - gfxProgramSetParameter(gfx_, kernels_.program, "g_CountBuffer", buf_.dispatch_count); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWDispatchCommandBuffer", buf_.dispatch_command); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWDrawCommandBuffer", buf_.draw_command); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWDrawIndexedCommandBuffer", buf_.draw_indexed_command); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWReduceCountBuffer", buf_.reduce_count); + gfxProgramSetParameter(gfx_, kernels_.program, "g_PrevCombinedIlluminationTexture", + capsaicin.getAOVBuffer("PrevCombinedIllumination")); - // Params auto debug_output_aov = capsaicin.getAOVBuffer("Debug"); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWDebugOutput", debug_output_aov); auto gi_output_aov = capsaicin.getAOVBuffer("GlobalIllumination"); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWGlobalIlluminationOutput", gi_output_aov); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWActiveBasisCountBuffer", buf_.active_basis_count); - 
gfxProgramSetParameter(gfx_, kernels_.program, "g_RWActiveBasisIndexBuffer", buf_.active_basis_index); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWBasisEffectiveRadiusBuffer", buf_.basis_effective_radius); -// gfxProgramSetParameter(gfx_, kernels_.program, "g_RWBasisFilmPositionBuffer", buf_.basis_film_position); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWBasisEffectiveRadiusFilmBuffer", buf_.basis_effective_radius_film); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWBasisLocationBuffer", buf_.basis_location); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWBasisParameterBuffer", buf_.basis_parameter); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWQuantilizedBasisStepBuffer", buf_.quantilized_basis_step); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWBasisAverageGradientScaleBuffer", buf_.basis_average_gradient_scale); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWBasisFlagsBuffer", buf_.basis_flags); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWFreeBasisIndicesBuffer", buf_.free_basis_indices); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWFreeBasisIndicesCountBuffer", buf_.free_basis_indices_count); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWTileBasisCountBuffer", buf_.tile_basis_count); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWTileRayCountBuffer", buf_.tile_ray_count); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWTileRayOffsetBuffer", buf_.tile_ray_offset); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateRayDirectionBuffer", buf_.update_ray_direction); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateRayOriginBuffer", buf_.update_ray_origin); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateRayRadianceInvPdfBuffer", buf_.update_ray_radiance_inv_pdf); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateRayCacheBuffer", buf_.update_ray_cache); - gfxProgramSetParameter(gfx_, kernels_.program, 
"g_RWUpdateRayCountBuffer", buf_.update_ray_count); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWTileUpdateErrorSumsBuffer", buf_.tile_update_error_sums); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateErrorBuffer", buf_.tile_update_error); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWTileBasisIndexInjectionBuffer", buf_.tile_basis_index_injection); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWTileBaseSlotOffsetBuffer", buf_.tile_base_slot_offset); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWTileBasisIndexBuffer", buf_.tile_basis_index); - assert(options_.width % SSRC_TILE_SIZE == 0 && options_.height % SSRC_TILE_SIZE == 0); - gfxProgramSetParameter(gfx_, kernels_.program, "g_TileDimensions", glm::int2(options_.width / SSRC_TILE_SIZE, options_.height / SSRC_TILE_SIZE)); - gfxProgramSetParameter(gfx_, kernels_.program, "g_TileDimensionsInv", glm::vec2(1.f / float(options_.width / SSRC_TILE_SIZE), 1.f / float(options_.height / SSRC_TILE_SIZE))); - gfxProgramSetParameter(gfx_, kernels_.program, "g_BasisWInitialRadius", options_.SSRC_initial_W_radius); - gfxProgramSetParameter(gfx_, kernels_.program, "g_BasisSpawnCoverageThreshold", options_.SSRC_basis_spawn_coverage_threshold); - gfxProgramSetParameter(gfx_, kernels_.program, "g_MinWeightE", options_.SSRC_min_weight_E); - gfxProgramSetParameter(gfx_, kernels_.program, "g_MaxBasisCount", options_.SSRC_max_basis_count); - gfxProgramSetParameter(gfx_, kernels_.program, "g_UpdateRayBudget", options_.SSRC_update_ray_budget); - gfxProgramSetParameter(gfx_, kernels_.program, "g_WCoveragePadding", options_.SSRC_W_coverage_padding); - gfxProgramSetParameter(gfx_, kernels_.program, "g_TileFractionPadding", options_.SSRC_tile_fraction_padding); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_CR_DiskVertexCount", options_.SSRC_CR_disk_vertex_count); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CR_DiskRadiusMultiplier", 
1.f);//options_.SSRC_CR_disk_radius_multiplier); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CR_DiskRadiusBias", 0.f);//options_.SSRC_CR_disk_radius_bias); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_NoImportanceSampling", (uint)options_.no_importance_sampling); - gfxProgramSetParameter(gfx_, kernels_.program, "g_FixedStepSize", (uint)options_.fixed_step_size); - gfxProgramSetParameter(gfx_, kernels_.program, "g_FreezeBasisAllocation", (uint)options_.freeze_basis_allocation); - gfxProgramSetParameter(gfx_, kernels_.program, "g_NonUniformInitialW", (uint)options_.nonuniform_initial_w); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWCacheCoverageTexture", tex_.cache_coverage); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateErrorSplatTexture", tex_.update_error_splat[internal_frame_index_ & 1]); - gfxProgramSetParameter(gfx_, kernels_.program, "g_UpdateErrorSplatTexture", tex_.update_error_splat[internal_frame_index_ & 1]); - gfxProgramSetParameter(gfx_, kernels_.program, "g_PreviousUpdateErrorSplatTexture", tex_.update_error_splat[!(internal_frame_index_ & 1)]); + static_assert(SSRC_TILE_SIZE == 16); + // Cache datastructure + { + int flip = internal_frame_index_ & 1; + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeHeaderPackedTexture",tex_.probe_header_packed[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeScreenPositionTexture",tex_.probe_screen_position[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeLinearDepthTexture", tex_.probe_linear_depth[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeWorldPositionTexture", tex_.probe_world_position[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeNormalTexture", tex_.probe_normal[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWPreviousProbeHeaderPackedTexture", tex_.probe_header_packed[1 - flip]); + gfxProgramSetParameter(gfx_, kernels_.program, 
"g_RWPreviousProbeScreenPositionTexture", tex_.probe_screen_position[1 - flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWPreviousProbeLinearDepthTexture", tex_.probe_linear_depth[1 - flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWPreviousProbeWorldPositionTexture", tex_.probe_world_position[1 - flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWPreviousProbeNormalTexture", tex_.probe_normal[1 - flip]); + + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeSGBuffer", buf_.probe_SG[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWPreviousProbeSGBuffer", buf_.probe_SG[1 - flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWAllocatedProbeSGCountBuffer", buf_.allocated_probe_SG_count); + + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeIrradianceTexture", tex_.probe_irradiance[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWPreviousProbeIrradianceTexture", tex_.probe_irradiance[1 - flip]); + + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeUpdateRayCountBuffer", buf_.probe_update_ray_count); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeUpdateRayOffsetBuffer", buf_.probe_update_ray_offset); + + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateRayProbeBuffer", buf_.update_ray_probe); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateRayDirectionBuffer", buf_.update_ray_direction); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateRayRadianceInvPdfBuffer", buf_.update_ray_radiance_inv_pdf); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateRayLinearDepthBuffer", buf_.update_ray_linear_depth); + + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWTileAdaptiveProbeIndexTexture", tex_.tile_adaptive_probe_index[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWPreviousTileAdaptiveProbeIndexTexture", tex_.tile_adaptive_probe_index[1 - flip]); + gfxProgramSetParameter(gfx_, kernels_.program, 
"g_RWAdaptiveProbeCountBuffer", buf_.adaptive_probe_count); + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWProbeUpdateErrorBuffer", buf_.probe_update_error); + + gfxProgramSetParameter(gfx_, kernels_.program, "g_RWUpdateErrorSplatTexture", tex_.update_error_splat[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_UpdateErrorSplatTexture", tex_.update_error_splat[flip]); + gfxProgramSetParameter(gfx_, kernels_.program, "g_PreviousUpdateErrorSplatTexture", tex_.update_error_splat[1 - flip]); + } + + // HiZ RWTextures are set upon kernel invocation - static_assert(SSRC_TILE_SIZE == 16); gfxProgramSetParameter(gfx_, kernels_.program, "g_TileHiZ_Min", tex_.HiZ_min, 3); gfxProgramSetParameter(gfx_, kernels_.program, "g_TileHiZ_Max", tex_.HiZ_max, 3); + // MI constant buffer + MIGI_Constants C; + { + const auto& camera = capsaicin.getCamera(); + // THIS MUST BE NORMALIZED! + auto camera_forward = glm::normalize(camera.center - camera.eye); + auto camera_up = camera.up; + auto camera_right = glm::cross(camera_forward, camera_up); + camera_up = normalize(cross(camera_right, camera_forward)); + // Half the height of the standard camera plane + float scale = tanf(camera.fovY / 2.f); + float aspect = capsaicin.getCamera().aspect; + camera_right *= scale * aspect; + camera_up *= scale; + bool taa_enable = false; + if(capsaicin.getOptions().find("taa_enable") != capsaicin.getOptions().end()) + taa_enable = std::get(capsaicin.getOptions()["taa_enable"]); + auto const &camera_matrices = capsaicin.getCameraMatrices(taa_enable); + C.CameraPosition = camera.eye; + C.CameraDirection = camera_forward; + C.CameraFoVY2 = camera.fovY / 2.f; + C.AspectRatio = aspect; + C.CameraNear = camera.nearZ; + C.CameraFar = camera.farZ; + C.CameraUp = camera_up; + C.CameraRight = camera_right; + C.CameraView = camera_matrices.view; + C.CameraProjView = camera_matrices.view_projection; + C.CameraProjViewInv = camera_matrices.inv_view_projection; + C.CameraViewInv = 
camera_matrices.inv_view; + C.CameraPixelScale = 2.f * scale / float(options_.height); + + C.Reprojection = camera_matrices.reprojection; + C.ForwardReprojection = glm::dmat4(camera_matrices.view_projection) * glm::inverse(glm::dmat4(camera_matrices.view_projection_prev)); + C.PreviousCameraPosition = previous_camera_.eye; + + C.FrameIndex = capsaicin.getFrameIndex(); + C.FrameSeed = options_.debug_freeze_frame_seed ? 123 : C.FrameIndex; + C.PreviousFrameSeed = previous_constants_.FrameSeed; + + C.ScreenDimensions = glm::uvec2(options_.width, options_.height); + C.ScreenDimensionsInv = glm::vec2(1.f / options_.width, 1.f / options_.height); + assert(options_.width % SSRC_TILE_SIZE == 0 && options_.height % SSRC_TILE_SIZE == 0); + C.TileDimensions = glm::uvec2(options_.width / SSRC_TILE_SIZE, options_.height / SSRC_TILE_SIZE); + C.TileDimensionsInv = glm::vec2(1.f / C.TileDimensions.x, 1.f / C.TileDimensions.y); + + C.UniformScreenProbeCount = C.TileDimensions.x * C.TileDimensions.y; - gfxProgramSetParameter(gfx_, kernels_.program, "g_ScreenCacheDimensions", glm::int2(options_.width, options_.height)); + C.MaxAdaptiveProbeCount = options_.SSRC_max_adaptive_probe_count; + C.NoImportanceSampling = options_.no_importance_sampling; + C.NoAdaptiveProbes = options_.no_adaptive_probes; - gfxProgramSetParameter(gfx_, kernels_.program, "g_CacheUpdateLearningRate", options_.cache_update_learing_rate); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CacheUpdate_SGColor", (uint32_t)options_.cache_update_SG_color); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CacheUpdate_SGDirection", (uint32_t)options_.cache_update_SG_direction); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CacheUpdate_SGLambda", (uint32_t)options_.cache_update_SG_lambda); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CacheUpdate_WAlpha", (uint32_t)options_.cache_update_W_alpha); - gfxProgramSetParameter(gfx_, kernels_.program, "g_CacheUpdate_WLambda", 
(uint32_t)options_.cache_update_W_lambda); + C.CacheUpdateLearningRate = options_.cache_update_learing_rate; + C.CacheUpdate_SGColor = options_.cache_update_SG_color; + C.CacheUpdate_SGDirection = options_.cache_update_SG_direction; + C.CacheUpdate_SGLambda = options_.cache_update_SG_lambda; - gfxProgramSetParameter(gfx_, kernels_.program, "g_OutputDimensions", glm::int2(options_.width, options_.height)); - gfxProgramSetParameter(gfx_, kernels_.program, "g_OutputDimensionsInv", glm::vec2(1.f / float(options_.width), 1.f / float(options_.height))); + C.DebugVisualizeMode = options_.debug_visualize_mode; + C.DebugVisualizeChannel = options_.debug_visualize_channel; + C.DebugVisualizeIncidentRadianceNumPoints = options_.debug_visualize_incident_radiance_num_points; + + C.DebugTonemapExposure = 1.f; + C.DebugCursorPixelCoords = options_.cursor_pixel_coords; + + C.DebugLight = options_.debug_light; + C.DebugLightPosition = options_.debug_light_position; + C.DebugLightSize = options_.debug_light_size; + C.DebugLightColor = options_.debug_light_color; + + previous_constants_ = C; + + + GfxBuffer MI_constants = capsaicin.allocateConstantBuffer(1); + gfxBufferGetData(gfx_, MI_constants)[0] = C; + gfxProgramSetParameter(gfx_, kernels_.program, "MI", MI_constants); + } // Hash grid radiance cache and world space ReSTIR, Raytracing: constant buffers { @@ -287,13 +277,8 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); hash_grid_cache_constant_data.first_cell_offset_tile_mip3 = hash_grid_cache_.first_cell_offset_tile_mip3_; hash_grid_cache_constant_data.buffer_ping_pong = hash_grid_cache_.radiance_cache_hash_buffer_ping_pong_; hash_grid_cache_constant_data.max_sample_count = options_.hash_grid_cache.max_sample_count; - hash_grid_cache_constant_data.debug_mip_level = options_.hash_grid_cache.debug_mip_level; - hash_grid_cache_constant_data.debug_propagate = (uint)options_.hash_grid_cache.debug_propagate; - hash_grid_cache_constant_data.debug_max_cell_decay = 
options_.hash_grid_cache.debug_max_cell_decay; -// hash_grid_cache_constant_data.debug_bucket_occupancy_histogram_size = -// hash_grid_cache_.debug_bucket_occupancy_histogram_size_; -// hash_grid_cache_constant_data.debug_bucket_overflow_histogram_size = -// hash_grid_cache_.debug_bucket_overflow_histogram_size_; + // Debugging features are clipped for the hash grid cache + gfxBufferGetData(gfx_, hash_grid_cache_constants)[0] = hash_grid_cache_constant_data; @@ -323,25 +308,10 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); gfxProgramSetParameter(gfx_, kernels_.program, "g_RTConstants", rt_constants); } - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugVisualizeMode", options_.debug_visualize_mode); - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugVisualizeChannel", options_.debug_visualize_channel); - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugVisualizeIncidentRadianceNumPoints", - options_.debug_visualize_incident_radiance_num_points); - gfxProgramSetParameter(gfx_, kernels_.program, "g_RWDebugVisualizeIncidentRadianceBuffer", - buf_.debug_visualize_incident_radiance); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugFreezeFrameSeed", options_.debug_freeze_frame_seed); - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugFreezeFrameSeedValue", 123); - - - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugCursorPixelCoords", options_.cursor_pixel_coords); gfxProgramSetParameter(gfx_, kernels_.program, "g_RWDebugCursorWorldPosBuffer", buf_.debug_cursor_world_pos); - - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugLight", (uint)options_.debug_light); - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugLightPosition", options_.debug_light_position); - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugLightSize", options_.debug_light_size); - gfxProgramSetParameter(gfx_, kernels_.program, "g_DebugLightColor", options_.debug_light_color); + gfxProgramSetParameter(gfx_, 
kernels_.program, "g_RWDebugVisualizeIncidentRadianceBuffer", + buf_.debug_visualize_incident_radiance); { float exposure = 1.f; @@ -422,16 +392,6 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); gfxCommandClearTexture(gfx_, tex_.update_error_splat[1]); } - // Reset the cache if needed - if(need_reset_screen_space_cache_) { - TimedSection section_timer(*this, "Reset Screen Space Cache"); - gfxCommandBindKernel(gfx_, kernels_.SSRC_reset); - auto threads = gfxKernelGetNumThreads(gfx_, kernels_.SSRC_reset); - uint32_t dispatch_size[] = {(options_.SSRC_max_basis_count + threads[0] - 1) / threads[0]}; - gfxCommandDispatch(gfx_, dispatch_size[0], 1, 1); - need_reset_screen_space_cache_ = false; - } - // Decay and remove out-dated hash grid cache cells // Also clear the counters for sketch buffers { @@ -442,37 +402,34 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); ? hash_grid_cache_.radiance_cache_packed_tile_count_buffer0_ : hash_grid_cache_.radiance_cache_packed_tile_count_buffer1_); - uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, kernels_.purge_tiles); + uint32_t const *num_threads = gfxKernelGetNumThreads(gfx_, kernels_.PurgeTiles); generateDispatch(radiance_cache_packed_tile_count_buffer, num_threads[0]); - gfxCommandBindKernel(gfx_, kernels_.clear_counters); + gfxCommandBindKernel(gfx_, kernels_.ClearCounters); gfxCommandDispatch(gfx_, 1, 1, 1); - gfxCommandBindKernel(gfx_, kernels_.purge_tiles); + gfxCommandBindKernel(gfx_, kernels_.PurgeTiles); gfxCommandDispatchIndirect(gfx_, buf_.dispatch_command); } // Precompute HiZ buffer for injection culling and other purposes { - auto divideAndRoundUp = [](uint32_t a, uint32_t b) -> uint32_t { - return (a + b - 1) / b; - }; { TimedSection const timed_section(*this, "PrecomputeHiZ_Min"); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_In", capsaicin.getAOVBuffer("VisibilityDepth")); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_Out", tex_.HiZ_min, 0); - 
gfxCommandBindKernel(gfx_, kernels_.precompute_HiZ_min); - auto threads = gfxKernelGetNumThreads(gfx_, kernels_.precompute_HiZ_min); + gfxCommandBindKernel(gfx_, kernels_.PrecomputeHiZ_min); + auto threads = gfxKernelGetNumThreads(gfx_, kernels_.PrecomputeHiZ_min); gfxCommandDispatch(gfx_, divideAndRoundUp(options_.width / 2, threads[0]), divideAndRoundUp(options_.height / 2, threads[1]), 1); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_In", tex_.HiZ_min, 0); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_Out", tex_.HiZ_min, 1); - gfxCommandBindKernel(gfx_, kernels_.precompute_HiZ_min); + gfxCommandBindKernel(gfx_, kernels_.PrecomputeHiZ_min); gfxCommandDispatch(gfx_, divideAndRoundUp(options_.width / 4, threads[0]), divideAndRoundUp(options_.height / 4, threads[1]), 1); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_In", tex_.HiZ_min, 1); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_Out", tex_.HiZ_min, 2); - gfxCommandBindKernel(gfx_, kernels_.precompute_HiZ_min); + gfxCommandBindKernel(gfx_, kernels_.PrecomputeHiZ_min); gfxCommandDispatch(gfx_, divideAndRoundUp(options_.width / 8, threads[0]), divideAndRoundUp(options_.height / 8, threads[1]), 1); } @@ -480,18 +437,18 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); TimedSection const timed_section(*this, "PrecomputeHiZ_Max"); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_In", capsaicin.getAOVBuffer("VisibilityDepth")); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_Out", tex_.HiZ_max, 0); - gfxCommandBindKernel(gfx_, kernels_.precompute_HiZ_max); - auto threads = gfxKernelGetNumThreads(gfx_, kernels_.precompute_HiZ_max); + gfxCommandBindKernel(gfx_, kernels_.PrecomputeHiZ_max); + auto threads = gfxKernelGetNumThreads(gfx_, kernels_.PrecomputeHiZ_max); gfxCommandDispatch(gfx_, divideAndRoundUp(options_.width / 2, threads[0]), divideAndRoundUp(options_.height / 2, threads[1]), 1); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_In", 
tex_.HiZ_max, 0); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_Out", tex_.HiZ_max, 1); - gfxCommandBindKernel(gfx_, kernels_.precompute_HiZ_max); + gfxCommandBindKernel(gfx_, kernels_.PrecomputeHiZ_max); gfxCommandDispatch(gfx_, divideAndRoundUp(options_.width / 4, threads[0]), divideAndRoundUp(options_.height / 4, threads[1]), 1); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_In", tex_.HiZ_max, 1); gfxProgramSetTexture(gfx_, kernels_.program, "g_RWHiZ_Out", tex_.HiZ_max, 2); - gfxCommandBindKernel(gfx_, kernels_.precompute_HiZ_max); + gfxCommandBindKernel(gfx_, kernels_.PrecomputeHiZ_max); gfxCommandDispatch(gfx_, divideAndRoundUp(options_.width / 8, threads[0]), divideAndRoundUp(options_.height / 8, threads[1]), 1); } @@ -499,25 +456,27 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); // Clear the counter for active basis { - TimedSection const timed_section(*this, "ClearActiveCounter"); + TimedSection const timed_section(*this, "SSRC_ClearCounters"); - gfxCommandBindKernel(gfx_, kernels_.SSRC_clear_active_counter); + gfxCommandBindKernel(gfx_, kernels_.SSRC_ClearCounters); gfxCommandDispatch(gfx_, 1, 1, 1); } + int uniform_probe_count = divideAndRoundUp(options_.width, SSRC_TILE_SIZE) * divideAndRoundUp(options_.height, SSRC_TILE_SIZE); + // Reproject and filter out-dated basis from previous frame { - const TimedSection timed_section(*this, "SSRC_ReprojectAndFilter"); - gfxCommandBindKernel(gfx_, kernels_.SSRC_reproject_and_filter); - auto threads = gfxKernelGetNumThreads(gfx_, kernels_.SSRC_reproject_and_filter); - uint32_t dispatch_size[] = {(options_.SSRC_max_basis_count + threads[0] - 1) / threads[0]}; + const TimedSection timed_section(*this, "SSRC_AllocateUniformProbes"); + gfxCommandBindKernel(gfx_, kernels_.SSRC_AllocateUniformProbes); + auto threads = gfxKernelGetNumThreads(gfx_, kernels_.SSRC_AllocateUniformProbes); + uint32_t dispatch_size[] = {(uniform_probe_count + threads[0] - 1) / threads[0]}; 
gfxCommandDispatch(gfx_, dispatch_size[0], 1, 1); } // Clear the tile index for injection { - const TimedSection timed_section(*this, "SSRC_ClearTileInjectionIndex"); - gfxCommandBindKernel(gfx_, kernels_.SSRC_clear_tile_injection_index); + const TimedSection timed_section(*this, "SSRC_AllocateAdaptiveProbes"); + gfxCommandBindKernel(gfx_, kernels_.SSRC_AllocateAdaptiveProbes); auto threads = gfxKernelGetNumThreads(gfx_, kernels_.SSRC_clear_tile_injection_index); assert(options_.width % SSRC_TILE_SIZE == 0 && options_.height % SSRC_TILE_SIZE == 0); int tile_size = options_.width / SSRC_TILE_SIZE * options_.height / SSRC_TILE_SIZE; @@ -571,15 +530,11 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); gfxCommandDispatch(gfx_, dispatch_size[0], dispatch_size[1], 1); } - auto divideAndRoundUp = [](uint32_t a, uint32_t b) -> uint32_t { - return (a + b - 1) / b; - }; - // reproject update error from previous frame { const TimedSection timed_section(*this, "SSRC_ReprojectPreviousUpdateError"); - gfxCommandBindKernel(gfx_, kernels_.SSRC_reproject_previous_update_error); - auto threads = gfxKernelGetNumThreads(gfx_, kernels_.SSRC_reproject_previous_update_error); + gfxCommandBindKernel(gfx_, kernels_.SSRC_ReprojectPreviousUpdateError); + auto threads = gfxKernelGetNumThreads(gfx_, kernels_.SSRC_ReprojectPreviousUpdateError); uint32_t dispatch_size[] = {divideAndRoundUp(options_.width, threads[0]), divideAndRoundUp(options_.height, threads[1])}; gfxCommandDispatch(gfx_, dispatch_size[0], dispatch_size[1], 1); } @@ -638,7 +593,7 @@ light_sampler->addProgramParameters(capsaicin, kernels_.program); // Trace update rays { TimedSection section_timer(*this, "TraceUpdateRays"); - gfxCommandBindKernel(gfx_, kernels_.SSRC_generate_trace_update_rays); + gfxCommandBindKernel(gfx_, kernels_.SSRC_GenerateTraceUpdateRays); gfxCommandDispatch(gfx_, 1, 1, 1); if(options_.use_dxr10) { gfxSbtSetShaderGroup( @@ -652,11 +607,11 @@ light_sampler->addProgramParameters(capsaicin, 
kernels_.program); i * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), MIGIRT::kScreenCacheUpdateHitGroupName); } - gfxCommandBindKernel(gfx_, kernels_.SSRC_trace_update_rays); + gfxCommandBindKernel(gfx_, kernels_.SSRC_TraceUpdateRaysMain); gfxCommandDispatchRaysIndirect(gfx_, sbt_, buf_.dispatch_rays_command); } else { - gfxCommandBindKernel(gfx_, kernels_.SSRC_trace_update_rays); + gfxCommandBindKernel(gfx_, kernels_.SSRC_TraceUpdateRaysMain); gfxCommandDispatchIndirect(gfx_, buf_.dispatch_command); } } @@ -965,7 +920,7 @@ void MIGI::generateDispatch(GfxBuffer dispatch_count_buffer, uint threads_per_gr gfxProgramSetParameter(gfx_, kernels_.program, "g_GroupSize", threads_per_group); gfxProgramSetParameter(gfx_, kernels_.program, "g_CountBuffer", dispatch_count_buffer); - gfxCommandBindKernel(gfx_, kernels_.generate_dispatch); + gfxCommandBindKernel(gfx_, kernels_.GenerateDispatch); gfxCommandDispatch(gfx_, 1, 1, 1); } @@ -974,7 +929,7 @@ void MIGI::generateDispatchRays(GfxBuffer count_buffer) { gfxProgramSetParameter(gfx_, kernels_.program, "g_CountBuffer", count_buffer); - gfxCommandBindKernel(gfx_, kernels_.generate_dispatch_rays); + gfxCommandBindKernel(gfx_, kernels_.GenerateDispatchRays); gfxCommandDispatch(gfx_, 1, 1, 1); } diff --git a/src/core/src/render_techniques/migi/migi.h b/src/core/src/render_techniques/migi/migi.h index 5296ffb..58eea65 100644 --- a/src/core/src/render_techniques/migi/migi.h +++ b/src/core/src/render_techniques/migi/migi.h @@ -60,12 +60,27 @@ class MIGI : public RenderTechnique struct MIGIResources { - // Screen coverage of the cache, used for basis spawn - // fp16x2 - GfxTexture cache_coverage {}; + // Probe header, uint32 per probe + GfxTexture probe_header_packed [2]; + // Probe screen position, 2xuint16 packed in uint32 + GfxTexture probe_screen_position [2]; + // Probe linear depth 1xfloat32 + GfxTexture probe_linear_depth [2]; + // Probe world position 3xfloat32 + GfxTexture probe_world_position [2]; + // Probe 
world normal 2xunorm16 + GfxTexture probe_normal [2]; + + // Probe irradiance 4xfloat16 + GfxTexture probe_irradiance [2]; + + // Tile adaptive probe count uint16 + GfxTexture tile_adaptive_probe_count [2]; + // Tile adaptive probe index uint16 + GfxTexture tile_adaptive_probe_index [2]; // Update error used to guide update ray spawnning - // fp16x2 with mipmaps up to the tile size (1/8 res), reprojection is done across frames + // fp16x2 GfxTexture update_error_splat [2]{}; // Hierarchical z-buffer @@ -103,6 +118,7 @@ class MIGI : public RenderTechnique } buf_{}; bool initResources (const CapsaicinInternal &capsaicin); + void releaseResources () ; struct MIGIKernels { @@ -111,6 +127,8 @@ class MIGI : public RenderTechnique GfxKernel PrecomputeHiZ_min {}; GfxKernel PrecomputeHiZ_max {}; + GfxKernel PurgeTiles {}; + GfxKernel ClearCounters {}; GfxKernel SSRC_ClearCounters {}; GfxKernel SSRC_AllocateUniformProbes {}; GfxKernel SSRC_AllocateAdaptiveProbes[SSRC_MAX_ADAPTIVE_PROBE_LAYERS] {}; @@ -139,7 +157,11 @@ class MIGI : public RenderTechnique } kernels_; + // Called before initResources bool initKernels (const CapsaicinInternal & capsaicin); + // Called after initResources + bool initGraphicsKernels (const CapsaicinInternal & capsaicin); + void releaseKernels () ; void clearHashGridCache () ; @@ -162,25 +184,25 @@ class MIGI : public RenderTechnique GfxSbt sbt_ {}; - // If the render dimensions have changed. - bool need_resize_ {true}; - // If the hash grid cache debug view mode changed. - bool need_reload_hash_grid_cache_debug_view_ {true}; // If the kernel needs to be reloaded. - // Note: the kernels is loaded upon initialization, so we do not need to set it to true. bool need_reload_kernel_ {false}; + // If the render resources should be reallocated. + bool need_reload_memory_ {false}; + // If the screen space cache needs to be reset. + mutable bool need_reset_screen_space_cache_ {true}; // If the hash grid cache needs to be reset. 
bool need_reset_hash_grid_cache_ {true}; // If the reservoirs need to be reset. bool need_reset_world_space_reservoirs_ {true}; - // If the screen space cache needs to be reset. - mutable bool need_reset_screen_space_cache_ {true}; bool readback_pending_ [kGfxConstant_BackBufferCount] {}; MIGIReadBackValues readback_values_; uint32_t internal_frame_index_ {}; + + MIGI_Constants previous_constants_ {}; }; + } #endif // CAPSAICIN_MIGI_H diff --git a/src/core/src/render_techniques/migi/migi_common.hlsl b/src/core/src/render_techniques/migi/migi_common.hlsl index 4e149cd..d58ce87 100644 --- a/src/core/src/render_techniques/migi/migi_common.hlsl +++ b/src/core/src/render_techniques/migi/migi_common.hlsl @@ -122,7 +122,7 @@ struct ProbeHeader { // Screen pixel position of the probe int2 ScreenPosition; int BasisOffset; - // 0: 1, 1: 2, 2: 4, 3: 8, no larger than 8 + // 0: 0, 1: 1, 2: 2, 3: 4, 4: 8, no larger than 8 int Class; bool bValid; float LinearDepth; @@ -160,7 +160,7 @@ struct MIGI_Constants { // Current NDC -> Prev NDC float4x4 Reprojection; // Prev NDC -> Current NDC - float4x4 ForwardProjection; + float4x4 ForwardReprojection; float3 PreviousCameraPosition; @@ -180,8 +180,6 @@ struct MIGI_Constants { // Budget for update rays int UpdateRayBudget; - // Pad the fraction for ray allocation among probes, 0: error propotional, 1: avg. 
- float UpdateRayFractionPadding; // SSRC parameters // Maximum number of adaptive probes to allocate @@ -201,10 +199,6 @@ struct MIGI_Constants { uint DebugVisualizeChannel; uint DebugVisualizeIncidentRadianceNumPoints; - // Replace FrameSeed under certain conditions - uint DebugFreezeFrameSeed; - uint DebugFreezeFrameSeedValue; - float DebugTonemapExposure; uint2 DebugCursorPixelCoords; @@ -229,6 +223,8 @@ static_assert((1 << SSRC_TILE_SIZE_L2) == SSRC_TILE_SIZE, "SSRC_TILE_SIZE != 1<< #define SSRC_MAX_NUM_BASIS_PER_PROBE 8 #define SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE 128 +#define SSRC_MAX_ADAPTIVE_PROBE_LAYERS 2 + #ifdef __cplusplus }// namespace Capsaicin #endif diff --git a/src/core/src/render_techniques/migi/migi_fwd.h b/src/core/src/render_techniques/migi/migi_fwd.h index 0573842..00799f5 100644 --- a/src/core/src/render_techniques/migi/migi_fwd.h +++ b/src/core/src/render_techniques/migi/migi_fwd.h @@ -25,27 +25,12 @@ struct MIGIRenderOptions { bool debug_freeze_frame_seed {false}; - bool reset_screen_space_cache {false}; + bool reset_screen_space_cache {false}; uint32_t SSRC_max_update_ray_count {4 * 1024 * 1024}; - // Maximum number of basis active in the screen space radiance cache - uint32_t SSRC_max_basis_count {256 * 1024}; - // Min coverage for basis spawn - float SSRC_basis_spawn_coverage_threshold {3.f}; - // Radius control for basis injection - float SSRC_min_weight_E {0.08f}; - // Default initial W radius for newly generated basis - float SSRC_initial_W_radius {9.f}; - // Resolution of the disk when doing rasterization for tile index injection - uint32_t SSRC_CR_disk_vertex_count {12}; - - // Ray budget for each frame - uint32_t SSRC_update_ray_budget {2 * 1024 * 1024}; - // Used to pad W for cache coverage computation to prevent over allocation on surfaces parallel to the view direction - float SSRC_W_coverage_padding {0.05f}; - - // Used to adjust the impact of importance sampling on ray allocation among tiles. 
1: avg, 0: importance - float SSRC_tile_fraction_padding {0.2f}; + uint32_t SSRC_max_adaptive_probe_count {32 * 1024}; + uint32_t SSRC_max_basis_count {4 * 1024 * 1024}; + uint32_t SSRC_max_probe_count {}; struct { uint32_t num_buckets_l2 {12}; // 1<<12 = 4096 @@ -72,24 +57,15 @@ struct MIGIRenderOptions { // If we disable importance sampling when generate update rays. // When enabled, rays are uniformly sampled in the hemisphere. bool no_importance_sampling = true; - // If we use fixed step size in gradient descent. - bool fixed_step_size = false; + // Whether to place adaptive probes + bool no_adaptive_probes = false; // Whether to render indirect lighting (using the hash grid cache) bool enable_indirect = true; - // Whether to freeze the allocation and deallocation of basis for visualization - bool freeze_basis_allocation {false}; - // Guess the initial W (basis radius) when generating new basis - bool nonuniform_initial_w {false}; float cache_update_learing_rate = 0.02f; bool cache_update_SG_color {true}; bool cache_update_SG_direction {false}; bool cache_update_SG_lambda {false}; - bool cache_update_W_alpha {false}; - bool cache_update_W_lambda {false}; - - // Whether to shade with geometry normals only - bool shading_with_geometry_normal {false}; std::string active_debug_view {}; bool debug_view_switched {false}; diff --git a/src/core/src/render_techniques/migi/migi_inc.hlsl b/src/core/src/render_techniques/migi/migi_inc.hlsl index cb2b1ec..d64f38b 100644 --- a/src/core/src/render_techniques/migi/migi_inc.hlsl +++ b/src/core/src/render_techniques/migi/migi_inc.hlsl @@ -144,8 +144,4 @@ ConstantBuffer MI; RWStructuredBuffer g_RWDebugCursorWorldPosBuffer; RWStructuredBuffer g_RWDebugVisualizeIncidentRadianceBuffer; - -// Varying parameters for multiple invocations of the same kernel -int g_AdaptiveProbeDownsampleFactor; - #endif // MIGI_SHARED_PARAMETERS_HLSL \ No newline at end of file diff --git a/src/core/src/render_techniques/migi/migi_init.cpp 
b/src/core/src/render_techniques/migi/migi_init.cpp index 1013155..a464f5e 100644 --- a/src/core/src/render_techniques/migi/migi_init.cpp +++ b/src/core/src/render_techniques/migi/migi_init.cpp @@ -15,6 +15,7 @@ #include "components/blue_noise_sampler/blue_noise_sampler.h" #include "components/light_sampler_grid_stream/light_sampler_grid_stream.h" #include "components/stratified_sampler/stratified_sampler.h" +#include "migi_internal.h" // Hack for missing functionalities in gfx @@ -76,147 +77,68 @@ bool MIGI::initKernels (const CapsaicinInternal & capsaicin) { defines_c.push_back(i.c_str()); } defines_c.push_back("HIZ_MIN"); - kernels_.precompute_HiZ_min = gfxCreateComputeKernel( + kernels_.PrecomputeHiZ_min = gfxCreateComputeKernel( gfx_, kernels_.program, "PrecomputeHiZ", defines_c.data(), (uint32_t)defines_c.size()); defines_c.pop_back(); - kernels_.precompute_HiZ_max = gfxCreateComputeKernel( + kernels_.PrecomputeHiZ_max = gfxCreateComputeKernel( gfx_, kernels_.program, "PrecomputeHiZ", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_clear_active_counter = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_ClearActiveCounter", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_reproject_and_filter = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_ReprojectAndFilter", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_clear_tile_injection_index = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_ClearTileInjectionIndex", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_inject_generate_draw_indexed = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_InjectGenerateDrawIndexed", defines_c.data(), (uint32_t)defines_c.size()); - GfxDrawState injection_draw_state = {}; - // No culling - gfxDrawStateSetCullMode(injection_draw_state, D3D12_CULL_MODE_NONE); - kernels_.SSRC_inject_reprojected_basis = gfxCreateGraphicsKernel(gfx_, kernels_.program, injection_draw_state, - 
"SSRC_InjectReprojectedBasis", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_clip_overflow_tile_index = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_ClipOverflowTileIndex", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_allocate_extra_slot_for_basis_generation = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_AllocateExtraSlotForBasisGeneration", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_compress_tile_basis_index = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_CompressTileBasisIndex", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_reproject_previous_update_error = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_ReprojectPreviousUpdateError", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_precompute_ray_budget_for_tiles = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_PrecomputeRayBudgetForTiles", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_tiles_set_reduce_count_32 = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_TilesSetReduceCount32", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_tiles_set_reduce_count = gfxCreateComputeKernel( - gfx_, kernels_.program, "SSRC_TilesSetReduceCount", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.SSRC_allocate_update_rays = gfxCreateComputeKernel( + + kernels_.GenerateDispatch = gfxCreateComputeKernel( + gfx_, kernels_.program, "GenerateDispatch", defines_c.data(), (uint32_t)defines_c.size()); + kernels_.GenerateDispatchRays = gfxCreateComputeKernel( + gfx_, kernels_.program, "GenerateDispatchRays", defines_c.data(), (uint32_t)defines_c.size()); + kernels_.PurgeTiles = gfxCreateComputeKernel( + gfx_, kernels_.program, "PurgeTiles", defines_c.data(), (uint32_t)defines_c.size()); + kernels_.ClearCounters = gfxCreateComputeKernel( + gfx_, kernels_.program, "ClearCounters", defines_c.data(), (uint32_t)defines_c.size()); + kernels_.SSRC_ClearCounters = 
gfxCreateComputeKernel( + gfx_, kernels_.program, "SSRC_ClearCounters", defines_c.data(), (uint32_t)defines_c.size()); + kernels_.SSRC_AllocateUniformProbes = gfxCreateComputeKernel( + gfx_, kernels_.program, "SSRC_AllocateUniformProbes", defines_c.data(), (uint32_t)defines_c.size()); + for(int i = 0; i screen_cache_update_subobjects = base_subobjects; screen_cache_update_subobjects.push_back(MIGIRT::kScreenCacheUpdateHitGroupName); - kernels_.SSRC_trace_update_rays = gfxCreateRaytracingKernel(gfx_, kernels_.program, nullptr, 0, + kernels_.SSRC_TraceUpdateRaysMain = gfxCreateRaytracingKernel(gfx_, kernels_.program, nullptr, 0, screen_cache_update_exports.data(), (uint32_t)screen_cache_update_exports.size(), screen_cache_update_subobjects.data(), (uint32_t)screen_cache_update_subobjects.size(), defines_c.data(), (uint32_t)defines_c.size()); @@ -243,27 +165,25 @@ bool MIGI::initKernels (const CapsaicinInternal & capsaicin) { populate_cells_kernel_exports.push_back(MIGIRT::kPopulateCellsClosestHitShaderName); std::vector populate_cells_kernel_subobjects = base_subobjects; populate_cells_kernel_subobjects.push_back(MIGIRT::kPopulateCellsHitGroupName); - kernels_.populate_cells = gfxCreateRaytracingKernel(gfx_, kernels_.program, nullptr, 0, + kernels_.PopulateCellsMain = gfxCreateRaytracingKernel(gfx_, kernels_.program, nullptr, 0, populate_cells_kernel_exports.data(), (uint32_t)populate_cells_kernel_exports.size(), populate_cells_kernel_subobjects.data(), (uint32_t)populate_cells_kernel_subobjects.size(), defines_c.data(), (uint32_t)defines_c.size()); - //generate_dispatch_rays_kernel_ = gfxCreateComputeKernel(gfx_, kernels_.program, "GenerateDispatchRays"); - uint32_t entry_count[kGfxShaderGroupType_Count] { capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Raygen), capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Miss), gfxSceneGetInstanceCount(capsaicin.getScene()) * capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Hit), 
capsaicin.getSbtStrideInEntries(kGfxShaderGroupType_Callable)}; - GfxKernel sbt_kernels[] {kernels_.SSRC_trace_update_rays, kernels_.populate_cells}; + GfxKernel sbt_kernels[] {kernels_.SSRC_TraceUpdateRaysMain, kernels_.PopulateCellsMain}; sbt_ = gfxCreateSbt(gfx_, sbt_kernels, ARRAYSIZE(sbt_kernels), entry_count); } else { - kernels_.SSRC_trace_update_rays = gfxCreateComputeKernel( + kernels_.SSRC_TraceUpdateRaysMain = gfxCreateComputeKernel( gfx_, kernels_.program, "SSRC_TraceUpdateRaysMain", defines_c.data(), (uint32_t)defines_c.size()); - kernels_.populate_cells = gfxCreateComputeKernel( + kernels_.PopulateCellsMain = gfxCreateComputeKernel( gfx_, kernels_.program, "PopulateCellsMain", defines_c.data(), (uint32_t)defines_c.size()); } @@ -271,13 +191,67 @@ bool MIGI::initKernels (const CapsaicinInternal & capsaicin) { return true; } +bool MIGI::initGraphicsKernels (const CapsaicinInternal & capsaicin) { + // Do nothing + return true; +} + bool MIGI::initResources (const CapsaicinInternal & capsaicin) { - auto divideAndRoundUp = [](uint32_t a, uint32_t b) -> uint32_t { - return (a + b - 1) / b; - }; - tex_.cache_coverage = gfxCreateTexture2D(gfx_, capsaicin.getWidth(), capsaicin.getHeight(), DXGI_FORMAT_R16G16_FLOAT, 1); - tex_.cache_coverage.setName("CacheCoverageTexture"); + // Textures + int probe_texture_width = divideAndRoundUp(capsaicin.getWidth(), SSRC_TILE_SIZE); + int probe_texture_height_uniform = divideAndRoundUp(capsaicin.getHeight(), SSRC_TILE_SIZE); + int probe_texture_height = probe_texture_height_uniform + divideAndRoundUp(options_.SSRC_max_adaptive_probe_count, probe_texture_width); + if(probe_texture_height > 4096) { + std::cerr << "(Probe texture) Overflowing texture dimensions: " << probe_texture_height << std::endl; + return false; + } + + tex_.probe_header_packed[0] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R32_UINT); + tex_.probe_header_packed[0].setName("ProbeHeaderPacked0"); + 
tex_.probe_header_packed[1] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R32_UINT); + tex_.probe_header_packed[1].setName("ProbeHeaderPacked1"); + + tex_.probe_screen_position[0] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R32_UINT); + tex_.probe_screen_position[0].setName("ProbeScreenPosition0"); + tex_.probe_screen_position[1] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R32_UINT); + tex_.probe_screen_position[1].setName("ProbeScreenPosition1"); + + tex_.probe_linear_depth[0] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R32_FLOAT); + tex_.probe_linear_depth[0].setName("ProbeLinearDepth0"); + tex_.probe_linear_depth[1] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R32_FLOAT); + tex_.probe_linear_depth[1].setName("ProbeLinearDepth1"); + + tex_.probe_world_position[0] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R32G32B32_FLOAT); + tex_.probe_world_position[0].setName("ProbeWorldPosition0"); + tex_.probe_world_position[1] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R32G32B32_FLOAT); + tex_.probe_world_position[1].setName("ProbeWorldPosition1"); + + tex_.probe_normal[0] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R16G16_UNORM); + tex_.probe_normal[0].setName("ProbeNormal0"); + tex_.probe_normal[1] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R16G16_UNORM); + tex_.probe_normal[1].setName("ProbeNormal1"); + + tex_.probe_irradiance[0] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R16G16B16A16_FLOAT); + tex_.probe_irradiance[0].setName("ProbeIrradiance0"); + tex_.probe_irradiance[1] = gfxCreateTexture2D(gfx_, probe_texture_width, probe_texture_height, DXGI_FORMAT_R16G16B16A16_FLOAT); + 
tex_.probe_irradiance[1].setName("ProbeIrradiance1"); + + int tile_texture_width = divideAndRoundUp(capsaicin.getWidth(), SSRC_TILE_SIZE); + int tile_texture_height = divideAndRoundUp(capsaicin.getHeight(), SSRC_TILE_SIZE); + + tex_.tile_adaptive_probe_count[0] = gfxCreateTexture2D(gfx_, tile_texture_width, tile_texture_height, DXGI_FORMAT_R16_UINT); + tex_.tile_adaptive_probe_count[0].setName("TileAdaptiveProbeCount0"); + tex_.tile_adaptive_probe_count[1] = gfxCreateTexture2D(gfx_, tile_texture_width, tile_texture_height, DXGI_FORMAT_R16_UINT); + tex_.tile_adaptive_probe_count[1].setName("TileAdaptiveProbeCount1"); + + int tile_index_texture_width = tile_texture_width * SSRC_TILE_SIZE; + int tile_index_texture_height = tile_texture_height * SSRC_TILE_SIZE; + tex_.tile_adaptive_probe_index[0] = gfxCreateTexture2D(gfx_, tile_index_texture_width, tile_index_texture_height, DXGI_FORMAT_R16_UINT); + tex_.tile_adaptive_probe_index[0].setName("TileAdaptiveProbeIndex0"); + tex_.tile_adaptive_probe_index[1] = gfxCreateTexture2D(gfx_, tile_index_texture_width, tile_index_texture_height, DXGI_FORMAT_R16_UINT); + tex_.tile_adaptive_probe_index[1].setName("TileAdaptiveProbeIndex1"); + assert(capsaicin.getWidth() % 8 == 0 && capsaicin.getHeight() % 8 == 0); tex_.update_error_splat[0] = gfxCreateTexture2D(gfx_, capsaicin.getWidth(), capsaicin.getHeight(), DXGI_FORMAT_R16G16_FLOAT, SSRC_TILE_SIZE_L2 + 1); tex_.update_error_splat[0].setName("UpdateErrorSplat0"); @@ -293,72 +267,48 @@ bool MIGI::initResources (const CapsaicinInternal & capsaicin) { tex_.depth.setName("Depth (MIGI)"); // Buffers - buf_.active_basis_count = gfxCreateBuffer(gfx_, 1); - buf_.active_basis_count.setName("ActiveBasisCount"); - buf_.active_basis_index = gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation); - buf_.active_basis_index.setName("ActiveBasisIndex"); - buf_.basis_effective_radius= gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation); - buf_.basis_effective_radius.setName("BasisEffectiveRadius"); - 
buf_.basis_film_position = gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation); - buf_.basis_film_position.setName("BasisFilmPosition"); - buf_.basis_effective_radius_film = gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation); - buf_.basis_effective_radius_film.setName("BasisEffectiveRadiusFilm"); - buf_.basis_location = gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation); - buf_.basis_location.setName("BasisLocation"); - buf_.basis_parameter = gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation * 4); - buf_.basis_parameter.setName("BasisParameter"); - buf_.quantilized_basis_step= gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation * 7); - buf_.quantilized_basis_step.setName("QuantilizedBasisStep"); - buf_.basis_average_gradient_scale = gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation * 2); - buf_.basis_average_gradient_scale.setName("BasisAverageGradientScale"); - buf_.basis_flags = gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation); - buf_.basis_flags.setName("BasisFlags"); - buf_.free_basis_indices = gfxCreateBuffer(gfx_, cfg_.basis_buffer_allocation); - buf_.free_basis_indices.setName("FreeBasisIndices"); - buf_.free_basis_indices_count = gfxCreateBuffer(gfx_, 1); - buf_.free_basis_indices_count.setName("FreeBasisIndicesCount"); - assert(options_.width % SSRC_TILE_SIZE == 0 && options_.height % SSRC_TILE_SIZE == 0); - int ssrc_tile_count = options_.width / SSRC_TILE_SIZE * options_.height / SSRC_TILE_SIZE; - buf_.tile_basis_count = gfxCreateBuffer(gfx_, ssrc_tile_count); - buf_.tile_basis_count.setName("TileBasisCount"); - buf_.tile_ray_count = gfxCreateBuffer(gfx_, ssrc_tile_count); - buf_.tile_ray_count.setName("TileRayCount"); - buf_.tile_ray_offset = gfxCreateBuffer(gfx_, ssrc_tile_count); - buf_.tile_ray_offset.setName("TileRayOffset"); - buf_.update_ray_direction = gfxCreateBuffer(gfx_, options_.SSRC_max_update_ray_count); - buf_.update_ray_direction.setName("UpdateRayDirection"); - buf_.update_ray_origin = gfxCreateBuffer(gfx_, 
options_.SSRC_max_update_ray_count); - buf_.update_ray_origin.setName("UpdateRayOrigin"); - buf_.update_ray_radiance_inv_pdf = gfxCreateBuffer(gfx_, options_.SSRC_max_update_ray_count * 2); - buf_.update_ray_radiance_inv_pdf.setName("UpdateRayRadiancePdf"); - buf_.update_ray_cache = gfxCreateBuffer(gfx_, options_.SSRC_max_update_ray_count * 2); - buf_.update_ray_cache.setName("UpdateRayCache"); - buf_.update_ray_count = gfxCreateBuffer(gfx_, 1); - buf_.update_ray_count.setName("UpdateRayCount"); - buf_.tile_update_error_sums= gfxCreateBuffer(gfx_, divideAndRoundUp(ssrc_tile_count, cfg_.wave_lane_count)); - buf_.tile_update_error_sums.setName("TileUpdateErrorSums"); - buf_.tile_update_error = gfxCreateBuffer(gfx_, 1); - buf_.tile_update_error.setName("TileUpdateError"); - buf_.tile_basis_index_injection = gfxCreateBuffer(gfx_, ssrc_tile_count * SSRC_MAX_BASIS_PER_TILE); - buf_.tile_basis_index_injection.setName("TileBasisIndexInjection"); - buf_.tile_base_slot_offset = gfxCreateBuffer(gfx_, ssrc_tile_count); - buf_.tile_base_slot_offset.setName("TileBaseSlotOffset"); - buf_.tile_basis_index = gfxCreateBuffer(gfx_, ssrc_tile_count * (SSRC_MAX_BASIS_PER_TILE + 1)); - buf_.tile_basis_index.setName("TileBasisIndex"); - - buf_.dispatch_count = gfxCreateBuffer(gfx_, 1); - buf_.dispatch_count.setName("DispatchCount"); - buf_.dispatch_command = gfxCreateBuffer(gfx_, 1); + buf_.count = gfxCreateBuffer(gfx_, 1); + buf_.count.setName("Count"); + buf_.dispatch_command = gfxCreateBuffer(gfx_, 1); buf_.dispatch_command.setName("DispatchCommand"); buf_.dispatch_rays_command = gfxCreateBuffer(gfx_, 1); buf_.dispatch_rays_command.setName("DispatchRaysCommand"); - buf_.draw_command = gfxCreateBuffer(gfx_, 1); + buf_.draw_command = gfxCreateBuffer(gfx_, 1); buf_.draw_command.setName("DrawCommand"); - buf_.draw_indexed_command = gfxCreateBuffer(gfx_, 1); + buf_.draw_indexed_command = gfxCreateBuffer(gfx_, 1); buf_.draw_indexed_command.setName("DrawIndexedCommand"); - buf_.reduce_count 
= gfxCreateBuffer(gfx_, 1); + buf_.reduce_count = gfxCreateBuffer(gfx_, 1); buf_.reduce_count.setName("ReduceCount"); + buf_.probe_SG[0] = gfxCreateBuffer(gfx_, options_.SSRC_max_basis_count); + buf_.probe_SG[0].setName("ProbeSG0"); + buf_.probe_SG[1] = gfxCreateBuffer(gfx_, options_.SSRC_max_basis_count); + buf_.probe_SG[1].setName("ProbeSG1"); + + buf_.allocated_probe_SG_count = gfxCreateBuffer(gfx_, 1); + buf_.allocated_probe_SG_count.setName("AllocatedProbeSGCount"); + + buf_.probe_update_ray_count = gfxCreateBuffer(gfx_, options_.SSRC_max_probe_count); + buf_.probe_update_ray_count.setName("ProbeUpdateRayCount"); + buf_.probe_update_ray_offset = gfxCreateBuffer(gfx_, options_.SSRC_max_probe_count); + buf_.probe_update_ray_offset.setName("ProbeUpdateRayOffset"); + + buf_.update_ray_probe = gfxCreateBuffer(gfx_, divideAndRoundUp(options_.SSRC_max_update_ray_count, cfg_.wave_lane_count)); + buf_.update_ray_probe.setName("UpdateRayProbe"); + buf_.update_ray_direction = gfxCreateBuffer(gfx_, options_.SSRC_max_update_ray_count); + buf_.update_ray_direction.setName("UpdateRayDirection"); + buf_.update_ray_radiance_inv_pdf = gfxCreateBuffer(gfx_, options_.SSRC_max_update_ray_count * 2); + buf_.update_ray_radiance_inv_pdf.setName("UpdateRayRadiancePdf"); + buf_.update_ray_linear_depth = gfxCreateBuffer(gfx_, options_.SSRC_max_update_ray_count); + buf_.update_ray_linear_depth.setName("UpdateRayLinearDepth"); + + buf_.adaptive_probe_count = gfxCreateBuffer(gfx_, 1); + buf_.adaptive_probe_count.setName("AdaptiveProbeCount"); + buf_.probe_update_error = gfxCreateBuffer(gfx_, options_.SSRC_max_probe_count); + buf_.probe_update_error.setName("ProbeUpdateError"); + + assert(options_.width % SSRC_TILE_SIZE == 0 && options_.height % SSRC_TILE_SIZE == 0); + buf_.debug_visualize_incident_radiance = gfxCreateBuffer(gfx_, cfg_.max_debug_visualize_incident_radiance_num_points); buf_.debug_visualize_incident_radiance.setName("DebugVisualizeIncidentRadiance"); 
buf_.debug_visualize_incident_radiance_sum = gfxCreateBuffer(gfx_, 1); @@ -366,17 +316,6 @@ bool MIGI::initResources (const CapsaicinInternal & capsaicin) { buf_.debug_cursor_world_pos = gfxCreateBuffer(gfx_, 1); buf_.debug_cursor_world_pos.setName("DebugCursorWorldPos"); - // Initialize the disk index buffer for injection - std::vector disk_index_buffer; - for(int i = 0; i<(int)options_.SSRC_CR_disk_vertex_count - 2; i++) - { - disk_index_buffer.push_back(0); - disk_index_buffer.push_back(i + 1); - disk_index_buffer.push_back(i + 2); - } - buf_.disk_index_buffer = gfxCreateBuffer(gfx_, (options_.SSRC_CR_disk_vertex_count - 2) * 3, disk_index_buffer.data()); - buf_.disk_index_buffer.setName("DiskIndexBuffer"); - for(auto & e : buf_.readback) { e = gfxCreateBuffer(gfx_, 32, nullptr, GfxCpuAccess::kGfxCpuAccess_Read); @@ -394,11 +333,15 @@ bool MIGI::init(const CapsaicinInternal &capsaicin) noexcept } updateRenderOptions(capsaicin); + if(!initKernels(capsaicin)) { + return false; + } + if(!initResources(capsaicin)) { return false; } - if(!initKernels(capsaicin)) { + if(!initGraphicsKernels(capsaicin)) { return false; } @@ -415,107 +358,43 @@ bool MIGI::init(const CapsaicinInternal &capsaicin) noexcept return true; } -void MIGI::terminate() noexcept +void MIGI::releaseKernels() { - // Config - cfg_ = {}; - // Free all program & kernels + gfxDestroyKernel(gfx_, kernels_.PrecomputeHiZ_min); + gfxDestroyKernel(gfx_, kernels_.PrecomputeHiZ_max); + gfxDestroyKernel(gfx_, kernels_.GenerateDispatch); + gfxDestroyKernel(gfx_, kernels_.GenerateDispatchRays); + gfxDestroyKernel(gfx_, kernels_.PurgeTiles); + gfxDestroyKernel(gfx_, kernels_.ClearCounters); + gfxDestroyKernel(gfx_, kernels_.SSRC_ClearCounters); + gfxDestroyKernel(gfx_, kernels_.SSRC_AllocateUniformProbes); + for(int i = 0; i MIGI::getShaderCompileDefinitions(const CapsaicinInternal & capsaicin) const { std::vector ret; @@ -536,14 +479,14 @@ std::vector MIGI::getShaderCompileDefinitions(const CapsaicinIntern 
for(auto e : light_sampler_defines) ret.push_back(e); - if (capsaicin.hasAOVBuffer("OcclusionAndBentNormal")) ret.push_back("HAS_OCCLUSION"); - ret.push_back("USE_RESAMPLING"); + if (capsaicin.hasAOVBuffer("OcclusionAndBentNormal")) ret.emplace_back("HAS_OCCLUSION"); + ret.emplace_back("USE_RESAMPLING"); - if(options_.enable_indirect) ret.push_back("ENABLE_INDIRECT"); + if(options_.enable_indirect) ret.emplace_back("ENABLE_INDIRECT"); if (capsaicin.getCurrentDebugView().starts_with("HashGridCache_")) { - ret.push_back("DEBUG_HASH_CELLS"); + ret.emplace_back("DEBUG_HASH_CELLS"); } return ret; diff --git a/src/core/src/render_techniques/migi/migi_internal.h b/src/core/src/render_techniques/migi/migi_internal.h new file mode 100644 index 0000000..f2afe26 --- /dev/null +++ b/src/core/src/render_techniques/migi/migi_internal.h @@ -0,0 +1,22 @@ +/* + * Project Capsaicin: migi_internal.h + * Created: 2024/5/18 + * This program uses MulanPSL2. See LICENSE for more. + */ + +#ifndef CAPSAICIN_MIGI_INTERNAL_H +#define CAPSAICIN_MIGI_INTERNAL_H +#include +namespace Capsaicin { +inline int divideAndRoundUp (int a, int b) { + return (a + b - 1) / b; +} +inline int divideAndRoundUp (uint32_t a, int b) { + return ((int)a + b - 1) / b; +} +inline uint32_t divideAndRoundUp (uint32_t a, uint32_t b) { + return (a + b - 1u) / b; +} +} + +#endif // CAPSAICIN_MIGI_INTERNAL_H diff --git a/src/core/src/render_techniques/migi/migi_lib.hlsl b/src/core/src/render_techniques/migi/migi_lib.hlsl index ccae2a5..7c564dd 100644 --- a/src/core/src/render_techniques/migi/migi_lib.hlsl +++ b/src/core/src/render_techniques/migi/migi_lib.hlsl @@ -266,11 +266,11 @@ float3 SampleSG (float2 u, float lambda, out float pdf) { return float3(sinTheta * cos(phi), sinTheta * sin(phi), cosTheta); } -uint4 FetchBasisData_Packed (int BasisIndex) { - uint P0 = g_RWProbeSGBuffer[BasisIndex * 4]; - uint P1 = g_RWProbeSGBuffer[BasisIndex * 4 + 1]; - uint P2 = g_RWProbeSGBuffer[BasisIndex * 4 + 2]; - uint P3 = 
g_RWProbeSGBuffer[BasisIndex * 4 + 3]; +uint4 FetchBasisData_Packed (int BasisIndex, bool bPrevious = false) { + uint P0 = bPrevious ? g_RWPreviousProbeSGBuffer[BasisIndex * 4] : g_RWProbeSGBuffer[BasisIndex * 4]; + uint P1 = bPrevious ? g_RWPreviousProbeSGBuffer[BasisIndex * 4 + 1] : g_RWProbeSGBuffer[BasisIndex * 4 + 1]; + uint P2 = bPrevious ? g_RWPreviousProbeSGBuffer[BasisIndex * 4 + 2] : g_RWProbeSGBuffer[BasisIndex * 4 + 2]; + uint P3 = bPrevious ? g_RWPreviousProbeSGBuffer[BasisIndex * 4 + 3] : g_RWProbeSGBuffer[BasisIndex * 4 + 3]; return uint4(P0, P1, P2, P3); } @@ -316,8 +316,8 @@ void WriteBasisData (int BasisIndex, SGData SG) { } -SGData FetchBasisData (int BasisIndex) { - uint4 Packed = FetchBasisData_Packed(BasisIndex); +SGData FetchBasisData (int BasisIndex, bool bPrevious = false) { + uint4 Packed = FetchBasisData_Packed(BasisIndex, bPrevious); return UnpackBasisData(Packed); } diff --git a/src/core/src/render_techniques/migi/migi_options.cpp b/src/core/src/render_techniques/migi/migi_options.cpp index ab87442..4791216 100644 --- a/src/core/src/render_techniques/migi/migi_options.cpp +++ b/src/core/src/render_techniques/migi/migi_options.cpp @@ -10,29 +10,13 @@ #include "components/stratified_sampler/stratified_sampler.h" #include "migi.h" +#include "migi_internal.h" namespace Capsaicin { RenderOptionList MIGI::getRenderOptions() noexcept { auto ret = RenderOptionList(); - -// ret.emplace("lr_rate", options_.cache_update_learing_rate); -// ret.emplace("cache_update_SG_color", options_.cache_update_SG_color); -// ret.emplace("cache_update_SG_direction", options_.cache_update_SG_direction); -// ret.emplace("cache_update_SG_lambda", options_.cache_update_SG_lambda); -// ret.emplace("cache_update_W_lambda", options_.cache_update_W_lambda); - -// ret.emplace("reset_screen_space_cache", options_.reset_screen_space_cache); - - ret.emplace("SSRC_max_basis_count", options_.SSRC_max_basis_count); -// ret.emplace("SSRC_basis_spawn_coverage_threshold", 
options_.SSRC_basis_spawn_coverage_threshold); -// ret.emplace("SSRC_min_weight_E", options_.SSRC_min_weight_E); -// ret.emplace("SSRC_initial_W_radius", options_.SSRC_initial_W_radius); - - ret.emplace("shading_with_geometry_normal", options_.shading_with_geometry_normal); -// ret.emplace("no_importance_sampling", options_.no_importance_sampling); - ret.emplace("fixed_step_size", options_.fixed_step_size); ret.emplace("enable_indirect", options_.enable_indirect); return ret; @@ -64,21 +48,24 @@ void MIGI::updateRenderOptions(const CapsaicinInternal &capsaicin) uint32_t new_width = capsaicin.getWidth(); uint32_t new_height = capsaicin.getHeight(); if(options_.width != new_width || options_.height != new_height) { - need_resize_ = true; + need_reload_memory_ = true; } options_.width = new_width; options_.height = new_height; - assert(options_.SSRC_update_ray_budget * 1.5 <= options_.SSRC_max_update_ray_count); - options_.SSRC_max_basis_count = std::min((int)std::get(in["SSRC_max_basis_count"]), cfg_.basis_buffer_allocation); + int uniform_probe_x = divideAndRoundUp(options_.width, SSRC_TILE_SIZE); + int uniform_probe_y = divideAndRoundUp(options_.height, SSRC_TILE_SIZE); + int uniform_probe_count = uniform_probe_x * uniform_probe_y; + int max_probe_count = options_.SSRC_max_adaptive_probe_count + uniform_probe_count; + if(options_.SSRC_max_probe_count != max_probe_count) { + need_reload_memory_ = true; + } + options_.SSRC_max_probe_count = max_probe_count; // Only SSRC update rays request ReSTIR sampling. 
options_.restir.max_query_ray_count = options_.SSRC_max_update_ray_count; - options_.shading_with_geometry_normal = std::get(in["shading_with_geometry_normal"]); -// options_.no_importance_sampling = std::get(in["no_importance_sampling"]); - options_.fixed_step_size = std::get(in["fixed_step_size"]); auto new_enable_indirect = std::get(in["enable_indirect"]); if(options_.enable_indirect != new_enable_indirect) { need_reload_kernel_ = true; @@ -109,14 +96,9 @@ void MIGI::updateRenderOptions(const CapsaicinInternal &capsaicin) } // Reload flags - need_reload_hash_grid_cache_debug_view_ = capsaicin.getCurrentDebugView() != options_.active_debug_view - && ((options_.active_debug_view.starts_with("HashGridCache_") - && !capsaicin.getCurrentDebugView().starts_with("HashGridCache_")) - || (!options_.active_debug_view.starts_with("HashGridCache_") - && capsaicin.getCurrentDebugView().starts_with("HashGridCache_"))); - // The screen space cache needs to be reset if the render state changes (i.e. camera transaction). need_reset_screen_space_cache_ |= options_.reset_screen_space_cache || capsaicin.getFrameIndex() == 0; + need_reset_screen_space_cache_ |= need_reload_memory_ | need_reload_kernel_; } DebugViewList MIGI::getDebugViews() const noexcept diff --git a/src/core/src/render_techniques/migi/migi_probes.hlsl b/src/core/src/render_techniques/migi/migi_probes.hlsl index c97240e..e4965c4 100644 --- a/src/core/src/render_techniques/migi/migi_probes.hlsl +++ b/src/core/src/render_techniques/migi/migi_probes.hlsl @@ -42,8 +42,8 @@ void WriteScreenProbeHeader (int2 ProbeIndex, ProbeHeader Header) { g_RWProbeNormalTexture[ProbeIndex] = UnitVectorToOctahedron(Header.Normal * 0.5f + 0.5f); } -int2 GetTileJitter (int TileSize, bool bPrevious = false) { - return Hammersley16((bPrevious ? MI.PreviousFrameSeed : MI.FrameSeed) % 8, 8, 0) * TileSize; +int2 GetTileJitter (bool bPrevious = false) { + return Hammersley16((bPrevious ? 
MI.PreviousFrameSeed : MI.FrameSeed) % 8, 8, 0) * SSRC_TILE_SIZE; } int2 GetScreenProbeScreenPosition (int2 ProbeIndex, bool bPrevious = false) { @@ -67,7 +67,7 @@ int ComputeProbeRankFromSplattedError (int2 ScreenCoords) { } int GetProbeBasisCountFromClass (int ProbeClass) { - return 1 << Class; + return (ProbeClass > 0) ? (1 << (ProbeClass - 1)) : 0; } // Get the coords of a probe within the adaptive probe index texture