Skip to content

Commit

Permalink
Describe light culling shader resources.
Browse files Browse the repository at this point in the history
  • Loading branch information
Flone-dnb committed Nov 20, 2023
1 parent 1f17bb1 commit f0657a0
Show file tree
Hide file tree
Showing 15 changed files with 476 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "../../../include/light_culling/CalculateGridTileFrustum.glsl"

/**
* Defines how many threads should be executed in the X and the Y dimensions of a grid tile.
* Defines how many threads should be executed in the X and the Y dimensions.
* This macro also defines how many pixels there are in one grid tile.
*/
#ifndef THREADS_IN_GROUP_XY
Expand Down
19 changes: 16 additions & 3 deletions res/engine/shaders/glsl/final/light_culling/LightCulling.comp
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#version 450

#include "../../../include/light_culling/CalculateGridTileFrustum.glsl"
#include "../../../include/light_culling/LightCulling.glsl"

/**
* Defines how many threads should be executed in the X and the Y dimensions.
* This macro also defines how many pixels there are in one grid tile.
*/
#ifndef THREADS_IN_GROUP_XY
FAIL;
#endif
Expand All @@ -12,6 +16,15 @@ void main(){
// - Presentation "DirectX 11 Rendering in Battlefield 3" (2011) by Johan Andersson, DICE.
// - "Forward+: A Step Toward Film-Style Shading in Real Time", Takahiro Harada (2012).

// Get depth.
float depth = texelFetch(depthTexture, gl_GlobalInvocationID.xy, 0).r;
// Get depth of this pixel.
float depth = texelFetch(depthTexture, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y), 0).r;

if (gl_LocalInvocationIndex == 0){
// Only one thread in the group should initialize group shared variables.
initializeGroupSharedVariables(gl_WorkGroupID.x, gl_WorkGroupID.y);
}

// Make sure all group shared writes were finished and all threads from the group reached this line.
groupMemoryBarrier(); // wait for shared writes to finish
barrier(); // wait for threads of group to reach this line
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "../../include/light_culling/CalculateGridTileFrustum.glsl"

/**
* Defines how many threads should be executed in the X and the Y dimensions of a grid tile.
* Defines how many threads should be executed in the X and the Y dimensions.
* This macro also defines how many pixels there are in one grid tile.
*/
#ifndef THREADS_IN_GROUP_XY
Expand Down
19 changes: 16 additions & 3 deletions res/engine/shaders/hlsl/light_culling/LightCulling.comp.hlsl
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
#include "../../include/light_culling/LightCulling.glsl"

/**
* Defines how many threads should be executed in the X and the Y dimensions.
* This macro also defines how many pixels there are in one grid tile.
*/
#ifndef THREADS_IN_GROUP_XY
_Static_assert(false, "thread count in group - macro not defined");
#endif

/** 1 thread per pixel in a tile. 1 thread group per tile. */
[numthreads(THREADS_IN_GROUP_XY, THREADS_IN_GROUP_XY, 1 )]
void csLightCulling(uint3 dispatchThreadID : SV_DispatchThreadID){
void csLightCulling(uint3 threadIdInDispatch : SV_DispatchThreadID, uint threadIdInGroup : SV_GroupIndex, uint3 groupIdInDispatch : SV_GroupID){
// Sources:
// - Presentation "DirectX 11 Rendering in Battlefield 3" (2011) by Johan Andersson, DICE.
// - "Forward+: A Step Toward Film-Style Shading in Real Time", Takahiro Harada (2012).

// Get depth.
float depth = depthTexture.Load(int3(dispatchThreadID.xy, 0)).r;
// Get depth of this pixel.
float depth = depthTexture.Load(int3(threadIdInDispatch.xy, 0)).r;

if (threadIdInGroup == 0){
// Only one thread in the group should initialize group shared variables.
initializeGroupSharedVariables(groupIdInDispatch.x, groupIdInDispatch.y);
}

// Make sure all group shared writes were finished and all threads from the group reached this line.
GroupMemoryBarrierWithGroupSync();
}
318 changes: 305 additions & 13 deletions res/engine/shaders/include/light_culling/LightCulling.glsl

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions src/engine_lib/private/render/directx/DirectXRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,19 @@ namespace ne {
#endif

/**
 * Creates a DirectX renderer, forwarding the game manager to the base `Renderer` constructor.
 *
 * The body has no runtime logic: it only holds compile-time checks of the back buffer and
 * depth/stencil buffer formats.
 *
 * @param pGameManager Game manager that is passed to the base class constructor.
 */
DirectXRenderer::DirectXRenderer(GameManager* pGameManager) : Renderer(pGameManager) {
// Make sure we use the same formats as in Vulkan.
// NOTE(review): the Vulkan renderer asserts `VK_FORMAT_B8G8R8A8_UNORM` for its swap chain image
// (different channel order than the back buffer format checked here) - confirm this is intended.
static_assert(
backBufferFormat == DXGI_FORMAT_R8G8B8A8_UNORM,
"also change format in Vulkan renderer for (visual) consistency");
static_assert(
depthStencilBufferFormat == DXGI_FORMAT_D24_UNORM_S8_UINT,
"also change format in Vulkan renderer for (visual) consistency");

// Self check for light culling compute shader:
// (same condition as the depth/stencil check above, but the message points at the light culling
// compute shader, which expects depth values in range [0..1]).
static_assert(
depthStencilBufferFormat == DXGI_FORMAT_D24_UNORM_S8_UINT,
"light culling compute shader expects the depth values to be in range [0..1], please review the "
"light culling compute shader and make sure it works correctly");
}

std::vector<ShaderDescription> DirectXRenderer::getEngineShadersToCompile() const {
Expand Down
58 changes: 35 additions & 23 deletions src/engine_lib/private/render/vulkan/VulkanRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

namespace ne {
VulkanRenderer::VulkanRenderer(GameManager* pGameManager) : Renderer(pGameManager) {
// Make sure we use the same formats as in DirectX.
static_assert(
swapChainImageFormat == VK_FORMAT_B8G8R8A8_UNORM,
"also change format in DirectX renderer for (visual) consistency");
Expand All @@ -39,6 +40,12 @@ namespace ne {
static_assert(
depthImageFormat == VK_FORMAT_D24_UNORM_S8_UINT,
"also change format in DirectX renderer for (visual) consistency");

// Self check for light culling compute shader:
static_assert(
depthImageFormat == VK_FORMAT_D24_UNORM_S8_UINT,
"light culling compute shader expects the depth values to be in range [0..1], please review the "
"light culling compute shader and make sure it works correctly");
}

std::variant<MsaaState, Error> VulkanRenderer::getMaxSupportedAntialiasingQuality() const {
Expand Down Expand Up @@ -484,14 +491,20 @@ namespace ne {

std::variant<std::string, Error> VulkanRenderer::isDeviceSuitable(VkPhysicalDevice pGpu) {
// Get device properties.
VkPhysicalDeviceProperties deviceProperties;
vkGetPhysicalDeviceProperties(pGpu, &deviceProperties);
VkPhysicalDeviceDepthStencilResolveProperties depthStencilResolve{};
depthStencilResolve.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES;
depthStencilResolve.pNext = nullptr;

VkPhysicalDeviceProperties2 deviceProperties;
deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
deviceProperties.pNext = &depthStencilResolve;
vkGetPhysicalDeviceProperties2(pGpu, &deviceProperties);

// Make sure this GPU supports used Vulkan version.
if (deviceProperties.apiVersion < iUsedVulkanVersion) {
if (deviceProperties.properties.apiVersion < iUsedVulkanVersion) {
return std::format(
"GPU \"{}\" does not support used Vulkan version {}",
deviceProperties.deviceName,
deviceProperties.properties.deviceName,
getUsedApiVersion());
}

Expand All @@ -505,7 +518,8 @@ namespace ne {
const auto queueFamiliesIndices = std::get<QueueFamilyIndices>(std::move(queueFamiliesResult));
if (!queueFamiliesIndices.isComplete()) {
return std::format(
"GPU \"{}\" does not support all required queue families", deviceProperties.deviceName);
"GPU \"{}\" does not support all required queue families",
deviceProperties.properties.deviceName);
}

// Make sure this GPU supports all used device extensions.
Expand All @@ -519,7 +533,7 @@ namespace ne {
if (!sMissingDeviceExtension.empty()) {
return std::format(
"GPU \"{}\" does not support required device extension \"{}\"",
deviceProperties.deviceName,
deviceProperties.properties.deviceName,
sMissingDeviceExtension);
}

Expand All @@ -538,13 +552,9 @@ namespace ne {

// Prepare a linked list of features that will be filled in `vkGetPhysicalDeviceFeatures2` below
// so that we can check their support.
VkPhysicalDeviceDepthStencilResolveProperties depthStencilResolve{};
depthStencilResolve.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES;
depthStencilResolve.pNext = nullptr;

VkPhysicalDeviceDescriptorIndexingFeatures descriptorIndexingFeatures{};
descriptorIndexingFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES;
descriptorIndexingFeatures.pNext = &depthStencilResolve;
descriptorIndexingFeatures.pNext = nullptr;

VkPhysicalDeviceFeatures2 deviceFeatures2{};
deviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
Expand All @@ -557,16 +567,16 @@ namespace ne {
// (because RenderSettings don't check whether it's supported or not when deserialized/changed)
if (deviceFeatures2.features.samplerAnisotropy == VK_FALSE) {
return std::format(
"GPU \"{}\" does not support anisotropic filtering", deviceProperties.deviceName);
"GPU \"{}\" does not support anisotropic filtering", deviceProperties.properties.deviceName);
}

// Make sure that maximum push constants size that we use is supported.
if (VulkanPushConstantsManager::getMaxPushConstantsSizeInBytes() >
deviceProperties.limits.maxPushConstantsSize) {
deviceProperties.properties.limits.maxPushConstantsSize) {
return std::format(
"GPU \"{}\" max push constants size is only {} while we expect {}",
deviceProperties.deviceName,
deviceProperties.limits.maxPushConstantsSize,
deviceProperties.properties.deviceName,
deviceProperties.properties.limits.maxPushConstantsSize,
VulkanPushConstantsManager::getMaxPushConstantsSizeInBytes());
}

Expand All @@ -576,17 +586,19 @@ namespace ne {
descriptorIndexingFeatures.descriptorBindingPartiallyBound == VK_FALSE ||
descriptorIndexingFeatures.runtimeDescriptorArray == VK_FALSE) {
return std::format(
"GPU \"{}\" does not support used indexing features", deviceProperties.deviceName);
"GPU \"{}\" does not support used indexing features", deviceProperties.properties.deviceName);
}

// Make sure used depth resolve mode is supported.
if ((depthStencilResolve.supportedDepthResolveModes & depthResolveMode) == 0) {
return std::format(
"GPU \"{}\" does not support used depth resolve mode", deviceProperties.deviceName);
"GPU \"{}\" does not support used depth resolve mode",
deviceProperties.properties.deviceName);
}
if ((depthStencilResolve.supportedStencilResolveModes & stencilResolveMode) == 0) {
return std::format(
"GPU \"{}\" does not support used stencil resolve mode", deviceProperties.deviceName);
"GPU \"{}\" does not support used stencil resolve mode",
deviceProperties.properties.deviceName);
}

// Make sure engine texture resource formats are supported as storage images.
Expand All @@ -597,7 +609,7 @@ namespace ne {
// Get format support details.
VkFormatProperties formatProperties;
vkGetPhysicalDeviceFormatProperties(
pPhysicalDevice,
pGpu,
VulkanResourceManager::convertTextureResourceFormatToVkFormat(format),
&formatProperties);

Expand All @@ -606,7 +618,7 @@ namespace ne {
return std::format(
"GPU \"{}\" does not support one of the used texture resource formats to be used as "
"a storage image",
deviceProperties.deviceName);
deviceProperties.properties.deviceName);
}
}

Expand Down Expand Up @@ -1855,9 +1867,9 @@ namespace ne {
VK_SAMPLE_COUNT_1_BIT, // 1 sample
depthImageFormat,
depthImageTiling,
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_DEPTH_BIT); // specify only depth aspect because we only care about it
if (std::holds_alternative<Error>(result)) [[unlikely]] {
auto error = std::get<Error>(std::move(result));
error.addCurrentLocationToErrorStack();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@ namespace ne {
/** Macro value. */
static inline const auto sValue = "16";
};

/**
 * Describes how many lights (of one specific type) a single light grid tile is expected
 * to contain on average, for opaque or transparent geometry.
 */
struct AverageNumLightsOfSpecificTypePerTileMacro {
    /** Name of the macro. */
    static inline const char* const sName = "AVERAGE_NUM_LIGHTS_OF_SPECIFIC_TYPE_PER_TILE";

    /** Value of the macro (stored as text). */
    static inline const char* const sValue = "200";
};
};
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ namespace ne {

#if defined(DEBUG) && defined(WIN32)
static_assert(
sizeof(LightingShaderResourceManager) == 240, "consider notifying new arrays here"); // NOLINT
sizeof(LightingShaderResourceManager) == 256, "consider notifying new arrays here"); // NOLINT
#elif defined(DEBUG)
static_assert(
sizeof(LightingShaderResourceManager) == 144, "consider notifying new arrays here"); // NOLINT
Expand Down Expand Up @@ -626,7 +626,7 @@ namespace ne {

#if defined(DEBUG) && defined(WIN32)
static_assert(
sizeof(LightingShaderResourceManager) == 240, "consider notifying new arrays here"); // NOLINT
sizeof(LightingShaderResourceManager) == 256, "consider notifying new arrays here"); // NOLINT
#elif defined(DEBUG)
static_assert(
sizeof(LightingShaderResourceManager) == 144, "consider notifying new arrays here"); // NOLINT
Expand Down Expand Up @@ -655,7 +655,7 @@ namespace ne {

#if defined(DEBUG) && defined(WIN32)
static_assert(
sizeof(LightingShaderResourceManager) == 240, "consider notifying new arrays here"); // NOLINT
sizeof(LightingShaderResourceManager) == 256, "consider notifying new arrays here"); // NOLINT
#elif defined(DEBUG)
static_assert(
sizeof(LightingShaderResourceManager) == 144, "consider notifying new arrays here"); // NOLINT
Expand Down Expand Up @@ -1043,7 +1043,7 @@ namespace ne {

#if defined(DEBUG) && defined(WIN32)
static_assert(
sizeof(LightingShaderResourceManager) == 240, "consider creating new arrays here"); // NOLINT
sizeof(LightingShaderResourceManager) == 256, "consider creating new arrays here"); // NOLINT
#elif defined(DEBUG)
static_assert(
sizeof(LightingShaderResourceManager) == 144, "consider creating new arrays here"); // NOLINT
Expand All @@ -1065,7 +1065,7 @@ namespace ne {
std::scoped_lock renderGuard(*pRenderer->getRenderResourcesMutex());
pRenderer->waitForGpuToFinishWorkUpToThisPoint();

// Get tile size.
// Get tile size (this value also describes threads in one thread group).
size_t iTileSizeInPixels = 0;
try {
iTileSizeInPixels = std::stoull(
Expand All @@ -1075,14 +1075,14 @@ namespace ne {
"failed to convert frustum grid tile size to an integer, error: {}", exception.what()));
};

// Calculate tile count.
// Calculate tile count (using INT/INT to "floor" if not divisible equally).
const auto iTileCountX = static_cast<unsigned int>(renderResolution.first / iTileSizeInPixels);
const auto iTileCountY = static_cast<unsigned int>(renderResolution.second / iTileSizeInPixels);

// Calculate frustum count.
const size_t iFrustumCount = iTileCountX * iTileCountY;

// Calculate thread group count.
// Calculate thread group count (we should dispatch 1 thread per tile).
const auto iThreadGroupCountX = static_cast<unsigned int>(
std::ceil(static_cast<float>(iTileCountX) / static_cast<float>(iTileSizeInPixels)));
const auto iThreadGroupCountY = static_cast<unsigned int>(
Expand Down Expand Up @@ -1134,6 +1134,10 @@ namespace ne {
pComputeInterface->submitForExecution(iThreadGroupCountX, iThreadGroupCountY, 1);
}

// Save tile count to be used by light culling shader.
iLastUpdateTileCountX = iTileCountX;
iLastUpdateTileCountY = iTileCountY;

return {};
}

Expand Down Expand Up @@ -1271,8 +1275,9 @@ namespace ne {
// The resource that stores the calculated grid of frustums is bound inside the update function
// for the shader that calculates that grid.

// Queue shader execution.
pComputeInterface->submitForExecution(16, 16, 1); // TODO
// Queue shader execution (we need to dispatch 1 thread group per tile).
pComputeInterface->submitForExecution(
frustumGridShader.iLastUpdateTileCountX, frustumGridShader.iLastUpdateTileCountY, 1);

return {};
}
Expand Down Expand Up @@ -1341,7 +1346,7 @@ namespace ne {

#if defined(DEBUG) && defined(WIN32)
static_assert(
sizeof(LightingShaderResourceManager) == 240, "consider resetting new arrays here"); // NOLINT
sizeof(LightingShaderResourceManager) == 256, "consider resetting new arrays here"); // NOLINT
#elif defined(DEBUG)
static_assert(
sizeof(LightingShaderResourceManager) == 144, "consider resetting new arrays here"); // NOLINT
Expand Down
Loading

0 comments on commit f0657a0

Please sign in to comment.