diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..b70e42b --- /dev/null +++ b/.clang-format @@ -0,0 +1,47 @@ +BasedOnStyle: Google +IndentWidth: 4 +UseTab: Never +ColumnLimit: 160 +Language: Cpp +AccessModifierOffset: -4 +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +ConstructorInitializerAllOnOneLineOrOnePerLine : false +BreakConstructorInitializers: BeforeComma +DerivePointerAlignment: false +IndentCaseLabels: false +NamespaceIndentation: All +AlignConsecutiveAssignments: true +AlignConsecutiveDeclarations: true +AlignEscapedNewlines: Left +AlignTrailingComments: true +AlignOperands: true +AllowShortFunctionsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AllowShortBlocksOnASingleLine: false +ReflowComments: false +SortIncludes: false +SortUsingDeclarations: false +BinPackArguments: false +BinPackParameters: false +ExperimentalAutoDetectBinPacking: false +AllowAllParametersOfDeclarationOnNextLine: true +AlignConsecutiveMacros: true +AlignAfterOpenBracket: true diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..57fba3b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,15 @@ +# Set the default behavior, in case people don't have core.autocrlf set. +* text=auto + +# Declare files that will always have LF line endings on checkout. +*.c text eol=lf +*.cpp text eol=lf +*.h text eol=lf +*.hpp text eol=lf +*.idl text eol=lf +*.json text eol=lf +*.hlsl text eol=lf +*.patch text eol=lf + +# Denote all files that are truly binary and should not be modified. 
+*.png binary \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..48b85f3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# CMake build folder +build +# Binary output folder +bin \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..a8e1f4c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,33 @@ +# This file is part of the AMD Work Graph Mesh Node Sample. +# +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files(the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions : +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+# imported/update-agilitysdk.cmake relies on file(ARCHIVE_EXTRACT) (CMake 3.18) and
+# file(COPY_FILE) (CMake 3.21), so 3.21 is the oldest version able to configure this project.
+cmake_minimum_required(VERSION 3.21)
+
+project("Work Graphs Mesh Node Sample" VERSION 0.1.0 LANGUAGES CXX)
+
+# Import FidelityFX & Cauldron
+add_subdirectory(imported)
+
+# Add Work Graph Mesh Node Sample
+add_subdirectory(meshNodeSample)
+
+# Make the sample the startup project of the generated Visual Studio solution.
+# The property is set on the directory of this (top-level) CMakeLists.txt.
+set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" PROPERTY VS_STARTUP_PROJECT MeshNodeSample)
diff --git a/imported/CMakeLists.txt b/imported/CMakeLists.txt
new file mode 100644
index 0000000..f4aea3c
--- /dev/null
+++ b/imported/CMakeLists.txt
@@ -0,0 +1,67 @@
+# This file is part of the AMD Work Graph Mesh Node Sample.
+#
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files(the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions :
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+include(FetchContent)
+
+# FidelityFX SDK, pinned to an exact commit
+FetchContent_Declare(
+    ffxsdk
+    GIT_REPOSITORY https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK
+    GIT_TAG        55ff22bb6981a9b9c087b9465101769fc0acd447 # fsr3-v3.0.4
+)
+
+FetchContent_GetProperties(ffxsdk)
+
+# The SDK is populated by hand so that it can be patched and configured
+# before its own CMakeLists.txt is processed.
+if(NOT ffxsdk_POPULATED)
+    message(STATUS "Downloading FidelityFX SDK")
+    FetchContent_Populate(ffxsdk)
+    message(STATUS "Downloaded FidelityFX SDK to ${ffxsdk_SOURCE_DIR}")
+
+    # The patch scripts locate the SDK checkout through FFX_ROOT
+    set(FFX_ROOT "${ffxsdk_SOURCE_DIR}")
+    include(patch-ffx.cmake)
+
+    # SDK configuration:
+    #  - BUILD_TYPE FFX_NONE : don't build any FFX samples
+    #  - FFX_API CAULDRON    : build the FFX SDK with the Cauldron backend
+    #  - FFX_FSR2            : enable FSR2 in the FFX SDK
+    #  - FFX_FSR / FFX_FSR1  : FFX_FSR is required for FFX_FSR2, but also enables the
+    #                          FFX sample, which in turn requires FFX_FSR1
+    set(BUILD_TYPE FFX_NONE)
+    set(FFX_API CAULDRON)
+    set(FFX_FSR2 ON)
+    set(FFX_FSR ON)
+    set(FFX_FSR1 ON)
+
+    # The SDK derives all of its internal paths from CMAKE_HOME_DIRECTORY. It is not the
+    # top-level project here, so point CMAKE_HOME_DIRECTORY at the SDK checkout to keep
+    # those paths valid.
+    set(CMAKE_HOME_DIRECTORY "${ffxsdk_SOURCE_DIR}")
+    add_subdirectory("${ffxsdk_SOURCE_DIR}" "${ffxsdk_BINARY_DIR}")
+
+    # Group the FFX_FSR sample target in its own solution folder
+    set_target_properties(FFX_FSR PROPERTIES FOLDER "FFX Samples")
+endif()
+
+# Export the SDK root directory to the parent directory scope
+set(FFX_ROOT "${ffxsdk_SOURCE_DIR}" PARENT_SCOPE)
+
+# Export the configuration list to the top level; only DX12 is supported for this sample
+set(CMAKE_CONFIGURATION_TYPES "DebugDX12;ReleaseDX12;RelWithDebInfoDX12" PARENT_SCOPE)
\ No newline at end of file
diff --git a/imported/agilitysdk-version.patch b/imported/agilitysdk-version.patch
new file mode 100644
index 0000000..60919ee
--- /dev/null
+++ b/imported/agilitysdk-version.patch
@@ -0,0 +1,52 @@
+diff --git a/framework/cauldron/framework/libs/agilitysdk/CAULDRONREADME.md b/framework/cauldron/framework/libs/agilitysdk/CAULDRONREADME.md
+index 
e445450..470c187 100644 +--- a/framework/cauldron/framework/libs/agilitysdk/CAULDRONREADME.md ++++ b/framework/cauldron/framework/libs/agilitysdk/CAULDRONREADME.md +@@ -1,7 +1,7 @@ + # DX12 Agility SDK + + ## Current Version +-1.608.2 ++1.715.0-preview + + ## How to update + 1. Download the latest version (as a .nupkg) from https://devblogs.microsoft.com/directx/directx12agility/ +diff --git a/framework/cauldron/framework/src/render/dx12/device_dx12.cpp b/framework/cauldron/framework/src/render/dx12/device_dx12.cpp +index 6782b97..580ff02 100644 +--- a/framework/cauldron/framework/src/render/dx12/device_dx12.cpp ++++ b/framework/cauldron/framework/src/render/dx12/device_dx12.cpp +@@ -36,7 +36,7 @@ + using namespace Microsoft::WRL; + + // D3D12SDKVersion needs to line up with the version number on Microsoft's DirectX12 Agility SDK Download page +-extern "C" { __declspec(dllexport) extern const UINT D3D12SDKVersion = 608; } ++extern "C" { __declspec(dllexport) extern const UINT D3D12SDKVersion = 715; } + extern "C" { __declspec(dllexport) extern const char* D3D12SDKPath = u8".\\D3D12\\"; } + + namespace cauldron +diff --git a/sdk/tools/ffx_shader_compiler/libs/agilitysdk/FFX_SDK_README.md b/sdk/tools/ffx_shader_compiler/libs/agilitysdk/FFX_SDK_README.md +index d3ae9cd..db89102 100644 +--- a/sdk/tools/ffx_shader_compiler/libs/agilitysdk/FFX_SDK_README.md ++++ b/sdk/tools/ffx_shader_compiler/libs/agilitysdk/FFX_SDK_README.md +@@ -1,7 +1,7 @@ + # DX12 Agility SDK + + ## Current Version +-1.608.2 ++1.715.0-preview + + ## How to update + 1. 
Download the latest version (as a .nupkg) from https://devblogs.microsoft.com/directx/directx12agility/ +diff --git a/sdk/tools/ffx_shader_compiler/src/hlsl_compiler.cpp b/sdk/tools/ffx_shader_compiler/src/hlsl_compiler.cpp +index 5375d3d..39884e1 100644 +--- a/sdk/tools/ffx_shader_compiler/src/hlsl_compiler.cpp ++++ b/sdk/tools/ffx_shader_compiler/src/hlsl_compiler.cpp +@@ -24,7 +24,7 @@ + #include "utils.h" + + // D3D12SDKVersion needs to line up with the version number on Microsoft's DirectX12 Agility SDK Download page +-extern "C" { __declspec(dllexport) extern const UINT D3D12SDKVersion = 608; } ++extern "C" { __declspec(dllexport) extern const UINT D3D12SDKVersion = 715; } + extern "C" { __declspec(dllexport) extern const char* D3D12SDKPath = u8".\\D3D12\\"; } + + struct DxcCustomIncludeHandler : public IDxcIncludeHandler diff --git a/imported/binoutput.patch b/imported/binoutput.patch new file mode 100644 index 0000000..f9c0915 --- /dev/null +++ b/imported/binoutput.patch @@ -0,0 +1,13 @@ +diff --git a/common.cmake b/common.cmake +index 88aeb5d..4a2283b 100644 +--- a/common.cmake ++++ b/common.cmake +@@ -40,7 +40,7 @@ endif() + set(SAMPLE_ROOT ${CMAKE_HOME_DIRECTORY}/samples) + set(SDK_ROOT ${CMAKE_HOME_DIRECTORY}/sdk) + set(FRAMEWORK_ROOT ${CMAKE_HOME_DIRECTORY}/framework) +-set(BIN_OUTPUT ${CMAKE_HOME_DIRECTORY}/bin) ++set(BIN_OUTPUT ${CMAKE_SOURCE_DIR}/bin) + set(CAULDRON_ROOT ${FRAMEWORK_ROOT}/cauldron) + set(RENDERMODULE_ROOT ${FRAMEWORK_ROOT}/rendermodules) + set(FFX_API_CAULDRON_ROOT ${SAMPLE_ROOT}/ffx_cauldron) diff --git a/imported/cameracomponent.patch b/imported/cameracomponent.patch new file mode 100644 index 0000000..8b21864 --- /dev/null +++ b/imported/cameracomponent.patch @@ -0,0 +1,13 @@ +diff --git a/framework/cauldron/framework/inc/core/components/cameracomponent.h b/framework/cauldron/framework/inc/core/components/cameracomponent.h +index 114b5db..33a274d 100644 +--- a/framework/cauldron/framework/inc/core/components/cameracomponent.h 
++++ b/framework/cauldron/framework/inc/core/components/cameracomponent.h +@@ -245,7 +245,7 @@ namespace cauldron + */ + static void SetJitterCallbackFunc(CameraJitterCallback callbackFunc) { s_pSetJitterCallback = callbackFunc; } + +- private: ++ protected: + CameraComponent() = delete; + + void ResetCamera(); diff --git a/imported/dxil.patch b/imported/dxil.patch new file mode 100644 index 0000000..f452d2f --- /dev/null +++ b/imported/dxil.patch @@ -0,0 +1,37 @@ +diff --git a/framework/cauldron/framework/libs/dxc/CMakeLists.txt b/framework/cauldron/framework/libs/dxc/CMakeLists.txt +index 6695b76..fad43bb 100644 +--- a/framework/cauldron/framework/libs/dxc/CMakeLists.txt ++++ b/framework/cauldron/framework/libs/dxc/CMakeLists.txt +@@ -2,8 +2,7 @@ add_library(dxc INTERFACE) + target_include_directories(dxc INTERFACE BEFORE "inc/") + + set(dxc_binaries +- ${CMAKE_CURRENT_SOURCE_DIR}/bin/x64/dxcompiler.dll +- ${CMAKE_CURRENT_SOURCE_DIR}/bin/x64/dxil.dll) ++ ${CMAKE_CURRENT_SOURCE_DIR}/bin/x64/dxcompiler.dll) + + copyTargetCommand("${dxc_binaries}" ${BIN_OUTPUT} copied_dxc_bin) + add_dependencies(dxc copied_dxc_bin) +\ No newline at end of file +diff --git a/framework/cauldron/framework/src/render/win/shaderbuilder_win.cpp b/framework/cauldron/framework/src/render/win/shaderbuilder_win.cpp +index 4847f86..47e2b50 100644 +--- a/framework/cauldron/framework/src/render/win/shaderbuilder_win.cpp ++++ b/framework/cauldron/framework/src/render/win/shaderbuilder_win.cpp +@@ -312,14 +312,10 @@ namespace cauldron + ComPtr pCompiledResult; + pCompiler->Compile(&shaderCodeBuffer, arguments.data(), static_cast(arguments.size()), &includeFileHandler, IID_PPV_ARGS(&pCompiledResult)); + +- // Handle any errors if they occurred +- ComPtr pErrors; // wide version currently doesn't appear to be supported +- pCompiledResult->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&pErrors), nullptr); +- if (pErrors && pErrors->GetStringLength() > 0) ++ HRESULT compileStatus; ++ if 
(FAILED(pCompiledResult->GetStatus(&compileStatus)) || FAILED(compileStatus)) + { +- std::string errorString = pErrors->GetStringPointer(); +- std::wstring errorWString = StringToWString(errorString.c_str()); +- CauldronCritical(L"%ls : %ls", (shaderFile)? filePath.c_str() : L"ShaderCodeString", errorWString.c_str()); ++ CauldronCritical(L"%ls", (shaderFile)? filePath.c_str() : L"ShaderCodeString"); + return nullptr; + } + diff --git a/imported/patch-ffx.cmake b/imported/patch-ffx.cmake new file mode 100644 index 0000000..4eea100 --- /dev/null +++ b/imported/patch-ffx.cmake @@ -0,0 +1,52 @@ +# This file is part of the AMD Work Graph Mesh Node Sample. +# +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files(the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions : +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+# This script applies small modifications to the FidelityFX & Cauldron SDK checkout in FFX_ROOT.
+# Patches:
+# - Update Microsoft Agility SDK to 715 and the DirectX Shader Compiler (update-agilitysdk.cmake)
+# - cameracomponent.patch: patch camera component to allow for custom implementation
+# - binoutput.patch: patch bin output directory
+# - dxil.patch: patch copying of dxil.dll to output directory
+
+# Update Agility SDK
+include(update-agilitysdk.cmake)
+
+find_package(Git REQUIRED)
+
+# Applies a patch file from this directory to the SDK checkout in FFX_ROOT.
+#
+#   patch_name  - file name of the patch, relative to CMAKE_CURRENT_SOURCE_DIR
+#   description - what is being patched; only used for the status message
+#
+# Patching is best-effort and never aborts the configure step: a patch that is already
+# present in the checkout (e.g. when CMake is re-run) is skipped, and a patch that cannot
+# be applied produces a warning with git's error output instead of failing silently.
+function(meshnode_apply_ffx_patch patch_name description)
+    message(STATUS "Patching ${description}")
+    set(patch_file "${CMAKE_CURRENT_SOURCE_DIR}/${patch_name}")
+
+    # If the reversed patch applies cleanly, the patch is already in the tree.
+    execute_process(COMMAND "${GIT_EXECUTABLE}" apply --reverse --check "${patch_file}"
+        WORKING_DIRECTORY "${FFX_ROOT}"
+        RESULT_VARIABLE reverse_check_result
+        OUTPUT_QUIET
+        ERROR_QUIET)
+    if(reverse_check_result EQUAL 0)
+        return()
+    endif()
+
+    execute_process(COMMAND "${GIT_EXECUTABLE}" apply "${patch_file}"
+        WORKING_DIRECTORY "${FFX_ROOT}"
+        RESULT_VARIABLE apply_result
+        ERROR_VARIABLE apply_error
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(NOT apply_result EQUAL 0)
+        message(WARNING "Failed to apply ${patch_name} to ${FFX_ROOT}: ${apply_error}")
+    endif()
+endfunction()
+
+meshnode_apply_ffx_patch(cameracomponent.patch "cameracomponent.h")
+meshnode_apply_ffx_patch(binoutput.patch "common.cmake")
+meshnode_apply_ffx_patch(dxil.patch "dxil.dll copy")
diff --git a/imported/update-agilitysdk.cmake b/imported/update-agilitysdk.cmake
new file mode 100644
index 0000000..f18d0d5
--- /dev/null
+++ b/imported/update-agilitysdk.cmake
@@ -0,0 +1,126 @@
+# This file is part of the AMD Work Graph Mesh Node Sample.
+#
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files(the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions :
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# This script will download the Microsoft Agility SDK & DirectX Shader compiler from the official NuGet package repository
+# and copy their binaries and headers into the Cauldron source tree below FFX_ROOT.
+# Update these URLs and perform a clean build if you wish to use a newer version of these packages.
+set(AGILITY_SDK_URL "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/1.715.0-preview")
+set(DXC_URL "https://www.nuget.org/api/v2/package/Microsoft.Direct3D.DXC/1.8.2404.55-mesh-nodes-preview")
+
+# Downloads a NuGet package unless a previous configure run already did.
+#
+#   display_name - package name used in status and error messages
+#   url          - download URL of the package
+#   archive      - destination path of the downloaded archive
+#
+# Aborts the configure step if the download fails.
+function(meshnode_download_package display_name url archive)
+    if(EXISTS "${archive}")
+        message(STATUS "Found local copy of ${url} in ${archive}. Skipping download.")
+        return()
+    endif()
+
+    message(STATUS "Downloading ${display_name} from ${url}")
+    file(DOWNLOAD "${url}" "${archive}" STATUS download_result)
+
+    list(GET download_result 0 download_result_code)
+    if(NOT download_result_code EQUAL 0)
+        # A failed download can leave an empty or partial archive behind. Remove it, otherwise
+        # the EXISTS check above would skip the download on every following configure run.
+        file(REMOVE "${archive}")
+        message(FATAL_ERROR "Failed to download ${display_name}! Error: ${download_result}.")
+    endif()
+
+    message(STATUS "Successfully downloaded ${display_name}")
+endfunction()
+
+# ---------------------------------------------
+# Agility SDK
+# ---------------------------------------------
+set(agility_sdk_dir "${CMAKE_CURRENT_BINARY_DIR}/agilitySDK")
+set(agility_sdk_bin "${agility_sdk_dir}/build/native/bin/x64")
+
+meshnode_download_package("Agility SDK" "${AGILITY_SDK_URL}" "${CMAKE_CURRENT_BINARY_DIR}/agilitySDK.zip")
+
+message(STATUS "Extracting Agility SDK")
+
+# extract agility SDK zip
+file(ARCHIVE_EXTRACT
+    INPUT "${CMAKE_CURRENT_BINARY_DIR}/agilitySDK.zip"
+    DESTINATION "${agility_sdk_dir}")
+
+# validate agility SDK binaries
+if(NOT EXISTS "${agility_sdk_bin}/D3D12Core.dll" OR
+   NOT EXISTS "${agility_sdk_bin}/d3d12SDKLayers.dll")
+    message(FATAL_ERROR "Failed to extract Agility SDK!")
+endif()
+
+message(STATUS "Successfully extracted Agility SDK")
+
+set(CAULDRON_AGILITY_SDK_PATH ${FFX_ROOT}/framework/cauldron/framework/libs/agilitysdk)
+
+# copy Agility SDK binaries
+foreach(agility_binary IN ITEMS
+        D3D12Core.dll
+        D3D12Core.pdb
+        d3d12SDKLayers.dll
+        d3d12SDKLayers.pdb
+        d3dconfig.exe
+        d3dconfig.pdb)
+    file(COPY_FILE "${agility_sdk_bin}/${agility_binary}" "${CAULDRON_AGILITY_SDK_PATH}/bin/x64/${agility_binary}")
+endforeach()
+
+# copy Agility SDK headers
+file(COPY "${agility_sdk_dir}/build/native/include" DESTINATION "${CAULDRON_AGILITY_SDK_PATH}")
+
+message(STATUS "Successfully copied Agility SDK to Cauldron source")
+
+# ---------------------------------------------
+# DirectX Shader Compiler
+# ---------------------------------------------
+set(dxc_dir "${CMAKE_CURRENT_BINARY_DIR}/dxc")
+
+meshnode_download_package("DirectX Shader Compiler" "${DXC_URL}" "${CMAKE_CURRENT_BINARY_DIR}/dxc.zip")
+
+message(STATUS "Extracting DirectX Shader Compiler")
+
+# extract dxc zip
+file(ARCHIVE_EXTRACT
+    INPUT "${CMAKE_CURRENT_BINARY_DIR}/dxc.zip"
+    DESTINATION "${dxc_dir}")
+
+# validate DXC binaries
+if(NOT EXISTS "${dxc_dir}/build/native/bin/x64/dxcompiler.dll")
+    message(FATAL_ERROR "Failed to extract DirectX Shader Compiler!")
+endif()
+
+message(STATUS "Successfully extracted DirectX Shader Compiler")
+
+set(CAULDRON_DXC_PATH ${FFX_ROOT}/framework/cauldron/framework/libs/dxc)
+
+# copy dxc binaries
+file(COPY_FILE "${dxc_dir}/build/native/bin/x64/dxcompiler.dll" "${CAULDRON_DXC_PATH}/bin/x64/dxcompiler.dll")
+file(COPY_FILE "${dxc_dir}/build/native/bin/x64/dxc.exe" "${CAULDRON_DXC_PATH}/bin/x64/dxc.exe")
+file(COPY_FILE "${dxc_dir}/build/native/lib/x64/dxcompiler.lib" "${CAULDRON_DXC_PATH}/lib/x64/dxcompiler.lib")
+
+# copy dxc headers
+foreach(dxc_header IN ITEMS d3d12shader.h dxcapi.h dxcerrors.h dxcisense.h)
+    file(COPY_FILE "${dxc_dir}/build/native/include/${dxc_header}" "${CAULDRON_DXC_PATH}/inc/${dxc_header}")
+endforeach()
+
+message(STATUS "Successfully copied DirectX Shader Compiler to Cauldron source")
+
+message(STATUS "Patching Agility SDK version")
+
+# find git and apply a patch to FFX
+# agilitysdk-version.patch changes the exported D3D12SDKVersion to match the binaries copied above.
+# Patching stays non-fatal, but a patch that is neither already applied nor applicable is reported.
+find_package(Git REQUIRED)
+set(agility_version_patch "${CMAKE_CURRENT_SOURCE_DIR}/agilitysdk-version.patch")
+
+# If the reversed patch applies cleanly, a previous configure run already applied it.
+execute_process(COMMAND "${GIT_EXECUTABLE}" apply --reverse --check "${agility_version_patch}"
+    WORKING_DIRECTORY "${FFX_ROOT}"
+    RESULT_VARIABLE agility_version_reverse_result
+    OUTPUT_QUIET
+    ERROR_QUIET)
+if(NOT agility_version_reverse_result EQUAL 0)
+    execute_process(COMMAND "${GIT_EXECUTABLE}" apply "${agility_version_patch}"
+        WORKING_DIRECTORY "${FFX_ROOT}"
+        RESULT_VARIABLE agility_version_apply_result
+        ERROR_VARIABLE agility_version_apply_error
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(NOT agility_version_apply_result EQUAL 0)
+        message(WARNING "Failed to apply agilitysdk-version.patch to ${FFX_ROOT}: ${agility_version_apply_error}")
+    endif()
+endif()
\ No newline at end of file
diff --git a/license.txt b/license.txt
new file mode 100644
index 0000000..6c7cab2
--- /dev/null
+++ b/license.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/meshNodeSample/CMakeLists.txt b/meshNodeSample/CMakeLists.txt
new file mode 100644
index 0000000..2433f29
--- /dev/null
+++ b/meshNodeSample/CMakeLists.txt
@@ -0,0 +1,101 @@
+# This file is part of the AMD Work Graph Mesh Node Sample.
+#
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files(the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions : +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+# Declare project
+project(MeshNodeSample)
+
+# ---------------------------------------------
+# Import FFX config
+# ---------------------------------------------
+# FidelityFX uses CMAKE_HOME_DIRECTORY as base for all paths
+set(CMAKE_HOME_DIRECTORY ${FFX_ROOT})
+include(${FFX_ROOT}/common.cmake)
+include(${FFX_ROOT}/sample.cmake)
+
+# Set compile definitions from Cauldron/FidelityFX.
+# One generator expression per configuration exported by imported/CMakeLists.txt
+# (DebugDX12, ReleaseDX12, RelWithDebInfoDX12); each contributes a list of definitions.
+set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
+    "$<$<CONFIG:DebugDX12>:_DX12;_WIN>"
+    "$<$<CONFIG:ReleaseDX12>:_DX12;_WIN;_RELEASE>"
+    "$<$<CONFIG:RelWithDebInfoDX12>:_DX12;_WIN;_RELEASE>"
+    FFX_API_CAULDRON
+    NOMINMAX
+)
+
+# Output exe to bin directory (for the default and for every per-configuration variable)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BIN_OUTPUT})
+foreach(config_name IN LISTS CMAKE_CONFIGURATION_TYPES)
+    string(TOUPPER "${config_name}" config_name_upper)
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${config_name_upper} ${BIN_OUTPUT})
+endforeach()
+
+# ---------------------------------------------
+# Sample render module
+# ---------------------------------------------
+
+# CONFIGURE_DEPENDS re-runs the glob at build time, so added or removed files
+# are picked up without a manual re-configure.
+file(GLOB meshnodesample_src CONFIGURE_DEPENDS
+    ${CMAKE_CURRENT_SOURCE_DIR}/*.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+file(GLOB meshnodesample_shaders CONFIGURE_DEPENDS
+    ${CMAKE_CURRENT_SOURCE_DIR}/shaders/*.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/shaders/*.hlsl)
+# Shaders are listed in the project as plain text files and copied to the shader output folder
+set_source_files_properties(${meshnodesample_shaders} PROPERTIES VS_TOOL_OVERRIDE "Text")
+copyCommand("${meshnodesample_shaders}" ${SHADER_OUTPUT})
+
+# Add config file
+set(config_file ${CMAKE_CURRENT_SOURCE_DIR}/config/meshnodesampleconfig.json)
+copyCommand("${config_file}" ${CONFIG_OUTPUT})
+
+# Add the sample to the solution
+add_executable(${PROJECT_NAME} WIN32 ${default_icon_src} ${config_file} ${meshnodesample_src} ${meshnodesample_shaders} ${ffx_remap})
+
+# Setup the correct exe based on backend name
+set(EXE_OUT_NAME ${PROJECT_NAME}_)
+
+# Link everything (including the compiler for now)
+target_link_libraries(${PROJECT_NAME} LINK_PUBLIC Framework RenderModules d3dcompiler ffx_fsr2_x64)
+set_target_properties(${PROJECT_NAME} PROPERTIES
+    OUTPUT_NAME_DEBUGDX12 "${EXE_OUT_NAME}DX12D"
+    OUTPUT_NAME_DEBUGVK "${EXE_OUT_NAME}VKD"
+    OUTPUT_NAME_RELEASEDX12 "${EXE_OUT_NAME}DX12"
+    OUTPUT_NAME_RELEASEVK "${EXE_OUT_NAME}VK"
+    VS_DEBUGGER_WORKING_DIRECTORY "${BIN_OUTPUT}")
+
+# Add manifest info
+addManifest(${PROJECT_NAME})
+
+# Add dependency information
+add_dependencies(${PROJECT_NAME} Framework)
+add_dependencies(${PROJECT_NAME} RenderModules)
+add_dependencies(${PROJECT_NAME} ffx_fsr2_x64)
+
+# Link the correct backend in
+
+target_link_libraries(${PROJECT_NAME} LINK_PUBLIC ffx_backend_cauldron_x64)
+add_dependencies(${PROJECT_NAME} ffx_backend_cauldron_x64)
+target_include_directories(${PROJECT_NAME} PUBLIC ${FFX_API_CAULDRON_ROOT} ${CMAKE_SOURCE_DIR})
+
+# And solution layout definitions
+source_group("" FILES ${ffx_remap})
+source_group("Icon" FILES ${default_icon_src})
+source_group("Config" FILES ${config_file})
+source_group("Sample" FILES ${meshnodesample_src})
+source_group("Sample\\Shaders" FILES ${meshnodesample_shaders})
diff --git a/meshNodeSample/config/meshnodesampleconfig.json b/meshNodeSample/config/meshnodesampleconfig.json
new file mode 100644
index 0000000..4b5144b
--- /dev/null
+++ b/meshNodeSample/config/meshnodesampleconfig.json
@@ -0,0 +1,35 @@
+{
+    "Mesh Node Sample": {
+        "RenderResources": {
+            "GBufferColorTarget": {
+                "Format": "RGB10A2_UNORM",
+                "RenderResolution": true
+            },
+            "GBufferNormalTarget": {
+                "Format": "RGBA16_FLOAT",
+                "RenderResolution": true
+            },
+            "GBufferMotionVectorTarget": {
+                "Format": "RG16_FLOAT",
+                "RenderResolution": true
+            },
+            "GBufferDepthTarget": "DepthTarget"
+        },
+
+        "RenderModules": [
+            "WorkGraphRenderModule",
+            "FSR2RenderModule",
+            "ToneMappingRenderModule"
+        ],
+
+        "Allocations": {
+            "GPUResourceViewCount": 200000,
+            "CPUResourceViewCount": 200000
+        },
+
+        "InvertedDepth": false,
+        "Content": {
+            "SceneExposure": 0.5
+        }
+    }
+}
diff --git a/meshNodeSample/fsr2rendermodule.cpp b/meshNodeSample/fsr2rendermodule.cpp
new file mode 100644
index 0000000..1d28fcb
--- 
/dev/null +++ b/meshNodeSample/fsr2rendermodule.cpp @@ -0,0 +1,320 @@ +// This file is part of the AMD Work Graph Mesh Node Sample. +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "fsr2rendermodule.h" + +#include + +#include + +#include "core/scene.h" +#include "render/dynamicresourcepool.h" +#include "render/profiler.h" +#include "render/rasterview.h" +#include "render/uploadheap.h" +#include "validation_remap.h" + +using namespace cauldron; + +void FSR2RenderModule::Init(const json& initData) +{ + // Fetch needed resources + m_pColorTarget = GetFramework()->GetColorTargetForCallback(GetName()); + m_pDepthTarget = GetFramework()->GetRenderTexture(L"GBufferDepthTarget"); + m_pMotionVectors = GetFramework()->GetRenderTexture(L"GBufferMotionVectorTarget"); + + CauldronAssert(ASSERT_CRITICAL, m_pColorTarget && m_pDepthTarget && m_pMotionVectors, L"Could not get one of the needed resources for FSR2 Rendermodule."); + + // Set our render resolution function as that to use during resize to get render width/height from display + // width/height + m_pUpdateFunc = [this](uint32_t displayWidth, uint32_t displayHeight) { return this->UpdateResolution(displayWidth, displayHeight); }; + + // UI + InitUI(); + + ////////////////////////////////////////////////////////////////////////// + // Finish up init + + // Start disabled as this will be enabled externally + SetModuleEnabled(false); + + // That's all we need for now + SetModuleReady(true); +} + +FSR2RenderModule::~FSR2RenderModule() +{ + // Protection + if (ModuleEnabled()) + EnableModule(false); // Destroy FSR context +} + +void FSR2RenderModule::EnableModule(bool enabled) +{ + // If disabling the render module, we need to disable the upscaler with the framework + if (enabled) + { + // Setup everything needed when activating FSR + // Will also enable upscaling + UpdatePreset(nullptr); + + // Toggle this now so we avoid the context changes in OnResize + SetModuleEnabled(enabled); + + // Setup Cauldron FidelityFX interface. 
+ const size_t scratchBufferSize = ffxGetScratchMemorySize(FFX_FSR2_CONTEXT_COUNT); + void* scratchBuffer = calloc(scratchBufferSize, 1); + FfxErrorCode errorCode = + ffxGetInterface(&m_InitializationParameters.backendInterface, GetDevice(), scratchBuffer, scratchBufferSize, FFX_FSR2_CONTEXT_COUNT); + CauldronAssert(ASSERT_CRITICAL, errorCode == FFX_OK, L"Could not initialize the FidelityFX SDK backend"); + + // Create the FSR2 context + UpdateFSR2Context(true); + + // Set the jitter callback to use + CameraJitterCallback jitterCallback = [this](Vec2& values) { + // Increment jitter index for frame + ++m_JitterIndex; + + // Update FSR2 jitter for built in TAA + const ResolutionInfo& resInfo = GetFramework()->GetResolutionInfo(); + const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(resInfo.RenderWidth, resInfo.DisplayWidth); + ffxFsr2GetJitterOffset(&m_JitterX, &m_JitterY, m_JitterIndex, jitterPhaseCount); + + values = Vec2(2.f * m_JitterX / resInfo.RenderWidth, 2.f * m_JitterY / resInfo.RenderHeight); + }; + CameraComponent::SetJitterCallbackFunc(jitterCallback); + + // ... and register UI elements for active upscaler + GetUIManager()->RegisterUIElements(m_UISection); + } + else + { + // Toggle this now so we avoid the context changes in OnResize + SetModuleEnabled(enabled); + + GetFramework()->EnableUpscaling(false); + + // Destroy the FSR2 context + UpdateFSR2Context(false); + + // Destroy the FidelityFX interface memory + free(m_InitializationParameters.backendInterface.scratchBuffer); + + // Deregister UI elements for inactive upscaler + GetUIManager()->UnRegisterUIElements(m_UISection); + } +} + +void FSR2RenderModule::InitUI() +{ + // Build UI options, but don't register them yet. 
Registration/Deregistration will be controlled by enabling/disabling the render module + m_UISection.SectionName = "Upscaling"; // We will piggy-back on existing upscaling section" + m_UISection.SectionType = UISectionType::Sample; + + // Setup scale preset options + const char* preset[] = {"Quality (1.5x)", "Balanced (1.7x)", "Performance (2x)", "Ultra Performance (3x)", "Custom"}; + std::vector presetComboOptions; + presetComboOptions.assign(preset, preset + _countof(preset)); + std::function presetCallback = [this](void* pParams) { this->UpdatePreset(static_cast(pParams)); }; + m_UISection.AddCombo("Scale Preset", reinterpret_cast(&m_ScalePreset), &presetComboOptions, presetCallback); + + // Setup scale factor (disabled for all but custom) + std::function ratioCallback = [this](void* pParams) { this->UpdateUpscaleRatio(static_cast(pParams)); }; + m_UISection.AddFloatSlider("Custom Scale", &m_UpscaleRatio, 1.f, 3.f, ratioCallback, &m_UpscaleRatioEnabled); + + // Sharpening + m_UISection.AddCheckBox("RCAS Sharpening", &m_RCASSharpen); + m_UISection.AddFloatSlider("Sharpness", &m_Sharpness, 0.f, 1.f, nullptr, &m_RCASSharpen); +} + +void FSR2RenderModule::UpdatePreset(const int32_t* pOldPreset) +{ + switch (m_ScalePreset) + { + case FSR2ScalePreset::Quality: + m_UpscaleRatio = 1.5f; + break; + case FSR2ScalePreset::Balanced: + m_UpscaleRatio = 1.7f; + break; + case FSR2ScalePreset::Performance: + m_UpscaleRatio = 2.0f; + break; + case FSR2ScalePreset::UltraPerformance: + m_UpscaleRatio = 3.0f; + break; + case FSR2ScalePreset::Custom: + default: + // Leave the upscale ratio at whatever it was + break; + } + + // Update whether we can update the custom scale slider + m_UpscaleRatioEnabled = (m_ScalePreset == FSR2ScalePreset::Custom); + + // Update resolution since rendering ratios have changed + GetFramework()->EnableUpscaling(true, m_pUpdateFunc); +} + +void FSR2RenderModule::UpdateUpscaleRatio(const float* pOldRatio) +{ + // Disable/Enable FSR2 since resolution 
ratios have changed + GetFramework()->EnableUpscaling(true, m_pUpdateFunc); +} + +void FSR2RenderModule::FfxMsgCallback(FfxMsgType type, const wchar_t* message) +{ + if (type == FFX_MESSAGE_TYPE_ERROR) + { + CauldronWarning(L"FSR2_API_DEBUG_ERROR: %ls", message); + } + else if (type == FFX_MESSAGE_TYPE_WARNING) + { + CauldronWarning(L"FSR2_API_DEBUG_WARNING: %ls", message); + } +} + +void FSR2RenderModule::UpdateFSR2Context(bool enabled) +{ + if (enabled) + { + const ResolutionInfo& resInfo = GetFramework()->GetResolutionInfo(); + m_InitializationParameters.maxRenderSize.width = resInfo.RenderWidth; + m_InitializationParameters.maxRenderSize.height = resInfo.RenderHeight; + m_InitializationParameters.displaySize.width = resInfo.DisplayWidth; + m_InitializationParameters.displaySize.height = resInfo.DisplayHeight; + + // Enable auto-exposure by default + m_InitializationParameters.flags = FFX_FSR2_ENABLE_AUTO_EXPOSURE; + + // Note, inverted depth and display mode are currently handled statically for the run of the sample. 
+ // If they become changeable at runtime, we'll need to modify how this information is queried + static bool s_InvertedDepth = GetConfig()->InvertedDepth; + + // Setup inverted depth flag according to sample usage + if (s_InvertedDepth) + m_InitializationParameters.flags |= FFX_FSR2_ENABLE_DEPTH_INVERTED | FFX_FSR2_ENABLE_DEPTH_INFINITE; + + // Input data is HDR + m_InitializationParameters.flags |= FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE; + + // Motion vectors include frame-to-frame jitter + m_InitializationParameters.flags |= FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION; + +// Do eror checking in debug +#if defined(_DEBUG) + m_InitializationParameters.flags |= FFX_FSR2_ENABLE_DEBUG_CHECKING; + m_InitializationParameters.fpMessage = &FSR2RenderModule::FfxMsgCallback; +#endif // #if defined(_DEBUG) + + // Create the FSR2 context + FfxErrorCode errorCode = ffxFsr2ContextCreate(&m_FSR2Context, &m_InitializationParameters); + CauldronAssert(ASSERT_CRITICAL, errorCode == FFX_OK, L"Couldn't create the FidelityFX SDK FSR2 context."); + } + + else + { + // Destroy the FSR2 context + ffxFsr2ContextDestroy(&m_FSR2Context); + } +} + +ResolutionInfo FSR2RenderModule::UpdateResolution(uint32_t displayWidth, uint32_t displayHeight) +{ + return { + static_cast((float)displayWidth / m_UpscaleRatio), static_cast((float)displayHeight / m_UpscaleRatio), displayWidth, displayHeight}; +} + +void FSR2RenderModule::OnResize(const ResolutionInfo& resInfo) +{ + if (!ModuleEnabled()) + return; + + // Need to recreate the FSR2 context on resource resize + UpdateFSR2Context(false); // Destroy + UpdateFSR2Context(true); // Re-create + + // Rest jitter index + m_JitterIndex = 0; +} + +void FSR2RenderModule::Execute(double deltaTime, CommandList* pCmdList) +{ + GPUScopedProfileCapture sampleMarker(pCmdList, L"FFX FSR2"); + const ResolutionInfo& resInfo = GetFramework()->GetResolutionInfo(); + CameraComponent* pCamera = GetScene()->GetCurrentCamera(); + + // All cauldron resources come into a 
render module in a generic read state (ResourceState::NonPixelShaderResource | + // ResourceState::PixelShaderResource) + FfxFsr2DispatchDescription dispatchParameters = {}; + dispatchParameters.commandList = ffxGetCommandList(pCmdList); + dispatchParameters.color = ffxGetResource(m_pColorTarget->GetResource(), L"FSR2_Input_OutputColor", FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ); + dispatchParameters.depth = ffxGetResource(m_pDepthTarget->GetResource(), L"FSR2_InputDepth", FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ); + dispatchParameters.motionVectors = ffxGetResource(m_pMotionVectors->GetResource(), L"FSR2_InputMotionVectors", FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ); + dispatchParameters.exposure = ffxGetResource(nullptr, L"FSR2_InputExposure", FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ); + dispatchParameters.output = dispatchParameters.color; + + dispatchParameters.reactive = ffxGetResource(nullptr, L"FSR2_EmptyInputReactiveMap", FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ); + dispatchParameters.transparencyAndComposition = ffxGetResource(nullptr, L"FSR2_EmptyTransparencyAndCompositionMap", FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ); + + // Jitter is calculated earlier in the frame using a callback from the camera update + dispatchParameters.jitterOffset.x = m_JitterX; + dispatchParameters.jitterOffset.y = -m_JitterY; + dispatchParameters.motionVectorScale.x = resInfo.fRenderWidth() / 2.f; + dispatchParameters.motionVectorScale.y = -resInfo.fRenderHeight() / 2.f; + dispatchParameters.reset = false; + dispatchParameters.enableSharpening = m_RCASSharpen; + dispatchParameters.sharpness = m_Sharpness; + + // Cauldron keeps time in seconds, but FSR expects miliseconds + dispatchParameters.frameTimeDelta = static_cast(deltaTime * 1000.f); + + dispatchParameters.preExposure = GetScene()->GetSceneExposure(); + dispatchParameters.renderSize.width = resInfo.RenderWidth; + dispatchParameters.renderSize.height = resInfo.RenderHeight; + + // Note, inverted depth and display mode are currently 
handled statically for the run of the sample. + // If they become changeable at runtime, we'll need to modify how this information is queried + static bool s_InvertedDepth = GetConfig()->InvertedDepth; + + // Setup camera params as required + dispatchParameters.cameraFovAngleVertical = pCamera->GetFovY(); + if (s_InvertedDepth) + { + dispatchParameters.cameraFar = pCamera->GetNearPlane(); + dispatchParameters.cameraNear = FLT_MAX; + } + else + { + dispatchParameters.cameraFar = pCamera->GetFarPlane(); + dispatchParameters.cameraNear = pCamera->GetNearPlane(); + } + + FfxErrorCode errorCode = ffxFsr2ContextDispatch(&m_FSR2Context, &dispatchParameters); + FFX_ASSERT(errorCode == FFX_OK); + + // FidelityFX contexts modify the set resource view heaps, so set the cauldron one back + SetAllResourceViewHeaps(pCmdList); + + // We are now done with upscaling + GetFramework()->SetUpscalingState(UpscalerState::PostUpscale); +} diff --git a/meshNodeSample/fsr2rendermodule.h b/meshNodeSample/fsr2rendermodule.h new file mode 100644 index 0000000..585f992 --- /dev/null +++ b/meshNodeSample/fsr2rendermodule.h @@ -0,0 +1,125 @@ +// This file is part of the AMD Work Graph Mesh Node Sample. +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "render/rendermodule.h" +#include "core/framework.h" +#include "core/uimanager.h" + +#include + +#include + +namespace cauldron +{ + class Texture; +} // namespace cauldron + +/** + * FSR2RenderModule takes care of: + * - creating UI section that enable users to select upscaling options + * - creating GPU resources + * - clearing and/or generating the reactivity masks + * - dispatch workloads for upscaling using FSR 2 + */ +class FSR2RenderModule : public cauldron::RenderModule +{ +public: + /** + * @brief Constructor with default behavior. + */ + FSR2RenderModule() + : RenderModule(L"FSR2RenderModule") + { + } + + /** + * @brief Tear down the FSR 2 API Context and release resources. + */ + virtual ~FSR2RenderModule(); + + /** + * @brief Initialize FSR 2 API Context, create resources, and setup UI section for FSR 2. + */ + void Init(const json& initData); + + /** + * @brief If render module is enabled, initialize the FSR 2 API Context. If disabled, destroy the FSR 2 API Context. + */ + void EnableModule(bool enabled) override; + + /** + * @brief Setup parameters that the FSR 2 API needs this frame and then call the FFX Dispatch. + */ + void Execute(double deltaTime, cauldron::CommandList* pCmdList) override; + + /** + * @brief Recreate the FSR 2 API Context to resize internal resources. Called by the framework when the resolution changes. 
+ */ + void OnResize(const cauldron::ResolutionInfo& resInfo) override; + +private: + // Enum representing the FSR 2 quality modes. + enum class FSR2ScalePreset + { + Quality = 0, // 1.5f + Balanced, // 1.7f + Performance, // 2.f + UltraPerformance, // 3.f + Custom // 1.f - 3.f range + }; + + static void FfxMsgCallback(FfxMsgType type, const wchar_t* message); + + void InitUI(); + void UpdatePreset(const int32_t* pOldPreset); + void UpdateUpscaleRatio(const float* pOldRatio); + + cauldron::ResolutionInfo UpdateResolution(uint32_t displayWidth, uint32_t displayHeight); + void UpdateFSR2Context(bool enabled); + + FSR2ScalePreset m_ScalePreset = FSR2ScalePreset::Custom; + float m_UpscaleRatio = 1.f; + float m_Sharpness = 0.8f; + uint32_t m_JitterIndex = 0; + float m_JitterX = 0.f; + float m_JitterY = 0.f; + + bool m_UpscaleRatioEnabled = false; + bool m_RCASSharpen = true; + + // FidelityFX Super Resolution 2 information + FfxFsr2ContextDescription m_InitializationParameters = {}; + FfxFsr2Context m_FSR2Context; + + // For UI params + cauldron::UISection m_UISection; + + // FidelityFX Super Resolution 2 resources + const cauldron::Texture* m_pColorTarget = nullptr; + const cauldron::Texture* m_pDepthTarget = nullptr; + const cauldron::Texture* m_pMotionVectors = nullptr; + + // For resolution updates + std::function m_pUpdateFunc = nullptr; + +}; diff --git a/meshNodeSample/main.cpp b/meshNodeSample/main.cpp new file mode 100644 index 0000000..4746a8d --- /dev/null +++ b/meshNodeSample/main.cpp @@ -0,0 +1,139 @@ +// This file is part of the AMD Work Graph Mesh Node Sample. +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+// Framework and Windows implementation
+#include "core/framework.h"
+#include "core/win/framework_win.h"
+
+// Config file parsing
+#include "misc/fileio.h"
+
+// Custom camera component
+#include "samplecameracomponent.h"
+
+// Content manager to fix texture load bug
+#include "core/contentmanager.h"
+
+// Render Module Registry
+#include "rendermoduleregistry.h"
+// Render Modules
+#include "fsr2rendermodule.h"
+#include "workgraphrendermodule.h"
+
+// D3D12 header to enable experimental shader models
+#include "d3d12.h"
+
+using namespace cauldron;
+// Sample application: registers the work graph and FSR 2 render modules and installs the custom camera.
+class MeshNodeSample final : public Framework
+{
+public:
+    MeshNodeSample(const FrameworkInitParams* pInitParams)
+        : Framework(pInitParams)
+    {
+    }
+
+    ~MeshNodeSample() = default;
+
+    // Overrides
+    void ParseSampleConfig() override
+    {
+        const auto configFileName = L"configs/meshnodesampleconfig.json";
+
+        json sampleConfig;
+        CauldronAssert(ASSERT_CRITICAL, ParseJsonFile(configFileName, sampleConfig), L"Could not parse JSON file %ls", configFileName);
+
+        // Get the sample configuration
+        json configData = sampleConfig["Mesh Node Sample"];
+
+        // Let the framework parse all the "known" options for us
+        ParseConfigData(configData);
+    }
+
+    void RegisterSampleModules() override
+    {
+        // Init all pre-registered render modules
+        rendermodule::RegisterAvailableRenderModules();
+
+        // Register sample render module
+        RenderModuleFactory::RegisterModule("WorkGraphRenderModule");
+        // Register FSR 2 render module
+        RenderModuleFactory::RegisterModule("FSR2RenderModule");
+    }
+
+    int32_t PreRun() override
+    {
+        const auto status = Framework::PreRun();
+
+        // Init custom camera entity & component
+        Task createCameraTask(InitCameraEntity, nullptr);
+        GetTaskManager()->AddTask(createCameraTask);
+
+        // Cauldron is missing its media folder, thus these textures are not available.
+        // Due to a bug, Cauldron will not shut down if these textures are not loaded,
+        // thus we decrement the pending texture loads manually with three nullptr textures
+        Texture* texturePtr = nullptr;
+        GetContentManager()->StartManagingContent(L"SpecularIBL", texturePtr);
+        GetContentManager()->StartManagingContent(L"DiffuseIBL", texturePtr);
+        GetContentManager()->StartManagingContent(L"BrdfLut", texturePtr);
+
+        return status;
+    }
+
+    int32_t DoSampleInit() override
+    {
+        // Enable FSR 2 upscaling and AA (NOTE(review): the GetRenderModule result is dereferenced unchecked)
+        GetFramework()->GetRenderModule("FSR2RenderModule")->EnableModule(true);
+
+        return 0;
+    }
+
+    void DoSampleShutdown() override
+    {
+        // Shutdown (disable) FSR 2 render module
+        GetFramework()->GetRenderModule("FSR2RenderModule")->EnableModule(false);
+    }
+};
+
+static FrameworkInitParamsInternal s_WindowsParams;
+
+//////////////////////////////////////////////////////////////////////////
+// WinMain: enables the experimental D3D12 features, then runs the sample in the framework
+int WINAPI wWinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLine, int nCmdShow)
+{
+    // Enable experimental D3D12 features for mesh nodes
+    std::array meshNodesExperimentalFeatures = {D3D12ExperimentalShaderModels, D3D12StateObjectsExperiment};
+    CauldronThrowOnFail(
+        D3D12EnableExperimentalFeatures(static_cast(meshNodesExperimentalFeatures.size()), meshNodesExperimentalFeatures.data(), nullptr, nullptr));
+
+    // Create the sample and kick it off to the framework to run
+    FrameworkInitParams initParams = {};
+    initParams.Name                = L"Mesh Node Sample";
+    initParams.CmdLine             = lpCmdLine;
+    initParams.AdditionalParams    = &s_WindowsParams;
+
+    // Setup the windows info
+    s_WindowsParams.InstanceHandle = hInstance;
+    s_WindowsParams.CmdShow        = nCmdShow;
+
+    MeshNodeSample frameworkInstance(&initParams);
+    return RunFramework(&frameworkInstance);
+}
diff --git a/meshNodeSample/samplecameracomponent.cpp b/meshNodeSample/samplecameracomponent.cpp
new file mode 100644
index 0000000..429ecc2
--- /dev/null
+++ b/meshNodeSample/samplecameracomponent.cpp
@@ -0,0
+1,180 @@ +// This file is part of the AMD Work Graph Mesh Node Sample. +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "samplecameracomponent.h"
+
+#include "core/contentmanager.h"
+#include "core/framework.h"
+#include "core/inputmanager.h"
+#include "core/scene.h"
+// Free-look camera: starts with a speed of 50 and arc-ball mode switched off.
+MeshNodeSampleCameraComponent::MeshNodeSampleCameraComponent(cauldron::Entity* pOwner, cauldron::ComponentData* pData, cauldron::CameraComponentMgr* pManager)
+    : CameraComponent(pOwner, pData, pManager)
+{
+    m_Speed       = 50.f;
+    m_ArcBallMode = false;
+}
+// Per-frame update: stores the previous matrices, then (for the active camera only) applies input and jitter.
+void MeshNodeSampleCameraComponent::Update(double deltaTime)
+{
+    using namespace cauldron;
+
+    // Always update temporal information
+    m_PrevViewMatrix           = m_ViewMatrix;
+    m_PrevViewProjectionMatrix = m_ViewProjectionMatrix;
+    m_PrevProjJittered         = m_ProjJittered;
+
+    // If this camera is the currently active camera for the scene, check for input
+    if (GetScene()->GetCurrentCamera() == this)
+    {
+        const InputState& inputState = GetInputManager()->GetInputState();
+
+        // Read in inputs
+
+        // Scale speed with mouse wheel rotation
+        if (inputState.GetMouseAxisDelta(Mouse_Wheel))
+        {
+            m_Speed = m_Speed * ((inputState.GetMouseAxisDelta(Mouse_Wheel) > 0) ? 1.5f : (1.f / 1.5f));
+            // clamp speed
+            m_Speed = std::max(m_Speed, 1.f);
+            m_Speed = std::min(m_Speed, 200.f);
+        }
+
+        // Use right game pad stick to pitch and yaw the camera
+        bool hasRotation = false;
+        if (inputState.GetGamePadAxisState(Pad_RightThumbX) || inputState.GetGamePadAxisState(Pad_RightThumbY))
+        {
+            // Stick deflection is scaled by 1/200 to get the per-frame rotation in radians
+            m_Yaw -= inputState.GetGamePadAxisState(Pad_RightThumbX) / 200.f;
+            m_Pitch += inputState.GetGamePadAxisState(Pad_RightThumbY) / 200.f;
+            hasRotation = true;
+        }
+
+        // Left click + mouse move == free cam look (only evaluated when the right stick is idle)
+        else if (inputState.GetMouseButtonState(Mouse_LButton))
+        {
+            // Mouse delta is scaled by 1/500 to get the per-frame rotation in radians
+            m_Yaw -= inputState.GetMouseAxisDelta(Mouse_XAxis) / 500.f;
+            m_Pitch += inputState.GetMouseAxisDelta(Mouse_YAxis) / 500.f;
+            hasRotation = true;
+        }
+
+        // If hitting the 'r' key or back button on game pad, reset camera to original transform
+        if (inputState.GetKeyState(Key_R) || inputState.GetGamePadButtonState(Pad_Back))
+        {
+            ResetCamera();
+            UpdateMatrices();
+            return;
+        }
+
+        Vec4 eyePos      = Vec4(m_InvViewMatrix.getTranslation(), 0.f);
+        Vec4 polarVector = PolarToVector(m_Yaw, m_Pitch);
+
+        // WASDQE == camera translation
+        float x(0.f), y(0.f), z(0.f);
+        x -= (inputState.GetKeyState(Key_A)) ? 1.f : 0.f;
+        x += (inputState.GetKeyState(Key_D)) ? 1.f : 0.f;
+        y -= (inputState.GetKeyState(Key_Q)) ? 1.f : 0.f;
+        y += (inputState.GetKeyState(Key_E)) ? 1.f : 0.f;
+        z -= (inputState.GetKeyState(Key_W)) ? 1.f : 0.f;
+        z += (inputState.GetKeyState(Key_S)) ? 1.f : 0.f;
+
+        // Controller input can also translate
+        x += inputState.GetGamePadAxisState(Pad_LeftThumbX);
+        z -= inputState.GetGamePadAxisState(Pad_LeftThumbY);
+        y -= inputState.GetGamePadAxisState(Pad_LTrigger);
+        y += inputState.GetGamePadAxisState(Pad_RTrigger);
+        Vec4 movement = Vec4(x, y, z, 0.f);
+
+        Mat4& transform = m_pOwner->GetTransform();  // NOTE(review): never read below - candidate for removal
+
+        // Update from inputs
+        if (hasRotation || dot(movement.getXYZ(), movement.getXYZ()))
+        {
+            // Setup new eye position
+            eyePos =
+                m_InvViewMatrix.getCol3() + (m_InvViewMatrix * movement * m_Speed * static_cast(deltaTime));  // InvViewMatrix is the owner's transform
+        }
+
+        // Limit maximum camera height
+        eyePos[1] = std::min(eyePos[1], 400.f);
+
+        // Update camera jitter if we need it
+        if (CameraComponent::s_pSetJitterCallback)
+        {
+            s_pSetJitterCallback(m_jitterValues);
+            m_Dirty = true;
+        }
+        else
+        {
+            // Reset jitter if disabled
+            if (m_jitterValues.getX() != 0.f || m_jitterValues.getY() != 0.f)
+            {
+                m_jitterValues = Vec2(0.f, 0.f);
+                m_Dirty        = true;
+            }
+        }
+
+        LookAt(eyePos, eyePos - 10 * polarVector);
+        UpdateMatrices();
+    }
+}
+// Task entry point: builds the camera entity and component, then hands the content block to the content manager.
+void InitCameraEntity(void*)
+{
+    using namespace cauldron;
+
+    ContentBlock* pContentBlock = new ContentBlock();
+
+    // Memory backing camera creation
+    EntityDataBlock* pCameraDataBlock = new EntityDataBlock();
+    pContentBlock->EntityDataBlocks.push_back(pCameraDataBlock);
+    pCameraDataBlock->pEntity = new Entity(L"MeshNodeDemoCamera");
+    CauldronAssert(ASSERT_CRITICAL, pCameraDataBlock->pEntity, L"Could not allocate default perspective camera entity");
+
+    // Use the same matrix setup as Cauldron 1.4 (note that Cauldron kept view-matrix native transforms, and our
+    // entity needs the inverse of that)
+    Mat4 transform = LookAtMatrix(Vec4(120.65f, 24.44f, -15.74f, 0.f),  // eye position
+                                  Vec4(120.45f, 24.44f, -14.74f, 0.f),  // look-at position
+                                  Vec4(0.f, 1.f, 0.f, 0.f));            // up
+    transform      = InverseMatrix(transform);
+    pCameraDataBlock->pEntity->SetTransform(transform);
+
+    // Setup default camera parameters
+    CameraComponentData defaultPerspCameraCompData;
+    defaultPerspCameraCompData.Name                    = L"MeshNodeDemoCamera";
+    defaultPerspCameraCompData.Perspective.AspectRatio = GetFramework()->GetAspectRatio();
+    defaultPerspCameraCompData.Perspective.Yfov        = CAULDRON_PI2 / defaultPerspCameraCompData.Perspective.AspectRatio;
+    defaultPerspCameraCompData.Znear                   = 0.5f;
+    defaultPerspCameraCompData.Zfar                    = 2000.f;
+
+    CameraComponentData* pCameraComponentData = new CameraComponentData(defaultPerspCameraCompData);
+    pCameraDataBlock->ComponentsData.push_back(pCameraComponentData);
+    MeshNodeSampleCameraComponent* pCameraComponent =
+        new MeshNodeSampleCameraComponent(pCameraDataBlock->pEntity, pCameraComponentData, CameraComponentMgr::Get());
+    pCameraDataBlock->pEntity->AddComponent(pCameraComponent);
+
+    pCameraDataBlock->Components.push_back(pCameraComponent);
+
+    pContentBlock->ActiveCamera = pCameraDataBlock->pEntity;
+
+    GetContentManager()->StartManagingContent(L"MeshNodeDemoCameraEntities", pContentBlock, false);
+}
diff --git a/meshNodeSample/samplecameracomponent.h b/meshNodeSample/samplecameracomponent.h
new file mode 100644
index 0000000..8ab90db
--- /dev/null
+++ b/meshNodeSample/samplecameracomponent.h
@@ -0,0 +1,34 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "core/components/cameracomponent.h"
+// Camera component for the sample; overrides Update to handle keyboard, mouse and game pad input.
+class MeshNodeSampleCameraComponent : public cauldron::CameraComponent
+{
+public:
+    MeshNodeSampleCameraComponent(cauldron::Entity* pOwner, cauldron::ComponentData* pData, cauldron::CameraComponentMgr* pManager);
+
+    void Update(double deltaTime) override;
+};
+// Task-style entry point that creates the sample camera entity; its argument is unnamed and not used.
+void InitCameraEntity(void*);
\ No newline at end of file
diff --git a/meshNodeSample/shadercompiler.cpp b/meshNodeSample/shadercompiler.cpp
new file mode 100644
index 0000000..08e369b
--- /dev/null
+++ b/meshNodeSample/shadercompiler.cpp
@@ -0,0 +1,165 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "shadercompiler.h"
+
+#include "misc/assert.h"
+
+// The experimental filesystem header is deprecated on recent toolchains. The define below silences the
+// resulting deprecation error (we are using C++11 only) and therefore has to precede the include.
+// NOTE(review): the header name was missing from the reviewed text; <experimental/filesystem> is inferred
+// from this define and from the use of filesystem::current_path() in CompileShader - confirm.
+#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING
+#include <experimental/filesystem>
+
+#include <string>
+#include <vector>
+
+using namespace std::experimental;
+
+// Releases the interface behind the pointer (if any) and resets the pointer to nullptr.
+// Safe to call repeatedly and on null pointers.
+// NOTE(review): the template parameter list was missing from the reviewed text; restored from the use of
+// "Interface" as the pointee type.
+template <typename Interface>
+inline void SafeRelease(Interface*& pInterfaceToRelease)
+{
+    if (pInterfaceToRelease != nullptr)
+    {
+        pInterfaceToRelease->Release();
+
+        pInterfaceToRelease = nullptr;
+    }
+}
+
+// Loads the DXC libraries and creates the utils, compiler and include-handler objects used by
+// CompileShader. Every failure is reported through the Cauldron assert/critical helpers; objects that
+// were already created are released before the error is raised.
+// NOTE(review): the code assumes CauldronCritical does not return - confirm against misc/assert.h.
+ShaderCompiler::ShaderCompiler()
+{
+    // dxil.dll is loaded on a best-effort basis: its handle is neither checked nor used afterwards.
+    // NOTE(review): presumably dxcompiler.dll needs it for DXIL validation/signing - confirm.
+    LoadLibraryW(L"dxil.dll");
+    HMODULE dxcompilerModule = LoadLibraryW(L"dxcompiler.dll");
+
+    cauldron::CauldronAssert(cauldron::ASSERT_CRITICAL, dxcompilerModule, L"Failed to load dxcompiler.dll");
+
+    DxcCreateInstanceProc pfnDxcCreateInstance = DxcCreateInstanceProc(GetProcAddress(dxcompilerModule, "DxcCreateInstance"));
+
+    cauldron::CauldronAssert(cauldron::ASSERT_CRITICAL, pfnDxcCreateInstance, L"Failed to load DxcCreateInstance from dxcompiler.dll");
+
+    if (FAILED(pfnDxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&m_pUtils))))
+    {
+        cauldron::CauldronCritical(L"Failed to create DXC utils");
+    }
+
+    if (FAILED(pfnDxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&m_pCompiler))))
+    {
+        // delete utils if compiler creation fails
+        SafeRelease(m_pUtils);
+
+        cauldron::CauldronCritical(L"Failed to create DXC compiler");
+    }
+
+    if (FAILED(m_pUtils->CreateDefaultIncludeHandler(&m_pIncludeHandler)))
+    {
+        // delete utils & compiler if include handler creation fails
+        SafeRelease(m_pCompiler);
+        SafeRelease(m_pUtils);
+
+        // this message used to be a copy of the compiler-creation error above
+        cauldron::CauldronCritical(L"Failed to create DXC include handler");
+    }
+}
+
+// Releases the DXC objects in reverse order of creation.
+ShaderCompiler::~ShaderCompiler()
+{
+    SafeRelease(m_pIncludeHandler);
+    SafeRelease(m_pCompiler);
+    SafeRelease(m_pUtils);
+}
+
+// Compiles one HLSL entry point with DXC.
+//
+//   shaderFilePath  file name relative to the "Shaders" folder; also used as the source name passed to DXC
+//   target          DXC target profile string
+//   entryPoint      name of the entry point to compile
+//
+// The source is compiled with 16-bit types enabled, HLSL 2021, column-major matrices and
+// "<current working directory>/shaders" as include directory.
+//
+// Returns the compiled shader blob. The blob is handed out without being released here, so the caller is
+// responsible for calling Release() on it. All failures (file not found, compiler invocation failure,
+// compilation errors, missing result blob) are reported through cauldron::CauldronCritical; DXC
+// diagnostics are appended to the message when they are available.
+IDxcBlob* ShaderCompiler::CompileShader(const wchar_t* shaderFilePath, const wchar_t* target, const wchar_t* entryPoint)
+{
+    IDxcBlobEncoding* source = nullptr;
+
+    const auto shaderSourceFilePath = std::wstring(L"Shaders\\") + shaderFilePath;
+
+    if (FAILED(m_pUtils->LoadFile(shaderSourceFilePath.c_str(), nullptr, &source)) || (source == nullptr))
+    {
+        cauldron::CauldronCritical(L"Failed to load %s", shaderFilePath);
+    }
+
+    const auto shadersFolderPath     = filesystem::current_path() / L"shaders";
+    const auto shaderIncludeArgument = std::wstring(L"-I") + shadersFolderPath.wstring();
+
+    // NOTE(review): the element type was missing from the reviewed text; LPCWSTR matches the
+    // LPCWSTR* argument list expected by IDxcCompiler::Compile.
+    std::vector<LPCWSTR> arguments = {
+        L"-enable-16bit-types",
+        // use HLSL 2021
+        L"-HV",
+        L"2021",
+        // column major matrices
+        DXC_ARG_PACK_MATRIX_COLUMN_MAJOR,
+        // include path for "shaders" folder
+        shaderIncludeArgument.c_str(),
+    };
+
+    IDxcOperationResult* result = nullptr;
+    const auto           hr     = m_pCompiler->Compile(source,
+                                         shaderFilePath,
+                                         entryPoint,
+                                         target,
+                                         arguments.data(),
+                                         static_cast<UINT32>(arguments.size()),
+                                         nullptr,
+                                         0,
+                                         m_pIncludeHandler,
+                                         &result);
+
+    // release source blob
+    SafeRelease(source);
+
+    // a successful HRESULT without a result object is treated as a failure as well,
+    // as the result is dereferenced unconditionally below
+    if (FAILED(hr) || (result == nullptr))
+    {
+        SafeRelease(result);
+
+        cauldron::CauldronCritical(L"Failed to compile shader %s", shaderFilePath);
+    }
+
+    HRESULT compileStatus = E_FAIL;
+    if (FAILED(result->GetStatus(&compileStatus)))
+    {
+        SafeRelease(result);
+
+        cauldron::CauldronCritical(L"Failed to get compilation status for shader %s", shaderFilePath);
+    }
+
+    std::wstring errorString;
+
+    // try get error string from DXC result
+    {
+        IDxcBlobEncoding* errorStringBlob = nullptr;
+        if (SUCCEEDED(result->GetErrorBuffer(&errorStringBlob)) && (errorStringBlob != nullptr))
+        {
+            IDxcBlobWide* errorStringBlob16 = nullptr;
+            // the conversion can fail; only read the converted blob if it was actually produced
+            if (SUCCEEDED(m_pUtils->GetBlobAsUtf16(errorStringBlob, &errorStringBlob16)) && (errorStringBlob16 != nullptr))
+            {
+                errorString.assign(errorStringBlob16->GetStringPointer(), errorStringBlob16->GetStringLength());
+            }
+
+            SafeRelease(errorStringBlob16);
+        }
+        SafeRelease(errorStringBlob);
+    }
+
+    if (FAILED(compileStatus))
+    {
+        SafeRelease(result);
+
+        cauldron::CauldronCritical(L"Failed to compile shader %s\n%s", shaderFilePath, errorString.c_str());
+    }
+
+    IDxcBlob* outputBlob = nullptr;
+    if (FAILED(result->GetResult(&outputBlob)) || (outputBlob == nullptr))
+    {
+        SafeRelease(result);
+
+        cauldron::CauldronCritical(L"Failed to get binary shader blob for shader %s", shaderFilePath);
+    }
+
+    SafeRelease(result);
+
+    return outputBlob;
+}
\ No newline at end of file
diff --git a/meshNodeSample/shadercompiler.h b/meshNodeSample/shadercompiler.h
new file mode 100644
index 0000000..11dd403
--- /dev/null
+++ b/meshNodeSample/shadercompiler.h
@@ -0,0 +1,45 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+// NOTE(review): the three header names below were missing from the reviewed text and have been
+// reconstructed from the accompanying comments and the DXC types used in this file - confirm.
+
+// windows headers
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+// for BSTR typedef
+#include <wtypes.h>
+
+// DXC header
+#include <dxcapi.h>
+
+// Thin wrapper around the DXC compiler objects (utils, compiler, include handler).
+// The wrapper holds raw interface pointers, so it is deliberately not copyable:
+// an implicit copy would duplicate the pointers without adding a reference.
+class ShaderCompiler
+{
+public:
+    ShaderCompiler();
+    ~ShaderCompiler();
+
+    ShaderCompiler(const ShaderCompiler&)            = delete;
+    ShaderCompiler& operator=(const ShaderCompiler&) = delete;
+
+    // Compiles entryPoint of the given shader file for the given target profile and returns the
+    // compiled blob.
+    IDxcBlob* CompileShader(const wchar_t* shaderFilePath, const wchar_t* target, const wchar_t* entryPoint);
+
+private:
+    IDxcUtils*          m_pUtils          = nullptr;
+    IDxcCompiler*       m_pCompiler       = nullptr;
+    IDxcIncludeHandler* m_pIncludeHandler = nullptr;
+};
\ No newline at end of file
diff --git a/meshNodeSample/shaders/beemeshshader.hlsl b/meshNodeSample/shaders/beemeshshader.hlsl
new file mode 100644
index 0000000..c729d55
--- /dev/null
+++ b/meshNodeSample/shaders/beemeshshader.hlsl
@@ -0,0 +1,215 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "common.hlsl"
+
+// Static "vertex buffer" for bees
+// Vertices 0-5 form the body, vertex 6 is the wing pivot, vertices 7-10 are the wing tips.
+static const int numBeeVertices = 11;
+static const float3 beePositions[numBeeVertices] = {
+    float3(0.84, 0.0, -0.0),
+    float3(-1.0, 0.0, -0.0),
+    float3(-0.083, 0.722, -0.682),
+    float3(-0.083, 0.722, 0.682),
+    float3(1.063, 0.361, -0.275),
+    float3(1.063, 0.361, 0.275),
+    float3(0.353, 0.6, 0.0),
+    float3(-0.283, 1.283, 1.415),
+    float3(0.753, 1.228, 1.865),
+    float3(-0.283, 1.283, -1.415),
+    float3(0.753, 1.228, -1.865),
+};
+// Static vertex color attributes
+static const float3 beeColors[numBeeVertices] = {
+    float3(0.72, 0.56, 0.032),
+    float3(0, 0, 0),
+    float3(0.72, 0.56, 0.032),
+    float3(0.72, 0.56, 0.032),
+    float3(0, 0, 0),
+    float3(0, 0, 0),
+
+    float3(0.85, 0.85, 0.85),
+    float3(0.85, 0.85, 0.85),
+    float3(0.85, 0.85, 0.85),
+    float3(0.85, 0.85, 0.85),
+    float3(0.85, 0.85, 0.85),
+};
+
+// Static "index buffer" for bees
+static const int numBeeTriangles = 10;
+static const uint3 beeTriangles[numBeeTriangles] = {
+    uint3(1, 3, 2),
+    uint3(4, 5, 0),
+    uint3(9, 6, 10),
+    uint3(2, 5, 4),
+    uint3(7, 6, 8),
+    uint3(5, 3, 0),
+    uint3(3, 1, 0),
+    uint3(4, 0, 2),
+    uint3(2, 0, 1),
+    uint3(3, 5, 2),
+};
+
+// Returns the animated position offset of an insect for the given (per-insect offset) time.
+// Each component is an independent 2D Perlin noise lookup along the time axis, scaled by 1.2.
+float3 GetInsectPosition(float time)
+{
+    const float noiseTime = time * 0.001;
+
+    return 1.2 * float3(PerlinNoise2D(float2(noiseTime, 0)),
+                        PerlinNoise2D(float2(noiseTime, 5)),
+                        PerlinNoise2D(float2(noiseTime, 9)));
+}
+
+static const int beeGroupSize = 128;
+
+// customizable bee limit
+// The limit is the smallest of 32 bees, 256 output vertices and 192 output triangles.
+static const int maxNumBees         = min(32, min(256 / numBeeVertices, 192 / numBeeTriangles));
+static const int numOutputVertices  = maxNumBees * numBeeVertices;
+static const int numOutputTriangles = maxNumBees * numBeeTriangles;
+
+// Number of loop iterations every thread needs to cover all output vertices/triangles (rounded up)
+static const int numOutputVertexIterations   = (numOutputVertices + (beeGroupSize - 1)) / beeGroupSize;
+static const int numOutputTriangleIterations = (numOutputTriangles + (beeGroupSize - 1)) / beeGroupSize;
+
+// Mesh node drawing a swarm of maxNumBees bees around inputRecord.Get().position[gid].
+// Every thread writes the triangles and vertices with index gtid + k * beeGroupSize.
+// Bees are scaled to zero at night and with increasing distance to the camera.
+// NOTE(review): the template argument of DispatchNodeInputRecord is missing in the reviewed text and
+// could not be recovered from it - restore the record type from the original source.
+[Shader("node")]
+[NodeLaunch("mesh")]
+[NodeId("DrawBees", 0)]
+[NodeMaxDispatchGrid(maxInsectsPerRecord, 1, 1)]
+// This limit was set through instrumentation and is not required on AMD GPUs.
+// If you wish to change any of the procedural generation parameters,
+// and you are running on a non-AMD GPU, you may need to adjust this limit.
+// You can learn more at:
+// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices
+[NodeMaxInputRecordsPerGraphEntryRecord(20, true)]
+[NumThreads(beeGroupSize, 1, 1)]
+[OutputTopology("triangle")]
+void BeeMeshShader(
+    uint gtid : SV_GroupThreadID,
+    uint gid : SV_GroupID,
+    DispatchNodeInputRecord inputRecord,
+    out indices uint3 tris[numOutputTriangles],
+    out vertices InsectVertex verts[numOutputVertices])
+{
+    const int numBees       = maxNumBees;
+    const int vertexCount   = numBees * numBeeVertices;
+    const int triangleCount = numBees * numBeeTriangles;
+
+    SetMeshOutputCounts(vertexCount, triangleCount);
+
+    const float3 patchCenter = inputRecord.Get().position[gid];
+    const int    seed        = CombineSeed(asuint(patchCenter.x), asuint(patchCenter.z));
+
+    // write "index buffer": replicate the static bee triangles for every bee
+    [[unroll]]
+    for (int i = 0; i < numOutputTriangleIterations; ++i)
+    {
+        const int triId = gtid + beeGroupSize * i;
+
+        if (triId < triangleCount) {
+            const int insectId         = triId / numBeeTriangles;
+            const int insectTriangleId = triId % numBeeTriangles;
+
+            // offset indices to the vertex range of this bee
+            tris[triId] = beeTriangles[insectTriangleId] + insectId * numBeeVertices;
+        }
+    }
+
+    // write "vertex buffer"
+    [[unroll]]
+    for (int i = 0; i < numOutputVertexIterations; ++i)
+    {
+        const int vertId = gtid + beeGroupSize * i;
+
+        if (vertId < vertexCount) {
+            const int insectId       = vertId / numBeeVertices;
+            const int insectVertexId = vertId % numBeeVertices;
+
+            // start time before night start
+            const float nightStart = nightStartTime - Random(seed, 7843);
+            // end time after night end
+            const float nightEnd = nightEndTime + Random(seed, 732);
+
+            // scale insects to 0 at night
+            const float nightScale = max(smoothstep(nightStart - 1, nightStart, GetTimeOfDay()),
+                                         1 - smoothstep(nightEnd, nightEnd + 1, GetTimeOfDay()));
+            // slowly scale insects to 0 in the distance
+            // for simplicity, we omit this scaling from the motion vector, as it only affects very distant insects
+            const float distanceScale =
+                smoothstep(beeFadeStartDistance, beeMaxDistance, distance(patchCenter, GetCameraPosition()));
+
+            const float scale = (.01 + 0.03 * Random(seed, insectId, 8)) * (1 - nightScale) * (1 - distanceScale);
+
+            // radius scale for positioning insects
+            static const float R      = 0.2;
+            const float        angle  = 2 * PI * Random(seed, insectId, 8);
+            const float        radius = sqrt(R * Random(seed, insectId, 98));
+
+            // compute random position offset for insect
+            // insects will rotate around this position
+            const float3 insectBasePosition =
+                float3(radius * cos(angle), 0.75 + 0.5 * Random(seed, insectId, 988), radius * sin(angle));
+
+            // per-insect time offset, such that insects do not move in sync
+            const float timeOffset = 1e6 * Random(seed, insectId, 55);
+            const float time       = GetTime() + timeOffset;
+            const float timePrev   = GetPreviousTime() + timeOffset;
+
+            // compute local insect position offsets
+            // the "delta" positions lie 10 time units in the past and are only used for the movement direction
+            const float3 insectPositionOffset          = GetInsectPosition(time);
+            const float3 insectPositionOffsetDelta     = GetInsectPosition(time - 10);
+            const float3 prevInsectPositionOffset      = GetInsectPosition(timePrev);
+            const float3 prevInsectPositionOffsetDelta = GetInsectPosition(timePrev - 10);
+
+            const float3 insectPosition     = insectBasePosition + insectPositionOffset;
+            const float3 prevInsectPosition = insectBasePosition + prevInsectPositionOffset;
+
+            // compute forward vectors for rotating insects to face movement direction
+            const float2 forward     = normalize(insectPositionOffset.xz - insectPositionOffsetDelta.xz);
+            const float2 prevForward = normalize(prevInsectPositionOffset.xz - prevInsectPositionOffsetDelta.xz);
+
+            float3 vertexPosition     = beePositions[insectVertexId] * scale;
+            float3 prevVertexPosition = vertexPosition;
+
+            // rotate wing vertices around insect center
+            if (insectVertexId > 6) {
+                // compute wing animation angle
+                static const float wingDownAngle = -0.15;
+                static const float wingAmplitude = 0.4;
+                const float phase     = wingDownAngle + wingAmplitude * cos(2 * PI * frac(time * 0.005));
+                const float phasePrev = wingDownAngle + wingAmplitude * cos(2 * PI * frac(timePrev * 0.005));
+
+                // insect center for rotating wings
+                // This must not be static: scale differs per insect, and a static local would keep the
+                // value computed for the first wing vertex processed by this thread.
+                const float3 rotatePoint = beePositions[6] * scale;
+
+                // wings on opposite sides (sign of z) flap in opposite directions
+                float wingAngle     = sign(vertexPosition.z) * phase;
+                float prevWingAngle = sign(vertexPosition.z) * phasePrev;
+                vertexPosition.yz     = RotateAroundPoint2d(vertexPosition.yz, wingAngle, rotatePoint.yz);
+                prevVertexPosition.yz = RotateAroundPoint2d(prevVertexPosition.yz, prevWingAngle, rotatePoint.yz);
+            }
+            // rotate insect towards movement direction
+            vertexPosition.xz     = float2(vertexPosition.x * forward.x - vertexPosition.z * forward.y,
+                                           vertexPosition.x * forward.y + vertexPosition.z * forward.x);
+            prevVertexPosition.xz = float2(prevVertexPosition.x * prevForward.x - prevVertexPosition.z * prevForward.y,
+                                           prevVertexPosition.x * prevForward.y + prevVertexPosition.z * prevForward.x);
+
+            InsectVertex vertex;
+            vertex.objectSpacePosition = insectPosition + vertexPosition;
+            vertex.color               = beeColors[insectVertexId];
+
+            ComputeClipSpacePositionAndMotion(vertex,
+                                              patchCenter + vertex.objectSpacePosition,
+                                              patchCenter + prevInsectPosition + prevVertexPosition);
+
+            verts[vertId] = vertex;
+        }
+    }
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/biomes.hlsl b/meshNodeSample/shaders/biomes.hlsl
new file mode 100644
index 0000000..94f0216
--- /dev/null
+++ b/meshNodeSample/shaders/biomes.hlsl
@@ -0,0 +1,630 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "common.hlsl"
+
+// NOTE(review): the template arguments of DispatchNodeInputRecord, NodeOutput, NodeOutputArray,
+// ThreadNodeOutputRecords and GroupNodeOutputRecords are missing in the reviewed text of this file and
+// could not be recovered from it - restore the record types from the original source.
+
+// Groupshared counters for group output records
+groupshared uint denseGrassPatchCount;
+groupshared uint sparseGrassPatchCount;
+groupshared uint mushroomPatchCount;
+groupshared uint butterflyPatchCount;
+groupshared uint flowerPatchCount;
+groupshared uint beePatchCount;
+
+// Groupshared terrain gradient estimate
+groupshared int terrainGradient;
+
+// Tile node for the mountain biome ("Tile" array index 0).
+// Emits up to one rock record per thread (detailed tile) and, for some tiles, a cluster of trees placed
+// in a spiral around the tile center.
+[Shader("node")]
+[NodeId("Tile", 0)]
+[NodeLaunch("broadcasting")]
+[NodeDispatchGrid(1, 1, 1)]
+// each thread corresponds to one detailed tile
+[NumThreads(detailedTilesPerTile, detailedTilesPerTile, 1)]
+void MountainTile(
+    DispatchNodeInputRecord inputRecord,
+
+    int2 groupThreadId : SV_GroupThreadID,
+
+    [MaxRecords(detailedTilesPerTile * detailedTilesPerTile)]
+    [NodeId("GenerateRock")]
+    NodeOutput rockOutput,
+
+    [MaxRecords(detailedTilesPerTile * detailedTilesPerTile)]
+    [NodeId("GenerateTree", 1)]
+    NodeOutput treeOutput)
+{
+    // clear groupshared counters
+    terrainGradient = 0;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // row-major index of this thread in the thread group;
+    // the row stride has to match the thread group width declared in NumThreads above
+    const int linearGroupThreadId = groupThreadId.x + groupThreadId.y * detailedTilesPerTile;
+
+    const TileRecord input                   = inputRecord.Get();
+    const int2       tileGridPosition        = input.position;
+    const float2     tileWorldPosition       = tileGridPosition * tileSize;
+    const float3     tileCenterWorldPosition = GetTerrainPosition(tileWorldPosition + tileSize * 0.5);
+
+    const int2   threadGridPosition        = tileGridPosition * detailedTilesPerTile + groupThreadId;
+    const float2 threadWorldPosition       = threadGridPosition * detailedTileSize;
+    const float3 threadCenterWorldPosition = GetTerrainPosition(threadWorldPosition + detailedTileSize * 0.5);
+
+    // Gradient estimation
+    // only threads on the border of the tile contribute their height difference to the tile center
+    if (any(groupThreadId == 0) || any(groupThreadId == (detailedTilesPerTile - 1)))
+    {
+        const float3 towardsCenter = tileCenterWorldPosition - threadCenterWorldPosition;
+
+        // accumulate as fixed-point integer, as InterlockedAdd operates on integers
+        InterlockedAdd(terrainGradient, int(towardsCenter.y * 10.f));
+    }
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // Tree cluster output
+    {
+        // per-tile seed: all threads in the group agree on the tree cluster
+        const uint seed = CombineSeed(asuint(tileGridPosition.x), asuint(tileGridPosition.y));
+
+        const bool hasTreeCluster = (terrainGradient < 0) && (Random(seed, 97834) > 0.55);
+        const uint treeCount      = hasTreeCluster * round(lerp(5, 10, Random(seed, 5614)));
+
+        // the first treeCount threads each output one tree
+        const bool hasThreadTreeOutput = linearGroupThreadId < treeCount;
+
+        ThreadNodeOutputRecords treeOutputRecord =
+            treeOutput.GetThreadNodeOutputRecords(hasThreadTreeOutput);
+
+        if (hasThreadTreeOutput) {
+            // angle and radius both grow with the thread index, i.e. trees are placed along a spiral
+            const float  angle  = linearGroupThreadId * (1.5f + Random(seed, 8437));
+            const float  radius = linearGroupThreadId * (1.f + Random(seed, 4742));
+            const float2 offset = float2(sin(angle), cos(angle)) * radius;
+
+            treeOutputRecord.Get().position = tileCenterWorldPosition.xz + offset;
+        }
+
+        treeOutputRecord.OutputComplete();
+    }
+
+    // Rock output
+    {
+        // per-thread seed
+        const uint seed = CombineSeed(asuint(threadGridPosition.x), asuint(threadGridPosition.y));
+
+        const float3 terrainNormal = GetTerrainNormal(threadWorldPosition);
+
+        const bool hasRockOutput =
+            (abs(terrainGradient) < 500) && (Random(seed, 7982) > 0.75) && (terrainNormal.y > 0.65);
+
+        ThreadNodeOutputRecords rockOutputRecord =
+            rockOutput.GetThreadNodeOutputRecords(hasRockOutput);
+
+        if (hasRockOutput) {
+            rockOutputRecord.Get().position = threadCenterWorldPosition.xz;
+        }
+
+        rockOutputRecord.OutputComplete();
+    }
+}
+
+// Decides whether the detailed tile at detailedTileGridPosition contains a tree.
+//
+//   outTreeType      0 or 1 if the tile passed the biome check, -1 otherwise
+//   outTreePosition  world-space XZ position of the tree, +inf if the tile failed the biome check
+//
+// Returns true if a tree shall be placed. The outputs are only meaningful if true is returned;
+// in particular outTreeType can be -1 and must then not be used as an array index.
+bool HasTree(in int2 detailedTileGridPosition, out int outTreeType, out float2 outTreePosition)
+{
+    outTreeType = -1;
+    // Set position to +inf
+    outTreePosition = 1.f / 0.f;
+
+    const float2 detailedTileWorldPosition = detailedTileGridPosition * detailedTileSize;
+
+    const uint seed = CombineSeed(asuint(detailedTileGridPosition.x), asuint(detailedTileGridPosition.y));
+
+    const float3 biomeWeight = GetBiomeWeights(detailedTileWorldPosition);
+
+    // check if woodlands is the dominant biome
+    if ((biomeWeight.y < biomeWeight.x) || (biomeWeight.y < biomeWeight.z)) {
+        return false;
+    }
+
+    const float3 terrainNormal = GetTerrainNormal(detailedTileWorldPosition);
+
+    const float2 randomOffset = float2(Random(seed, 82347), Random(seed, 9780));
+
+    // tree type 1 is used close to the mountain biome and on steeper terrain
+    outTreeType     = ((biomeWeight.x > 0.4) || (terrainNormal.y < 0.85)) ? 1 : 0;
+    outTreePosition = detailedTileWorldPosition + randomOffset * detailedTileSize;
+
+    return (Random(seed, 7982) > 0.1) &&            // Randomly limit tree occurrence
+           (Random(seed, 28937) < biomeWeight.y) && // Only place trees in woodland biome
+           (terrainNormal.y > 0.65);                // Don't place trees on very steep slopes
+}
+
+// Tile node for the woodland biome ("Tile" array index 1).
+// Per thread (detailed tile) this node emits a sparse grass patch, a tree with mushrooms underneath,
+// and a detailed tile record for dense grass close to the camera.
+// Sparse grass and mushrooms of the whole thread group are collected in one shared record each.
+[Shader("node")]
+[NodeId("Tile", 1)]
+[NodeLaunch("broadcasting")]
+[NodeDispatchGrid(1, 1, 1)]
+// each thread corresponds to one detailed tile
+[NumThreads(detailedTilesPerTile, detailedTilesPerTile, 1)]
+void WoodlandTile(
+    DispatchNodeInputRecord inputRecord,
+
+    int2 groupThreadId : SV_GroupThreadID,
+
+    [MaxRecords(detailedTilesPerTile * detailedTilesPerTile)]
+    [NodeId("DetailedTile")]
+    NodeOutput detailedTileOutput,
+
+    [MaxRecords(detailedTilesPerTile * detailedTilesPerTile)]
+    [NodeId("GenerateTree")]
+    [NodeArraySize(2)]
+    NodeOutputArray treeOutput,
+
+    [MaxRecords(1)]
+    [NodeId("DrawMushroomPatch")]
+    NodeOutput mushroomOutput,
+
+    [MaxRecords(1)]
+    [NodeId("DrawSparseGrassPatch")]
+    NodeOutput sparseGrassOutput)
+{
+    // clear groupshared counters
+    sparseGrassPatchCount = 0;
+    mushroomPatchCount    = 0;
+    butterflyPatchCount   = 0;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    const TileRecord input            = inputRecord.Get();
+    const int2       tileGridPosition = input.position;
+
+    const int2   threadGridPosition              = tileGridPosition * detailedTilesPerTile + groupThreadId;
+    const float2 threadWorldPosition             = threadGridPosition * detailedTileSize;
+    const float3 threadCenterWorldPosition       = GetTerrainPosition(threadWorldPosition + detailedTileSize * 0.5);
+    const float3 threadCenterCurvedWorldPosition = GetCurvedWorldSpacePosition(threadCenterWorldPosition);
+    const float  centerDistanceToCamera          = distance(GetCameraPosition(), threadCenterWorldPosition);
+
+    const AxisAlignedBoundingBox threadBoundingBox =
+        GetGridBoundingBox(threadGridPosition, detailedTileSize, -100, 300);
+    const bool isThreadVisible = threadBoundingBox.IsVisible(ComputeClipPlanes());
+
+    const uint seed = CombineSeed(asuint(threadGridPosition.x), asuint(threadGridPosition.y));
+
+    // sparse grass
+    {
+
+        bool hasOutput = true;
+
+        // --- frustum cull ---
+        float radius = sqrt(grassPatchesPerDetailedTile * grassPatchesPerDetailedTile) * grassSpacing;
+        if (!IsSphereVisible(threadCenterCurvedWorldPosition, radius, ComputeClipPlanes())) {
+            hasOutput = false;
+        }
+
+        // --- distance cull ---
+        // sparse grass is only drawn beyond the dense grass range
+        if (((centerDistanceToCamera + radius) < denseGrassMaxDistance) ||
+            ((centerDistanceToCamera + radius) > sparseGrassMaxDistance))
+        {
+            hasOutput = false;
+        }
+
+        // index of this thread in the shared output record
+        int outputIndex = 0;
+
+        if (hasOutput) {
+            InterlockedAdd(sparseGrassPatchCount, 1, outputIndex);
+        }
+
+        GroupMemoryBarrierWithGroupSync();
+
+        GroupNodeOutputRecords sparseGrassRecord =
+            sparseGrassOutput.GetGroupNodeOutputRecords(sparseGrassPatchCount > 0);
+
+        // first thread writes the dispatch grid for the shared record
+        if (all(groupThreadId == 0) && sparseGrassPatchCount > 0) {
+            sparseGrassRecord.Get().dispatchGrid = uint3(sparseGrassPatchCount, sparseGrassThreadGroupsPerRecord, 1);
+        }
+
+        if (hasOutput) {
+            // XZ-position
+            sparseGrassRecord.Get().position[outputIndex] = threadGridPosition;
+        }
+
+        sparseGrassRecord.OutputComplete();
+    }
+
+    // tree output
+    {
+        int        treeType;
+        float2     treePosition;
+        const bool hasTreeOutput = HasTree(threadGridPosition, treeType, treePosition);
+
+        // HasTree reports a tree type of -1 for rejected tiles.
+        // Fall back to index 0 in this case, such that the output array is never indexed out of bounds.
+        const uint treeOutputIndex = hasTreeOutput ? treeType : 0;
+
+        ThreadNodeOutputRecords treeOutputRecord =
+            treeOutput[treeOutputIndex].GetThreadNodeOutputRecords(hasTreeOutput);
+
+        if (hasTreeOutput) {
+            treeOutputRecord.Get().position = treePosition;
+        }
+
+        treeOutputRecord.OutputComplete();
+
+        // Place mushrooms under each tree
+        const bool hasMushroomOutput =
+            hasTreeOutput && (centerDistanceToCamera < (mushroomMaxDistance * 1.5 + (detailedTileSize * 2)));
+        // Select random number of mushrooms to generate
+        const int mushroomOutputCount =
+            hasMushroomOutput * round(lerp(1, maxMushroomsPerDetailedTile, Random(seed, 67823)));
+
+        // Synchronize mushroom output counts across thread group
+        int mushroomOutputIndex = 0;
+        if (mushroomOutputCount > 0) {
+            InterlockedAdd(mushroomPatchCount, mushroomOutputCount, mushroomOutputIndex);
+        }
+
+        GroupMemoryBarrierWithGroupSync();
+
+        GroupNodeOutputRecords mushroomRecord =
+            mushroomOutput.GetGroupNodeOutputRecords(mushroomPatchCount > 0);
+
+        if (all(groupThreadId == 0) && mushroomPatchCount > 0) {
+            mushroomRecord.Get().dispatchGrid = uint3(mushroomPatchCount, 1, 1);
+        }
+
+        // distribute mushrooms with random jitter over a quarter circle around the tree
+        for (int mushroomIndex = 0; mushroomIndex < mushroomOutputCount; ++mushroomIndex) {
+            const float mushroomAngleRange  = PI / 2;
+            const float mushroomOffsetAngle =
+                (-mushroomAngleRange / 2.f) +
+                (mushroomIndex * (mushroomAngleRange / mushroomOutputCount)) +
+                (Random(seed, mushroomIndex, 23456) - 1.f) * (mushroomAngleRange / mushroomOutputCount);
+            const float  mushroomOffsetRadius = 0.75f + Random(seed, mushroomIndex, 89237) * 0.5f;
+            const float2 mushroomOffset       =
+                float2(cos(mushroomOffsetAngle), sin(mushroomOffsetAngle)) * mushroomOffsetRadius;
+
+            mushroomRecord.Get().position[mushroomOutputIndex + mushroomIndex] =
+                GetTerrainPosition(treePosition + mushroomOffset);
+        }
+
+        mushroomRecord.OutputComplete();
+    }
+
+    // detailed tile output
+    {
+        const bool hasDetailedTileOutput =
+            isThreadVisible && (centerDistanceToCamera < (denseGrassMaxDistance + (detailedTileSize * 2)));
+
+        ThreadNodeOutputRecords detailedTileOutputRecord =
+            detailedTileOutput.GetThreadNodeOutputRecords(hasDetailedTileOutput);
+
+        if (hasDetailedTileOutput) {
+            detailedTileOutputRecord.Get().position = tileGridPosition * detailedTilesPerTile + groupThreadId;
+        }
+
+        detailedTileOutputRecord.OutputComplete();
+    }
+}
+
+// Tile node for the grassland biome ("Tile" array index 2).
+// Per thread (detailed tile) this node emits a sparse grass patch, butterflies, flowers with optional
+// bees, and a detailed tile record for dense grass close to the camera.
+// Sparse grass, butterflies, flowers and bees of the whole thread group are collected in one shared
+// record each.
+[Shader("node")]
+[NodeId("Tile", 2)]
+[NodeLaunch("broadcasting")]
+[NodeDispatchGrid(1, 1, 1)]
+// each thread corresponds to one detailed tile
+[NumThreads(detailedTilesPerTile, detailedTilesPerTile, 1)]
+void GrasslandTile(
+    DispatchNodeInputRecord inputRecord,
+
+    int2 groupThreadId : SV_GroupThreadID,
+
+    [MaxRecords(detailedTilesPerTile * detailedTilesPerTile)]
+    [NodeId("DetailedTile")]
+    NodeOutput detailedTileOutput,
+
+    [MaxRecords(1)]
+    [NodeId("DrawButterflies")]
+    NodeOutput butterflyOutput,
+
+    [MaxRecords(1)]
+    [NodeArraySize(2)]
+    [NodeId("DrawFlowerPatch")]
+    NodeOutputArray flowerOutput,
+
+    [MaxRecords(1)]
+    [NodeId("DrawBees")]
+    NodeOutput beeOutput,
+
+    [MaxRecords(1)]
+    [NodeId("DrawSparseGrassPatch")]
+    NodeOutput sparseGrassOutput)
+{
+    // clear groupshared counters
+    sparseGrassPatchCount = 0;
+    butterflyPatchCount   = 0;
+    flowerPatchCount      = 0;
+    beePatchCount         = 0;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    const TileRecord input                   = inputRecord.Get();
+    const int2       tileGridPosition        = input.position;
+    const float2     tileWorldPosition       = tileGridPosition * tileSize;
+    const float3     tileCenterWorldPosition = GetTerrainPosition(tileWorldPosition + tileSize * 0.5);
+
+    const int2   threadGridPosition              = tileGridPosition * detailedTilesPerTile + groupThreadId;
+    const float2 threadWorldPosition             = threadGridPosition * detailedTileSize;
+    const float3 threadCenterWorldPosition       = GetTerrainPosition(threadWorldPosition + detailedTileSize * 0.5);
+    const float3 threadCenterCurvedWorldPosition = GetCurvedWorldSpacePosition(threadCenterWorldPosition);
+    const float  centerDistanceToCamera          = distance(GetCameraPosition(), threadCenterWorldPosition);
+
+    const AxisAlignedBoundingBox threadBoundingBox =
+        GetGridBoundingBox(threadGridPosition, detailedTileSize, -100, 300);
+    const bool isThreadVisible = threadBoundingBox.IsVisible(ComputeClipPlanes());
+
+    const uint seed    = CombineSeed(asuint(threadGridPosition.x), asuint(threadGridPosition.y));
+    const bool isNight = (GetTimeOfDay() > nightStartTime) || (GetTimeOfDay() < nightEndTime);
+
+    // sparse grass
+    {
+        bool hasOutput = true;
+
+        // --- frustum cull ---
+        float radius = sqrt(grassPatchesPerDetailedTile * grassPatchesPerDetailedTile) * grassSpacing;
+        if (!IsSphereVisible(threadCenterCurvedWorldPosition, radius, ComputeClipPlanes())) {
+            hasOutput = false;
+        }
+
+        // --- distance cull ---
+        // sparse grass is only drawn beyond the dense grass range
+        if (((centerDistanceToCamera + radius) < denseGrassMaxDistance) ||
+            ((centerDistanceToCamera + radius) > sparseGrassMaxDistance))
+        {
+            hasOutput = false;
+        }
+
+        // index of this thread in the shared output record
+        int outputIndex = 0;
+
+        if (hasOutput) {
+            InterlockedAdd(sparseGrassPatchCount, 1, outputIndex);
+        }
+
+        GroupMemoryBarrierWithGroupSync();
+
+        GroupNodeOutputRecords sparseGrassRecord =
+            sparseGrassOutput.GetGroupNodeOutputRecords(sparseGrassPatchCount > 0);
+
+        // first thread writes the dispatch grid for the shared record
+        if (all(groupThreadId == 0) && sparseGrassPatchCount > 0) {
+            sparseGrassRecord.Get().dispatchGrid = uint3(sparseGrassPatchCount, sparseGrassThreadGroupsPerRecord, 1);
+        }
+
+        if (hasOutput) {
+            // XZ-position
+            sparseGrassRecord.Get().position[outputIndex] = threadGridPosition;
+        }
+
+        sparseGrassRecord.OutputComplete();
+    }
+
+    // butterfly output
+    {
+        // 2% chance of spawning butterflies
+        const float butterflyProbability = 0.02f;
+        const bool  hasButterflyOutput   =
+            !isNight &&                                        // no butterflies at night
+            (centerDistanceToCamera < butterflyMaxDistance) && // cull butterflies in distance
+            (Random(seed, 1998) < butterflyProbability);
+
+        int butterflyOutputIndex = 0;
+
+        if (hasButterflyOutput) {
+            InterlockedAdd(butterflyPatchCount, 1, butterflyOutputIndex);
+        }
+
+        GroupMemoryBarrierWithGroupSync();
+
+        GroupNodeOutputRecords butterflyOutputRecord =
+            butterflyOutput.GetGroupNodeOutputRecords(butterflyPatchCount > 0);
+
+        if (all(groupThreadId == 0) && butterflyPatchCount > 0) {
+            butterflyOutputRecord.Get().dispatchGrid = uint3(butterflyPatchCount, 1, 1);
+        }
+
+        if (hasButterflyOutput) {
+            // terrain position at the center of the detailed tile
+            butterflyOutputRecord.Get().position[butterflyOutputIndex] = threadCenterWorldPosition;
+        }
+
+        butterflyOutputRecord.OutputComplete();
+    }
+
+    // flower output
+    {
+        const float3 biomeWeight = GetBiomeWeights(threadWorldPosition);
+
+        // cull flowers for visibility and max distance
+        const float flowerCullDistance = flowerMaxDistance - (Random(seed, 8437) * flowerMaxDistance * 0.2);
+        const bool  hasFlowerOutput    = isThreadVisible && (centerDistanceToCamera < flowerCullDistance);
+        // select random number of flowers to generate. number also depends on meadow biome weight
+        const int flowerOutputCount =
+            hasFlowerOutput * round(lerp(0, maxFlowersPerDetailedTile, Random(seed, 2134) * biomeWeight.z));
+        // 30% chance of spawning bees over a flower
+        const float beeProbability = 0.3f;
+        // one of the generated flowers can also spawn a bee patch
+        const bool hasBeeOutput = (flowerOutputCount > 0) &&                   // patch has at least one flower
+                                  !isNight &&                                  // no bees at night
+                                  (centerDistanceToCamera < beeMaxDistance) && // cull bees in distance
+                                  (Random(seed, 2378) < beeProbability);       // limit bee occurrence
+
+        // output indices into shared records
+        int flowerOutputIndex = 0;
+        int beeOutputIndex    = 0;
+
+        // increment shared flower/bee counters
+        if (flowerOutputCount > 0) {
+            InterlockedAdd(flowerPatchCount, flowerOutputCount, flowerOutputIndex);
+        }
+        if (hasBeeOutput) {
+            InterlockedAdd(beePatchCount, 1, beeOutputIndex);
+        }
+
+        GroupMemoryBarrierWithGroupSync();
+
+        // flower type (node array index) is selected per tile: 1 (sparse flowers) for distant tiles, 0 otherwise
+        const uint flowerType = (distance(GetCameraPosition(), tileCenterWorldPosition) > flowerSparseStartDistance);
+
+        GroupNodeOutputRecords flowerOutputRecord =
+            flowerOutput[flowerType].GetGroupNodeOutputRecords(flowerPatchCount > 0);
+        GroupNodeOutputRecords beeOutputRecord =
+            beeOutput.GetGroupNodeOutputRecords(beePatchCount > 0);
+
+        if (all(groupThreadId == 0) && flowerPatchCount > 0) {
+            if (flowerType == 1) {
+                // sparse flowers: one thread group draws up to flowersInSparseFlowerThreadGroup flowers (round up)
+                flowerOutputRecord.Get().dispatchGrid = uint3(
+                    (flowerPatchCount + flowersInSparseFlowerThreadGroup - 1) / flowersInSparseFlowerThreadGroup, 1, 1);
+            } else {
+                flowerOutputRecord.Get().dispatchGrid = uint3(flowerPatchCount, 1, 1);
+            }
+            flowerOutputRecord.Get().flowerPatchCount = flowerPatchCount;
+        }
+        if (all(groupThreadId == 0) && beePatchCount > 0) {
+            beeOutputRecord.Get().dispatchGrid = uint3(beePatchCount, 1, 1);
+        }
+
+        for (int flowerId = 0; flowerId < flowerOutputCount; ++flowerId) {
+            // random position inside the detailed tile
+            const float2 offset =
+                float2(Random(asuint(threadWorldPosition.x), asuint(threadWorldPosition.y), flowerId, 4387),
+                       Random(asuint(threadWorldPosition.x), asuint(threadWorldPosition.y), flowerId, 8327)) *
+                detailedTileSize;
+
+            flowerOutputRecord.Get().position[flowerOutputIndex + flowerId] = threadWorldPosition + offset;
+        }
+
+        if (hasBeeOutput) {
+            // bees are placed over the first flower written by this thread
+            beeOutputRecord.Get().position[beeOutputIndex] =
+                GetTerrainPosition(flowerOutputRecord.Get().position[flowerOutputIndex]);
+        }
+
+        flowerOutputRecord.OutputComplete();
+        beeOutputRecord.OutputComplete();
+    }
+
+    // detailed tile output
+    {
+        const bool hasDetailedTileOutput =
+            isThreadVisible && (centerDistanceToCamera < (denseGrassMaxDistance + (detailedTileSize * 2)));
+
+        ThreadNodeOutputRecords detailedTileOutputRecord =
+            detailedTileOutput.GetThreadNodeOutputRecords(hasDetailedTileOutput);
+
+        if (hasDetailedTileOutput) {
+            detailedTileOutputRecord.Get().position = tileGridPosition * detailedTilesPerTile + groupThreadId;
+        }
+
+        detailedTileOutputRecord.OutputComplete();
+    }
+}
+
+// Detailed tile node: emits the dense grass patches of one detailed tile.
+// Patches are culled by terrain slope, view frustum, camera distance and mountain biome weight.
+// All patches of the thread group are collected in one shared record; patches close to the camera
+// occupy two consecutive entries (bladeOffset 0 and 1) and are thus drawn by two thread groups.
+[Shader("node")]
+[NodeLaunch("broadcasting")]
+[NodeDispatchGrid(1, 1, 1)]
+// each thread corresponds to one grass patch tile
+[NumThreads(grassPatchesPerDetailedTile, grassPatchesPerDetailedTile, 1)]
+void DetailedTile(
+    DispatchNodeInputRecord inputRecord,
+
+    int2 groupThreadId : SV_GroupThreadID,
+
+    // Node outputs:
+    [MaxRecords(1)]
+    [NodeId("DrawDenseGrassPatch")]
+    NodeOutput grassOutput)
+{
+    // clear groupshared counters
+    denseGrassPatchCount = 0;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    const TileRecord input = inputRecord.Get();
+
+    const int2 tileGridPosition = input.position;
+
+    const int2   threadGridPosition  = tileGridPosition * grassPatchesPerDetailedTile + groupThreadId;
+    const float2 threadWorldPosition = (threadGridPosition + GetGrassOffset(threadGridPosition)) * grassSpacing;
+
+    // get terrain height and normal & biome weights
+    const float3 patchPosition = GetTerrainPosition(threadWorldPosition.x, threadWorldPosition.y);
+    const float3 patchNormal   = GetTerrainNormal(threadWorldPosition.x, threadWorldPosition.y);
+    const float3 biomeWeights  = GetBiomeWeights(threadWorldPosition);
+
+    bool hasOutput = true;
+
+    // don't spawn grass on extremely steep slopes
+    if (patchNormal.y < 0.55) {
+        hasOutput = false;
+    }
+
+    // cull against view frustum
+    const float radius = 4 * grassSpacing;
+    if (!IsSphereVisible(patchPosition, radius, ComputeClipPlanes())) {
+        hasOutput = false;
+    }
+
+    const float distanceToCamera = distance(GetCameraPosition(), patchPosition.xyz);
+
+    // cull against distance to camera
+    if (distanceToCamera > denseGrassMaxDistance) {
+        hasOutput = false;
+    }
+
+    // cull at biome transitions to mountain biome
+    if (Random(asuint(patchPosition.x), asuint(patchPosition.z), 2378) < biomeWeights.x * 2) {
+        hasOutput = false;
+    }
+
+    const float minGrassHeight = 0.2;
+    const float maxGrassHeight = minGrassHeight + .35;
+    const float grassHeight    = minGrassHeight + (maxGrassHeight - minGrassHeight) * Random(threadGridPosition.x, threadGridPosition.y, 34567);
+
+    // Each dense grass mesh shader can only render 16 grass blades.
+    // If grass patch has more than 16 blades, we require two thread groups to draw this patch
+    const bool hasSplitOutput =
+        lerp(32.f, 2., pow(saturate(distanceToCamera / (denseGrassMaxDistance * 1.05)), 0.75)) > 16.f;
+
+    // Output dense grass
+    {
+        // sync total dense grass count
+        uint grassOutputIndex = 0;
+
+        if (hasOutput) {
+            InterlockedAdd(denseGrassPatchCount, hasSplitOutput ? 2 : 1, grassOutputIndex);
+        }
+
+        GroupMemoryBarrierWithGroupSync();
+
+        GroupNodeOutputRecords denseGrassRecord =
+            grassOutput.GetGroupNodeOutputRecords(denseGrassPatchCount > 0);
+
+        if (all(groupThreadId == 0) && denseGrassPatchCount > 0) {
+            denseGrassRecord.Get().dispatchGrid = uint3(denseGrassPatchCount, 1, 1);
+        }
+
+        if (hasOutput) {
+            denseGrassRecord.Get().position[grassOutputIndex]    = patchPosition;
+            denseGrassRecord.Get().height[grassOutputIndex]      = grassHeight;
+            denseGrassRecord.Get().bladeOffset[grassOutputIndex] = 0;
+
+            if (hasSplitOutput) {
+                // second entry for the same patch
+                denseGrassRecord.Get().position[grassOutputIndex + 1]    = patchPosition;
+                denseGrassRecord.Get().height[grassOutputIndex + 1]      = grassHeight;
+                denseGrassRecord.Get().bladeOffset[grassOutputIndex + 1] = 1;
+            }
+        }
+
+        denseGrassRecord.OutputComplete();
+    }
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/butterflymeshshader.hlsl b/meshNodeSample/shaders/butterflymeshshader.hlsl
new file mode 100644
index 0000000..1c10fdb
--- /dev/null
+++ b/meshNodeSample/shaders/butterflymeshshader.hlsl
@@ -0,0 +1,236 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "common.hlsl"

// Static "vertex buffer" for butterflies
// Vertices 0-11 form the body, vertices 12-15 are the wing tips (see butterflyTriangles).
static const int numButterflyVertices = 16;
static const float3 butterflyPositions[numButterflyVertices] = {
    float3(-0.548, 0.0, -0.0),
    float3(-0.41, 0.0, 0.226),
    float3(-0.41, -0.0, -0.226),
    float3(-0.948, 0.239, 0.528),
    float3(-1.048, 0.238, 0.468),
    float3(-0.948, 0.239, -0.528),
    float3(-1.048, 0.238, -0.468),
    float3(0.747, 0.0, 0.125),
    float3(0.747, -0.0, -0.125),
    float3(-0.194, 0.139, -0.0),
    float3(0.384, -0.046, -0.0),
    float3(-0.297, 0.092, -0.0),
    float3(-0.651, 0.324, 2.446),
    float3(1.621, -0.0, 0.785),
    float3(-0.651, 0.324, -2.446),
    float3(1.621, -0.0, -0.785),
};
// Static vertex color attributes
// Only used for vertices 0-11; wing vertices (12-15) get a random per-insect color in the shader.
static const float3 butterflyColors[numButterflyVertices] = {
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0., 0., 0.),
    float3(0.2, 0.2, .7),
    float3(0.2, 0.2, .7),
    float3(0.2, 0.2, .7),
    float3(0.2, 0.2, .7)
};

// Static "index buffer" for butterflies
static const int numButterflyTriangles = 14;
static const uint3 butterflyTriangles[numButterflyTriangles] = {
    uint3(1, 10, 0),
    uint3(3, 4, 0),
    uint3(5, 0, 6),
    uint3(10, 2, 0),
    uint3(7, 1, 9),
    uint3(1, 0, 9),
    uint3(2, 9, 0),
    uint3(2, 8, 9),
    uint3(7, 9, 8),
    uint3(7, 10, 1),
    uint3(7, 8, 10),
    uint3(2, 10, 8),
    uint3(12, 13, 11),
    uint3(11, 14, 15),
};

// Returns the animated position offset of an insect at the given (already scaled & offset) time.
// Each axis samples PerlinNoise2D along a different row of the noise field.
float3 GetInsectPosition(float time)
{
    return 4 * float3(PerlinNoise2D(float2(time * 0.001, 0)),
                      PerlinNoise2D(float2(time * 0.001, 5)),
                      PerlinNoise2D(float2(time * 0.001, 9)));
}

static const int butterflyGroupSize = 128;

// customizable butterfly limit
// bounded by the 256 vertex / 192 triangle budget used for one mesh shader thread group
static const int maxNumButterflies  = min(32, min(256 / numButterflyVertices, 192 / numButterflyTriangles));
static const int numOutputVertices  = maxNumButterflies * numButterflyVertices;
static const int numOutputTriangles = maxNumButterflies * numButterflyTriangles;

// number of loop iterations each thread needs to emit all vertices / triangles
static const int numOutputVertexIterations   = (numOutputVertices + (butterflyGroupSize - 1)) / butterflyGroupSize;
static const int numOutputTriangleIterations = (numOutputTriangles + (butterflyGroupSize - 1)) / butterflyGroupSize;

// Mesh node: DrawButterflies
//
// One thread group is launched per entry of DrawInsectRecord::position (gid selects the entry)
// and emits maxNumButterflies animated butterflies around that position.
[Shader("node")]
[NodeLaunch("mesh")]
[NodeId("DrawButterflies", 0)]
[NodeMaxDispatchGrid(maxInsectsPerRecord, 1, 1)]
// This limit was set through instrumentation and is not required on AMD GPUs.
// If you wish to change any of the procedural generation parameters,
// and you are running on a non-AMD GPU, you may need to adjust this limit.
// You can learn more at:
// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices
[NodeMaxInputRecordsPerGraphEntryRecord(10, true)]
[NumThreads(butterflyGroupSize, 1, 1)]
[OutputTopology("triangle")]
void ButterflyMeshShader(
    uint gtid : SV_GroupThreadID,
    uint gid : SV_GroupID,
    DispatchNodeInputRecord<DrawInsectRecord> inputRecord,
    out indices uint3 tris[numOutputTriangles],
    out vertices InsectVertex verts[numOutputVertices])
{
    const int numButterflies = maxNumButterflies;
    const int vertexCount    = numButterflies * numButterflyVertices;
    const int triangleCount  = numButterflies * numButterflyTriangles;

    SetMeshOutputCounts(vertexCount, triangleCount);

    const float3 patchCenter = inputRecord.Get().position[gid];
    const int    seed        = CombineSeed(asuint(patchCenter.x), asuint(patchCenter.z));

    // write index buffer: every thread emits triangles gtid, gtid + groupSize, ...
    [[unroll]]
    for (int i = 0; i < numOutputTriangleIterations; ++i)
    {
        const int triId = gtid + butterflyGroupSize * i;

        if (triId < triangleCount) {
            const int insectId         = triId / numButterflyTriangles;
            const int insectTriangleId = triId % numButterflyTriangles;

            // offset static indices into the vertex range of this insect
            tris[triId] = butterflyTriangles[insectTriangleId] + insectId * numButterflyVertices;
        }
    }

    // write vertex buffer: every thread emits vertices gtid, gtid + groupSize, ...
    [[unroll]]
    for (int i = 0; i < numOutputVertexIterations; ++i)
    {
        const int vertId = gtid + butterflyGroupSize * i;

        if (vertId < vertexCount) {
            const int insectId       = vertId / numButterflyVertices;
            const int insectVertexId = vertId % numButterflyVertices;

            // start time before night start
            const float nightStart = nightStartTime - Random(seed, 4561);
            // end time after night end
            const float nightEnd = nightEndTime + Random(seed, 6456);

            // scale insects to 0 at night
            const float nightScale = max(smoothstep(nightStart - 1, nightStart, GetTimeOfDay()),
                                         1 - smoothstep(nightEnd, nightEnd + 1, GetTimeOfDay()));
            // slowly scale insects to 0 in the distance
            // for simplicity, we omit this scaling from the motion vector, as it only affects very distant insects
            const float distanceScale = smoothstep(
                butterflyFadeStartDistance, butterflyMaxDistance, distance(patchCenter, GetCameraPosition()));

            const float scale = (.01 + 0.03 * Random(seed, insectId, 8)) * (1 - nightScale) * (1 - distanceScale);

            // radius scale for positioning insects
            static const float R      = 2;
            const float        angle  = 2 * PI * Random(seed, insectId, 8);
            const float        radius = sqrt(R * Random(seed, insectId, 98));

            // compute random position offset for insect
            // insects will rotate around this position
            const float3 insectBasePosition =
                float3(radius * cos(angle), 0.75 + 0.5 * Random(seed, insectId, 988), radius * sin(angle));

            // per-insect time offset de-synchronizes the animation of individual insects
            const float timeOffset = 1e6 * Random(seed, insectId, 55);
            const float time       = GetTime() + timeOffset;
            const float timePrev   = GetPreviousTime() + timeOffset;

            // compute local insect position offsets
            // the "Delta" samples lie slightly in the past and are only used to derive the movement direction
            const float3 insectPositionOffset          = GetInsectPosition(time * .25);
            const float3 insectPositionOffsetDelta     = GetInsectPosition((time - 10) * .25);
            const float3 prevInsectPositionOffset      = GetInsectPosition(timePrev * .25);
            const float3 prevInsectPositionOffsetDelta = GetInsectPosition((timePrev - 10) * .25);

            const float3 insectPosition     = insectBasePosition + insectPositionOffset;
            const float3 prevInsectPosition = insectBasePosition + prevInsectPositionOffset;

            // compute forward vectors for rotating insects to face movement direction
            const float2 forward     = -normalize(insectPositionOffset.xz - insectPositionOffsetDelta.xz);
            const float2 prevForward = -normalize(prevInsectPositionOffset.xz - prevInsectPositionOffsetDelta.xz);

            float3 vertexPosition     = butterflyPositions[insectVertexId] * scale;
            float3 prevVertexPosition = vertexPosition;

            // rotate wing vertices around insect center
            if (insectVertexId >= 12) {
                // compute wing animation angle
                static const float wingDownAngle = -0.15;
                static const float wingAmplitude = 0.6;
                const float phase     = wingDownAngle + wingAmplitude * cos(2 * PI * frac(time * 0.005));
                const float phasePrev = wingDownAngle + wingAmplitude * cos(2 * PI * frac(timePrev * 0.005));

                // insect center for rotating wings
                // must be a plain local (not function-static): it depends on the per-insect
                // scale, which differs between iterations of the enclosing vertex loop
                const float3 rotatePoint = float3(0.353, 0.6, 0.0) * scale;

                // left and right wing flap in opposite directions
                float wingAngle          = sign(vertexPosition.z) * phase;
                float prevWingAngle      = sign(vertexPosition.z) * phasePrev;
                vertexPosition.yz        = RotateAroundPoint2d(vertexPosition.yz, wingAngle, rotatePoint.yz);
                prevVertexPosition.yz    = RotateAroundPoint2d(prevVertexPosition.yz, prevWingAngle, rotatePoint.yz);
            }
            // rotate insect towards movement direction
            vertexPosition.xz     = float2(vertexPosition.x * forward.x - vertexPosition.z * forward.y,
                                           vertexPosition.x * forward.y + vertexPosition.z * forward.x);
            prevVertexPosition.xz = float2(prevVertexPosition.x * prevForward.x - prevVertexPosition.z * prevForward.y,
                                           prevVertexPosition.x * prevForward.y + prevVertexPosition.z * prevForward.x);

            InsectVertex vertex;
            vertex.objectSpacePosition = insectPosition + vertexPosition;

            if (insectVertexId >= 12) {
                // compute random wing color
                vertex.color = 0.95 * normalize(float3(Random(insectId, seed, 'r'),  //
                                                       Random(insectId, seed, 'g'),  //
                                                       Random(insectId, seed, 'b')));
            } else {
                vertex.color = butterflyColors[insectVertexId];
            }

            ComputeClipSpacePositionAndMotion(vertex,
                                              patchCenter + vertex.objectSpacePosition,
                                              patchCenter + prevInsectPosition + prevVertexPosition);

            verts[vertId] = vertex;
        }
    }
}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/common.hlsl b/meshNodeSample/shaders/common.hlsl
new file mode 100644
index 0000000..52946db
--- /dev/null
+++ b/meshNodeSample/shaders/common.hlsl
@@ -0,0 +1,355 @@
// This file is part of the AMD Work Graph Mesh Node Sample.
//
// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "workgraphcommon.h" +#include "utils.hlsl" +#include "heightmap.hlsl" + +// ================== +// Constants + +// World grid definitions +// DO NOT CHANGE THESE! 
static const uint grassPatchesPerDetailedTile = 16;
static const uint detailedTilesPerTile        = 8;
static const uint tilesPerChunk               = 8;

// World grid sizes are defined by grass blade spacing
static const float grassSpacing     = 0.25;
static const float detailedTileSize = grassPatchesPerDetailedTile * grassSpacing;
static const float tileSize         = detailedTilesPerTile * detailedTileSize;
static const float chunkSize        = tilesPerChunk * tileSize;

// Distance limits for procedural generation
static const float worldGridMaxDistance = 2000.f;

static const float denseGrassMaxDistance  = 80.f;
static const float sparseGrassMaxDistance = 250.f;

static const float flowerMaxDistance         = 300.f;
static const float flowerSparseStartDistance = 100.f;

static const float mushroomMaxDistance = denseGrassMaxDistance;

static const float butterflyMaxDistance       = 25.f;
static const float butterflyFadeStartDistance = 20.f;
static const float beeMaxDistance             = 40.f;
static const float beeFadeStartDistance       = 30.f;

// Night time definition: start at 18:00 till 6:00
// Bees and butterflies won't be rendered/generated at night
static const float nightStartTime = 18.f;
static const float nightEndTime   = 6.f;

// Radius of curved world
static const float earthRadius = 6000.f;

// ===================================
// Record structs for work graph nodes

// Record for each tile in a chunk & detailed tile in a tile
struct TileRecord {
    int2 position;
};

// Record for drawing terrain segments inside a chunk
struct DrawTerrainChunkRecord {
    uint3 dispatchGrid : SV_DispatchGrid;
    int2  chunkGridPosition;
    int   levelOfDetail;
    // indicates if neighboring terrain tiles have higher LOD
    // x = (-1, 0)
    // y = (0, -1)
    // z = (1, 0)
    // w = (0, 1)
    bool4 levelOfDetailTransition;
};

struct GenerateTreeRecord {
    float2 position;
};

static const uint maxSplinesPerRecord        = 32;
static const uint splineMaxControlPointCount = 8;

// Record for drawing multiple splines. Each spline is defined as a series of control points.
// Each control point defines a vertex ring with varying radius and vertex count.
struct DrawSplineRecord {
    uint3  dispatchGrid : SV_DispatchGrid;
    float3 color[maxSplinesPerRecord];
    float  rotationOffset[maxSplinesPerRecord];
    // x is overall wind strength, y is blending factor for individual vertices
    float2 windStrength[maxSplinesPerRecord];
    uint   controlPointCount[maxSplinesPerRecord];
    float3 controlPointPositions[maxSplinesPerRecord * splineMaxControlPointCount];
    uint   controlPointVertexCounts[maxSplinesPerRecord * splineMaxControlPointCount];
    float2 controlPointRadii[maxSplinesPerRecord * splineMaxControlPointCount];
    float  controlPointNoiseAmplitudes[maxSplinesPerRecord * splineMaxControlPointCount];
};

// Each thread in a biome tile can generate one insect
static const uint maxInsectsPerRecord = detailedTilesPerTile * detailedTilesPerTile;

// Record for insects
// Used by
//  - DrawBees
//  - DrawButterflies
struct DrawInsectRecord {
    uint3  dispatchGrid : SV_DispatchGrid;
    float3 position[maxInsectsPerRecord];
};

// Each thread in a biome tile can generate up to 3 mushrooms
static const uint maxMushroomsPerDetailedTile = 3;
static const uint maxMushroomsPerRecord = detailedTilesPerTile * detailedTilesPerTile * maxMushroomsPerDetailedTile;

// Record for mushrooms
// Used by
//  - DrawMushroomPatch
struct DrawMushroomRecord {
    uint3  dispatchGrid : SV_DispatchGrid;
    float3 position[maxMushroomsPerRecord];
};

// Each thread in a biome tile can generate up to 12 flowers
static const int maxFlowersPerDetailedTile = 12;
static const int maxFlowersPerRecord       = (detailedTilesPerTile * detailedTilesPerTile) * maxFlowersPerDetailedTile;
// scaling factor for dispatch grid size when using sparse flowers
static const int flowersInSparseFlowerThreadGroup = 5;

// Record for flowers
// Used by
//  - DrawFlowerPatch
struct DrawFlowerRecord {
    uint3  dispatchGrid : SV_DispatchGrid;
    uint   flowerPatchCount;
    float2 position[maxFlowersPerRecord];
};

// Each thread in a detailed tile corresponds to one or two dense grass patches
static const uint maxDenseGrassPatchesPerRecord = 2 * grassPatchesPerDetailedTile * grassPatchesPerDetailedTile;

// Record for dense grass
// Used by
//  - DrawDenseGrassPatch
struct DrawDenseGrassRecord {
    uint3  dispatchGrid : SV_DispatchGrid;
    float3 position[maxDenseGrassPatchesPerRecord];
    float  height[maxDenseGrassPatchesPerRecord];
    uint   bladeOffset[maxDenseGrassPatchesPerRecord];
};

// Each thread in a biome tile corresponds to a sparse grass patch
static const uint maxSparseGrassPatchesPerRecord = detailedTilesPerTile * detailedTilesPerTile;
// Number of sparse grass mesh shader thread groups needed to render one detailed tile
static const uint sparseGrassThreadGroupsPerRecord = 8;

// record for DrawSparseGrassPatch
struct DrawSparseGrassRecord {
    uint3 dispatchGrid : SV_DispatchGrid;
    int2  position[maxSparseGrassPatchesPerRecord];
};

// =====================================
// Common MS & PS input & output structs

// Vertex definition for InsectPixelShader
// Used by
//  - BeeMeshShader
//  - ButterflyMeshShader
//  - FlowerMeshShader
//  - MushroomMeshShader
struct InsectVertex {
    float4 clipSpacePosition : SV_POSITION;
    // Vertex position in object space for computing normals
    // object to world space transforms are always only translations
    // thus we can safely compute normals in object space
    float3 objectSpacePosition : POSITION0;
    float2 clipSpaceMotion : TEXCOORD0;
    float3 color : NORMAL0;
};

// Vertex definition for GrassPixelShader
// Used by
//  - DenseGrassMeshShader
//  - SparseGrassMeshShader
struct GrassVertex {
    float4 clipSpacePosition : SV_POSITION;
    float3 worldSpacePosition : POSITION0;
    float2 clipSpaceMotion : TEXCOORD0;
    float3 worldSpaceNormal : NORMAL0;
    float3 worldSpaceGroundNormal : NORMAL1;
    float  rootHeight : BLENDWEIGHT0;
    float  height : BLENDWEIGHT1;
};

// Primitive definition for mesh shaders
// Used by
//  - SparseGrassMeshShader
struct GrassCullPrimitive {
    bool cull : SV_CullPrimitive;
};

// Output struct for deferred pixel shaders
struct DeferredPixelShaderOutput {
    float4 baseColor : SV_Target0;
    float4 normal : SV_Target1;
    float2 motion : SV_Target2;
};

// ======================================================
// Common functions for accessing data in constant buffer

// Time of day in hours; currently fixed to noon (12:00).
float GetTimeOfDay()
{
    return 12;
}

float3 GetCameraPosition()
{
    return CameraPosition.xyz;
}

float3 GetPreviousCameraPosition()
{
    return PreviousCameraPosition.xyz;
}

ClipPlanes ComputeClipPlanes()
{
    return ComputeClipPlanes(ViewProjection);
}

uint GetTime()
{
    return ShaderTime;
}

uint GetPreviousTime()
{
    return PreviousShaderTime;
}

float GetWindStrength()
{
    return WindStrength;
}

// Rotation of wind direction around y-Axis; 0 = float3(1, 0, 0);
float GetWindDirection()
{
    return WindDirection;
}

// =====================================================
// Common functions for grass placement & wind animation

// Returns a pseudo-random offset (in grid units) for the grass patch at the given grid position.
// The offset lies inside a disc of radius patchCenterVariance, assuming Random returns
// values in [0, 1] - NOTE(review): confirm against utils.hlsl.
float2 GetGrassOffset(in const int2 grid)
{
    float theta  = 2. * PI * Random(grid.x, grid.y, 1337);
    float radius = sqrt(Random(grid.x, 19, grid.y));
    static const float patchCenterVariance = 0.4;

    return patchCenterVariance * radius * float2(cos(theta), sin(theta));
}

// Returns 2D wind offset as 3D vector for convenience
float3 GetWindOffset(in const float2 pos, in const float time)
{
    // project position onto wind direction, so that the wind wave travels along it
    float posOnSineWave = cos(GetWindDirection()) * pos.x - sin(GetWindDirection()) * pos.y;

    float t     = 0.007 * time + posOnSineWave + 4 * PerlinNoise2D(0.1 * pos);
    float windx = 2 * sin(.5 * t);
    float windz = 1 * sin(1. * t);

    return 0.04 * float3(windx, 0, windz);
}

// ==================================================
// Common functions for curved world & motion vectors

// Computes position on curved world relative to current camera position
//  worldSpacePosition - position on the flat world
//  previousCenter     - if true, curve around the previous frame's camera position
//                       (used for motion vectors)
float3 GetCurvedWorldSpacePosition(in float3 worldSpacePosition, in bool previousCenter = false)
{
    const float2 center           = previousCenter ? GetPreviousCameraPosition().xz : GetCameraPosition().xz;
    const float2 centerToPos      = worldSpacePosition.xz - center;
    const float  distanceToCenter = length(centerToPos);
    // A position exactly at the center has no direction; avoid 0/0 = NaN.
    // Any direction works there, as it is only ever multiplied by sin(0) = 0 below.
    const float2 direction = (distanceToCenter > 0) ? (centerToPos / distanceToCenter) : float2(0, 0);

    // angle on the sphere that corresponds to the flat distance
    const float alpha = distanceToCenter / earthRadius;
    const float s     = sin(alpha);
    const float c     = cos(alpha);

    const float3 curvedPosUp = normalize(float3(direction.x * s, c, direction.y * s));
    const float3 centerToCurvedPos =
        float3(direction.x * s * earthRadius, (c * earthRadius) - earthRadius, direction.y * s * earthRadius);

    // fade out terrain height between 1000 and 2000 units distance (1 near the camera, 0 far away)
    const float heightScale = smoothstep(2000, 1000, distanceToCenter);

    return float3(center.x, 0, center.y) + centerToCurvedPos +  // base position
           curvedPosUp * worldSpacePosition.y * heightScale;    // add rotated y component
}

// Computes bounding box for a grid element (e.g. chunk) on curved world
// The box is spanned by the curved positions of the two opposite grid corners,
// extended by minHeight / maxHeight along the y-axis.
AxisAlignedBoundingBox GetGridBoundingBox(in int2  gridPosition,
                                          in float elementSize,
                                          in float minHeight,
                                          in float maxHeight)
{
    AxisAlignedBoundingBox result;

    const float3 minWorldPosition = float3(gridPosition.x, 0, gridPosition.y) * elementSize;
    const float3 maxWorldPosition = float3(gridPosition.x + 1, 0, gridPosition.y + 1) * elementSize;

    result.min = GetCurvedWorldSpacePosition(minWorldPosition) + float3(0, minHeight, 0);
    result.max = GetCurvedWorldSpacePosition(maxWorldPosition) + float3(0, maxHeight, 0);

    return result;
}

// Computes position on curved world, projects it into clip space & assigns it to vertex.clipSpacePosition
// Computes motion vector and assigns it to vertex.clipSpaceMotion
// T must provide the members clipSpacePosition (float4) and clipSpaceMotion (float2),
// e.g. InsectVertex or GrassVertex.
template <typename T>
void ComputeClipSpacePositionAndMotion(inout T   vertex,
                                       in float3 worldSpacePosition,
                                       in float3 previousWorldSpacePosition)
{
    // project vertex onto curved world
    const float3 curvedWorldSpacePosition         = GetCurvedWorldSpacePosition(worldSpacePosition);
    const float3 previousCurvedWorldSpacePosition = GetCurvedWorldSpacePosition(previousWorldSpacePosition, true);

    vertex.clipSpacePosition = mul(ViewProjection, float4(curvedWorldSpacePosition, 1));

    const float4 previousClipSpacePosition =
        mul(PreviousViewProjection, float4(previousCurvedWorldSpacePosition, 1));
    // motion vector = previous minus current position after perspective divide
    vertex.clipSpaceMotion = (previousClipSpacePosition.xy / previousClipSpacePosition.w) -
                             (vertex.clipSpacePosition.xy / vertex.clipSpacePosition.w);
}

// Computes position on curved world, projects it into clip space & assigns it to vertex.clipSpacePosition
// Computes motion vector and assigns it to vertex.clipSpaceMotion
// Overload for vertices that do not move in world space between frames.
template <typename T>
void ComputeClipSpacePositionAndMotion(inout T vertex, in float3 worldSpacePosition)
{
    ComputeClipSpacePositionAndMotion(vertex, worldSpacePosition, worldSpacePosition);
}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/densegrassmeshshader.hlsl
b/meshNodeSample/shaders/densegrassmeshshader.hlsl
new file mode 100644
index 0000000..7ea8c36
--- /dev/null
+++ b/meshNodeSample/shaders/densegrassmeshshader.hlsl
@@ -0,0 +1,206 @@
// This file is part of the AMD Work Graph Mesh Node Sample.
//
// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "common.hlsl"

// Evaluates the quadratic Bezier curve with control points v0, v1, v2 at parameter t
// by repeated linear interpolation.
float3 bezier(float3 v0, float3 v1, float3 v2, float t)
{
    return lerp(lerp(v0, v1, t), lerp(v1, v2, t), t);
}

// Evaluates the derivative (tangent) of the quadratic Bezier curve with
// control points v0, v1, v2 at parameter t.
float3 bezierDerivative(float3 v0, float3 v1, float3 v2, float t)
{
    const float3 firstLegTerm  = 2. * (1. - t) * (v1 - v0);
    const float3 secondLegTerm = 2. * t * (v2 - v1);
    return firstLegTerm + secondLegTerm;
}

// The following function (MakePersistentLength) is taken from
// https://github.com/klejah/ResponsiveGrassDemo/blob/6ce514717467acc80fd965a6f7695d5151ba8c03/ResponsiveGrassDemo/shader/Grass/GrassUpdateForcesShader.cs#L67
// Licensed under BSD 3-Clause:
//
// Copyright (c) 2016, klejah
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice, this
//   list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
// * Neither the name of the copyright holder nor the names of its
//   contributors may be used to endorse or promote products derived from
//   this software without specific prior written permission.
// Rescales the two legs (v0 -> v1, v1 -> v2) of a quadratic Bezier control polygon so that
// the approximate arc length of the curve equals height. v0 stays fixed; v1 and v2 are updated.
void MakePersistentLength(in float3 v0, inout float3 v1, inout float3 v2, in float height)
{
    // Persistent length
    float3 v01  = v1 - v0;
    float3 v12  = v2 - v1;
    float  lv01 = length(v01);
    float  lv12 = length(v12);

    float L1 = lv01 + lv12;                // length of control polygon
    float L0 = length(v2 - v0);            // length of chord
    float L  = (2.0f * L0 + L1) / 3.0f;    // http://steve.hollasch.net/cgindex/curves/cbezarclen.html

    float ldiff = height / L;
    v01         = v01 * ldiff;
    v12         = v12 * ldiff;
    v1          = v0 + v01;
    v2          = v1 + v12;
}

static const int denseGrassGroupSize        = 128;
static const int maxNumOutputVerticesLimit  = 256;
static const int maxNumOutputTrianglesLimit = 192;
static const int numOutputVerticesLimit     = 128;
static const int numOutputTrianglesLimit    = 96;

// 4 vertices per edge; 2 edges per blade
static const int numGrassBladeVerticesPerEdge = 4;
static const int numGrassBladeVertices        = 2 * numGrassBladeVerticesPerEdge;
static const int numGrassBladeTriangles       = 6;
// maximum number of blades in one grass patch (may be split over two thread groups)
static const int maxNumGrassBlades =
    min(32,
        min(maxNumOutputVerticesLimit / numGrassBladeVertices, maxNumOutputTrianglesLimit / numGrassBladeTriangles));
// maximum number of blades emitted by a single thread group
static const int maxNumOutputGrassBlades =
    min(32, min(numOutputVerticesLimit / numGrassBladeVertices, numOutputTrianglesLimit / numGrassBladeTriangles));
static const int numOutputVertices  = maxNumOutputGrassBlades * numGrassBladeVertices;
static const int numOutputTriangles = maxNumOutputGrassBlades * numGrassBladeTriangles;

static const int numOutputVertexIterations   = (numOutputVertices + (denseGrassGroupSize - 1)) / denseGrassGroupSize;
static const int numOutputTriangleIterations = (numOutputTriangles + (denseGrassGroupSize - 1)) / denseGrassGroupSize;

// Mesh node: DrawDenseGrassPatch
//
// One thread group is launched per occupied slot of the DrawDenseGrassRecord (gid selects the slot).
// A slot describes a grass patch (position, height) and which half of its blades this
// thread group has to draw (bladeOffset 0 or 1). Each thread emits at most one vertex and one triangle.
[Shader("node")]
[NodeLaunch("mesh")]
[NodeId("DrawDenseGrassPatch", 0)]
[NodeMaxDispatchGrid(maxDenseGrassPatchesPerRecord, 1, 1)]
// This limit was set through instrumentation and is not required on AMD GPUs.
// If you wish to change any of the procedural generation parameters,
// and you are running on a non-AMD GPU, you may need to adjust this limit.
// You can learn more at:
// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices
[NodeMaxInputRecordsPerGraphEntryRecord(400, true)]
[NumThreads(denseGrassGroupSize, 1, 1)]
[OutputTopology("triangle")]
void DenseGrassMeshShader(
    uint gtid : SV_GroupThreadID,
    uint gid : SV_GroupID,
    DispatchNodeInputRecord<DrawDenseGrassRecord> inputRecord,
    out indices uint3 tris[numOutputTriangles],
    out vertices GrassVertex verts[numOutputVertices])
{
    const float3 patchCenter       = inputRecord.Get().position[gid];
    const float  patchHeight       = inputRecord.Get().height[gid];
    const uint   bladeOffset       = inputRecord.Get().bladeOffset[gid];
    const float  patchWindStrength = GetWindStrength();
    const float3 patchNormal       = GetTerrainNormal(patchCenter.xz);
    const int seed = CombineSeed(asuint(int(patchCenter.x / grassSpacing)), asuint(int(patchCenter.z / grassSpacing)));

    // fractional blade count of the whole patch; decreases with distance to the camera
    const float dist = distance(patchCenter, GetCameraPosition());
    const float bladeCountF =
        lerp(float(maxNumGrassBlades), 2., pow(saturate(dist / (denseGrassMaxDistance * 1.05)), 0.75));

    // blades of the whole patch vs. blades drawn by this thread group
    const int tileBladeCount         = ceil(bladeCountF);
    const int threadGroupBladeOffset = bladeOffset * maxNumOutputGrassBlades;
    const int threadGroupBladeCount  = clamp(tileBladeCount - threadGroupBladeOffset, 0, maxNumOutputGrassBlades);

    const int vertexCount   = threadGroupBladeCount * numGrassBladeVertices;
    const int triangleCount = threadGroupBladeCount * numGrassBladeTriangles;

    SetMeshOutputCounts(vertexCount, triangleCount);

    const int vertId = gtid;
    if (vertId < vertexCount) {
        // bladeId is the index of the blade inside the whole patch, not inside this thread group
        const int bladeId     = (vertId / numGrassBladeVertices) + threadGroupBladeOffset;
        const int vertIdLocal = vertId % numGrassBladeVertices;

        const float height = patchHeight + float(Random(seed, bladeId, 20)) / 40.;

        // Position the grass in a circle around the patchCenter and angled using the patchNormal
        float3 tangent   = normalize(cross(float3(0, 0, 1), patchNormal));
        float3 bitangent = normalize(cross(patchNormal, tangent));

        float  bladeDirectionAngle = 2. * PI * Random(seed, 4, bladeId);
        float2 bladeDirection      = float2(cos(bladeDirectionAngle), sin(bladeDirectionAngle)) * height * 0.3;

        float  offsetAngle         = 2. * PI * Random(seed, bladeId);
        float  offsetRadius        = grassSpacing * sqrt(Random(seed, 19, bladeId));
        // named differently from the record's bladeOffset above to avoid shadowing it
        float3 bladePositionOffset = offsetRadius * (cos(offsetAngle) * tangent + sin(offsetAngle) * bitangent);

        // control points of the blade's Bezier curve: root, straight up, tip bent in blade direction
        float3 v0 = patchCenter + bladePositionOffset;
        float3 v1 = v0 + float3(0, height, 0);
        float3 v2 = v1 + float3(bladeDirection.x, 0, bladeDirection.y);

        // previous-frame control points for motion vectors
        float3 v1prev = v1;
        float3 v2prev = v2 + patchWindStrength * GetWindOffset(v0.xz, GetPreviousTime());

        v2 += patchWindStrength * GetWindOffset(v0.xz, GetTime());

        MakePersistentLength(v0, v1, v2, height);
        MakePersistentLength(v0, v1prev, v2prev, height);

        float width = 0.03;

        // fewer blades are compensated with wider blades
        width *= maxNumGrassBlades / bladeCountF;

        // The last blade of a patch is scaled by the fractional part of the blade count, so that it
        // fades in smoothly. If the blade count is integral, the last blade is a full blade and
        // must not be scaled (frac() would collapse it to zero width).
        const float lastBladeFraction = frac(bladeCountF);
        if ((bladeId == (tileBladeCount - 1)) && (lastBladeFraction > 0)) {
            width *= lastBladeFraction;
        }

        GrassVertex vertex;
        vertex.height                 = patchHeight;
        vertex.worldSpaceGroundNormal = patchNormal;
        vertex.rootHeight             = v0.y;

        // even/odd local vertices lie on opposite edges of the blade
        const float3 sideVec = normalize(float3(bladeDirection.y, 0, -bladeDirection.x));
        const float3 offset  = BitSign(vertIdLocal, 0) * width * sideVec;

        // blade gets narrower towards the tip
        v0 += offset * 1.0;
        v1 += offset * 0.7;
        v2 += offset * 0.3;

        v1prev += offset * 0.7;
        v2prev += offset * 0.3;

        float t                   = (vertIdLocal / 2) / float(numGrassBladeVerticesPerEdge - 1);
        vertex.worldSpacePosition = bezier(v0, v1, v2, t);
        vertex.worldSpaceNormal   = cross(sideVec, normalize(bezierDerivative(v0, v1, v2, t)));

        ComputeClipSpacePositionAndMotion(vertex, vertex.worldSpacePosition, bezier(v0, v1prev, v2prev, t));

        verts[vertId] = vertex;
    }

    const int triId = gtid;
    if (triId < triangleCount) {
        // here bladeId is local to this thread group, as it indexes the output vertices
        const int bladeId    = triId / numGrassBladeTriangles;
        const int triIdLocal = triId % numGrassBladeTriangles;

        // two triangles per quad between consecutive vertex pairs of the blade
        const int offset = bladeId * numGrassBladeVertices + 2 * (triIdLocal / 2);

        tris[triId] = offset + (((triIdLocal & 1) == 0) ? uint3(0, 1, 2) : uint3(3, 2, 1));
    }
}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/flowermeshshader.hlsl b/meshNodeSample/shaders/flowermeshshader.hlsl
new file mode 100644
index 0000000..69dcf42
--- /dev/null
+++ b/meshNodeSample/shaders/flowermeshshader.hlsl
@@ -0,0 +1,397 @@
// This file is part of the AMD Work Graph Mesh Node Sample.
//
// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "common.hlsl"

static const int flowerGroupSize = 128;

static const int stemVertexCount   = 6;
static const int stemTriangleCount = 6;

static const int headMaxRingCount     = 6;
static const int headMaxVertexCount   = headMaxRingCount * 2 + 2;
static const int headMaxTriangleCount = headMaxRingCount * 4;

static const int flowerMaxVertexCount   = stemVertexCount + headMaxVertexCount;
static const int flowerMaxTriangleCount = stemTriangleCount + headMaxTriangleCount;

static const int outputMaxVertexCount   = 128;
static const int outputMaxTriangleCount = 256;
// maximum number of flowers in a flower patch, limited by the mesh shader output budget
static const int maxFlowerCount =
    min(outputMaxVertexCount / flowerMaxVertexCount, outputMaxTriangleCount / flowerMaxTriangleCount);

static const int numOutputVertices  = maxFlowerCount * flowerMaxVertexCount;
static const int numOutputTriangles = maxFlowerCount * flowerMaxTriangleCount;

static const int numOutputVertexIterations   = (numOutputVertices + (flowerGroupSize - 1)) / flowerGroupSize;
static const int numOutputTriangleIterations = (numOutputTriangles + (flowerGroupSize - 1)) / flowerGroupSize;

static const int sparseFlowerVertexCount   = 4;
static const int sparseFlowerTriangleCount = 2;

static const int sparseOutputMaxVertexCount   = 128;
static const int sparseOutputMaxTriangleCount = 64;
// maximum number of flower patches that can be rendered by one sparse flower thread group
static const int maxSparseFlowerPatchesPerThreadGroup = min(sparseOutputMaxVertexCount / sparseFlowerVertexCount,
                                                            sparseOutputMaxTriangleCount / sparseFlowerTriangleCount) /
                                                        maxFlowerCount;

static const int numSparseOutputVertices =
    maxSparseFlowerPatchesPerThreadGroup * maxFlowerCount * sparseFlowerVertexCount;
static const int numSparseOutputTriangles =
    maxSparseFlowerPatchesPerThreadGroup * maxFlowerCount * sparseFlowerTriangleCount;


// Returns the pseudo-random height of flower flowerId in the patch identified by seed.
float GetFlowerHeight(in int flowerId, in uint seed)
{
    return .5f + Random(seed, 79823, flowerId) * 0.2f;
}

// Returns the pseudo-random 2D offset of flower flowerId relative to the patch position.
// Both the angle and the radius grow with flowerId.
float2 GetFlowerPosition(in int flowerId, in uint seed)
{
    const float flowerPositionAngle  = flowerId + Random(seed, 324897, flowerId);
    const float flowerPositionRadius = 0.05f + (flowerId * 0.05f) + Random(seed, flowerId, 4732) * 0.1f;
    return float2(cos(flowerPositionAngle), sin(flowerPositionAngle)) * flowerPositionRadius;
}

// Returns the pseudo-random head color of flower flowerId, picked from a fixed palette.
// The first palette color appears three times and is therefore three times as likely as the others.
float3 GetFlowerColor(in int flowerId, in uint seed)
{
    static const int    flowerColorCount              = 7;
    static const float3 flowerColor[flowerColorCount] = {
        float3(1.0, 0.95, 0.95),
        float3(1.0, 0.95, 0.95),
        float3(1.0, 0.95, 0.95),
        float3(1.0, 0.2, 0.3),
        float3(1.0, 0.4, 1.0),
        float3(0.95, 0.8, 0.0),
        float3(0.7, 0.7, 1.0),
    };

    // Clamp the index, so that a random value of exactly 1.0 cannot read past the end of the palette.
    // NOTE(review): assumes Random returns non-negative values; confirm its range in utils.hlsl.
    const int colorIndex = min(int(Random(seed, flowerId) * flowerColorCount), flowerColorCount - 1);

    return flowerColor[colorIndex];
}

// Returns the pseudo-random number of flowers (2 to maxFlowerCount, assuming Random returns
// values in [0, 1]) in the patch identified by seed.
int GetFlowerCount(in int seed) {
    return round(lerp(2, maxFlowerCount, Random(seed, 6145)));
}

static const float3 flowerStemColor = pow(float3(0.82, 0.89, 0.58), 2.2);

[Shader("node")]
[NodeLaunch("mesh")]
[NodeId("DrawFlowerPatch", 0)]
[NodeMaxDispatchGrid(maxFlowersPerRecord, 1, 1)]
// This limit was set through instrumentation and is not required on AMD GPUs.
// If you wish to change any of the procedural generation parameters,
// and you are running on a non-AMD GPU, you may need to adjust this limit.
+// You can learn more at: +// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices +[NodeMaxInputRecordsPerGraphEntryRecord(200, true)] +[NumThreads(flowerGroupSize, 1, 1)] +[OutputTopology("triangle")] +void FlowerMeshShader( + uint gtid : SV_GroupThreadID, + uint gid : SV_GroupID, + DispatchNodeInputRecord inputRecord, + out indices uint3 tris[numOutputTriangles], + out vertices InsectVertex verts[numOutputVertices]) +{ + const DrawFlowerRecord record = inputRecord.Get(); + + const float3 patchPosition = GetTerrainPosition(inputRecord.Get().position[gid]); + const uint seed = CombineSeed(asuint(patchPosition.x), asuint(patchPosition.z)); + + const int flowerCount = GetFlowerCount(seed); + const int headRingVertexCount = round(lerp(4, headMaxRingCount, Random(seed, 7878))); + + const int headVertexCount = headRingVertexCount * 2 + 2; + const int headTriangleCount = headRingVertexCount * 4; + + const int totalStemVertexCount = flowerCount * stemVertexCount; + const int totalStemTriangleCount = flowerCount * stemTriangleCount; + + const int totalHeadVertexCount = flowerCount * headVertexCount; + const int totalHeadTriangleCount = flowerCount * headTriangleCount; + + const int vertexCount = totalStemVertexCount + totalHeadVertexCount; + const int triangleCount = totalStemTriangleCount + totalHeadTriangleCount; + + SetMeshOutputCounts(vertexCount, triangleCount); + + [[unroll]] + for (uint i = 0; i < numOutputVertexIterations; ++i) + { + const int vertId = gtid + flowerGroupSize * i; + + int flowerId; + float3 localVertexPosition; + float3 color; + + if (vertId < totalStemVertexCount) { + // vertex is stem vertex + + flowerId = vertId / stemVertexCount; + const int stemVertexId = vertId % stemVertexCount; + + const bool isTopVertex = stemVertexId >= 3; + + const float flowerHeight = GetFlowerHeight(flowerId, seed); + const float stemRadius = .03f * (isTopVertex ? 
0.8f : 1.f) + Random(seed, 87324, flowerId) * 0.02f; + const float stemRotationOffset = flowerId; + + const float vertexAngle = stemRotationOffset + stemVertexId * ((2 * PI) / 3.f); + const float2 vertexRingPosition = float2(cos(vertexAngle), sin(vertexAngle)) * stemRadius; + + const float vertexY = isTopVertex ? flowerHeight : 0; + localVertexPosition = float3(vertexRingPosition.x, vertexY, vertexRingPosition.y); + + color = flowerStemColor; + } else if (vertId < (totalStemVertexCount + totalHeadVertexCount)) { + const int vertexOffset = totalStemVertexCount; + + flowerId = (vertId - vertexOffset) / headVertexCount; + const int headVertexId = (vertId - vertexOffset) % headVertexCount; + + const bool isBottomVertex = headVertexId == 0; + const bool isTopVertex = headVertexId == (headVertexCount - 1); + const int headVertexRing = (headVertexId - 1) / headRingVertexCount; + + const float flowerHeight = GetFlowerHeight(flowerId, seed); + const float headRadius = (isBottomVertex || isTopVertex) ? 
0.f : (.08f * (1 + (headVertexRing * 0.2))); + const float headRotationOffset = flowerId; + + const float vertexAngle = headRotationOffset + headVertexId * ((2 * PI) / float(headRingVertexCount)); + const float2 vertexRingPosition = float2(cos(vertexAngle), sin(vertexAngle)) * headRadius; + + const float ringHeight = 0.05f; + float vertexY = flowerHeight + headVertexRing * ringHeight; + if (isTopVertex) { + vertexY = flowerHeight + 0 * ringHeight; + } + if (isBottomVertex) { + vertexY = flowerHeight - ringHeight; + } + + localVertexPosition = float3(vertexRingPosition.x, vertexY, vertexRingPosition.y); + + color = GetFlowerColor(flowerId, seed); + } + + if (vertId < vertexCount) { + const float2 flowerPositionOffset = GetFlowerPosition(flowerId, seed); + const float3 windOffset = localVertexPosition.y * GetWindStrength() * + GetWindOffset(patchPosition.xz + flowerPositionOffset, GetTime()); + const float3 previousWindOffset = localVertexPosition.y * GetWindStrength() * + GetWindOffset(patchPosition.xz + flowerPositionOffset, GetPreviousTime()); + + InsectVertex vertex; + vertex.objectSpacePosition = + float3(flowerPositionOffset.x, 0, flowerPositionOffset.y) + localVertexPosition + windOffset; + vertex.color = color; + + const float3 worldSpaceBasePosition = + patchPosition + float3(flowerPositionOffset.x, 0, flowerPositionOffset.y) + localVertexPosition; + + ComputeClipSpacePositionAndMotion( + vertex, worldSpaceBasePosition + windOffset, worldSpaceBasePosition + previousWindOffset); + + verts[vertId] = vertex; + } + } + + [[unroll]] + for (uint i = 0; i < numOutputTriangleIterations; ++i) + { + const int triId = gtid + flowerGroupSize * i; + + uint3 triangleIndices = uint3(0, 1, 2); + + if (triId < totalStemTriangleCount) { + // triangle is stem triangle + + const int flowerId = triId / stemTriangleCount; + const int stemTriangleId = triId % stemTriangleCount; + + const int stemVertexOffset = flowerId * stemVertexCount; + + const uint base = stemTriangleId / 2; + 
// z -- w + // | \ | + // | \ | + // y -- x + const uint4 quad = uint4(base, (base + 1) % 3, (base + 1) % 3 + 3, base + 3); + + triangleIndices = stemVertexOffset + ((stemTriangleId & 0x1) ? quad.xyz : quad.xzw); + } else if (triId < (totalStemTriangleCount + totalHeadTriangleCount)) { + // triangle is head triangle + + const int triangleOffset = totalStemTriangleCount; + + const int headId = (triId - triangleOffset) / headTriangleCount; + const int headTriangleId = (triId - triangleOffset) % headTriangleCount; + + const int bottomVertexId = 0; + const int topVertexId = 1 + (2 * headRingVertexCount); + const int firstRingVertexOffset = 1; + const int secondRingVertexOffset = 1 + headRingVertexCount; + + const int headVertexOffset = totalStemVertexCount + headId * headVertexCount; + + triangleIndices = headVertexOffset + uint3(0, 0, 0); + + if (headTriangleId < (2 * headRingVertexCount)) { + const uint base = headTriangleId / 2; + const uint3 ringTriangle = uint3(bottomVertexId, + firstRingVertexOffset + base, + firstRingVertexOffset + (base + 1) % headRingVertexCount); + + triangleIndices = + headVertexOffset + ((headTriangleId & 0x1) ? (topVertexId - ringTriangle) : ringTriangle); + } else { + const uint base = (headTriangleId - (2 * headRingVertexCount)) / 2; + + const uint3 ringTriangle = uint3(firstRingVertexOffset + base, + firstRingVertexOffset + (base + 1) % headRingVertexCount, + firstRingVertexOffset + base + headRingVertexCount); + + triangleIndices = + headVertexOffset + ((headTriangleId & 0x1) ? (topVertexId - ringTriangle) : ringTriangle); + } + } + + if (triId < triangleCount) { + tris[triId] = triangleIndices; + } + } +} + +[Shader("node")] +[NodeLaunch("mesh")] +[NodeId("DrawFlowerPatch", 1)] +[NodeMaxDispatchGrid(maxFlowersPerRecord / flowersInSparseFlowerThreadGroup, 1, 1)] +// This limit was set through instrumentation and is not required on AMD GPUs. 
+// If you wish to change any of the procedural generation parameters, +// and you are running on a non-AMD GPU, you may need to adjust this limit. +// You can learn more at: +// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices +[NodeMaxInputRecordsPerGraphEntryRecord(200, true)] +[NumThreads(flowerGroupSize, 1, 1)] +[OutputTopology("triangle")] +void SparseFlowerMeshShader( + uint gtid : SV_GroupThreadID, + uint gid : SV_GroupID, + DispatchNodeInputRecord inputRecord, + out indices uint3 tris[numOutputTriangles], + out vertices InsectVertex verts[numOutputVertices]) +{ + const int recordPositionOffset = gid * maxSparseFlowerPatchesPerThreadGroup; + const int threadGroupPatchCount = + clamp(int(inputRecord.Get().flowerPatchCount) - recordPositionOffset, 0, maxSparseFlowerPatchesPerThreadGroup); + + int totalFlowerCount = 0; + + if (WaveGetLaneIndex() < threadGroupPatchCount) { + const float2 lanePatchPosition = inputRecord.Get().position[recordPositionOffset + WaveGetLaneIndex()]; + const int laneSeed = CombineSeed(asuint(lanePatchPosition.x), asuint(lanePatchPosition.y)); + const int laneFlowerCount = GetFlowerCount(laneSeed); + + totalFlowerCount = laneFlowerCount; + } + + totalFlowerCount = WaveActiveSum(totalFlowerCount); + + const int vertexCount = totalFlowerCount * sparseFlowerVertexCount; + const int triangleCount = totalFlowerCount * sparseFlowerTriangleCount; + + SetMeshOutputCounts(vertexCount, triangleCount); + + if (gtid < vertexCount) { + float3 patchPosition = 0; + uint seed = 0; + int vertexId = 0; + + { + int runningVertexCount = 0; + + for (int i = 0; i < maxSparseFlowerPatchesPerThreadGroup; ++i) { + const float2 candidatePatchPosition = inputRecord.Get().position[recordPositionOffset + i]; + const int candidateSeed = + CombineSeed(asuint(candidatePatchPosition.x), asuint(candidatePatchPosition.y)); + const int candidateFlowerCount = GetFlowerCount(candidateSeed); + const int candidateVertexCount 
= candidateFlowerCount * sparseFlowerVertexCount; + + if ((gtid >= runningVertexCount) && (gtid < (runningVertexCount + candidateVertexCount))) { + patchPosition = GetTerrainPosition(candidatePatchPosition); + seed = candidateSeed; + vertexId = gtid - runningVertexCount; + } + + runningVertexCount += candidateVertexCount; + } + } + + const int flowerId = vertexId / sparseFlowerVertexCount; + const int flowerVertexId = vertexId % sparseFlowerVertexCount; + + const float2 flowerPositionOffset = GetFlowerPosition(flowerId, seed); + const float flowerHeight = GetFlowerHeight(flowerId, seed); + + const float3 viewVector = normalize(patchPosition - GetCameraPosition()); + const float3 side = normalize(cross(viewVector, float3(0, 1, 0))); + const float3 back = normalize(float3(viewVector.x, 0, viewVector.z)); + + const float width = 0.1; + const float depth = 0.15; + + float3 localVertexPosition = 0; + + if (flowerVertexId == 1) { + localVertexPosition = float3(0, flowerHeight, 0) + side * width; + } else if (flowerVertexId == 2) { + localVertexPosition = float3(0, flowerHeight, 0) - side * width; + } else if (flowerVertexId == 3) { + localVertexPosition = float3(0, flowerHeight, 0) + back * depth; + } + + InsectVertex vertex; + vertex.objectSpacePosition = + float3(flowerPositionOffset.x, 0, flowerPositionOffset.y) + localVertexPosition; + + if (flowerVertexId == 0) { + vertex.color = flowerStemColor; + } else { + vertex.color = GetFlowerColor(flowerId, seed); + } + + ComputeClipSpacePositionAndMotion( + vertex, patchPosition + float3(flowerPositionOffset.x, 0, flowerPositionOffset.y) + localVertexPosition); + + verts[gtid] = vertex; + + } + + if (gtid < triangleCount) { + const int flowerId = gtid / sparseFlowerTriangleCount; + const int flowerTriangleId = gtid % sparseFlowerTriangleCount; + + const int flowerVertexOffset = flowerId * sparseFlowerVertexCount; + + tris[gtid] = flowerVertexOffset + ((flowerTriangleId & 0x1) ? 
uint3(0, 1, 2) : uint3(2, 1, 3)); + } +} \ No newline at end of file diff --git a/meshNodeSample/shaders/grasspixelshader.hlsl b/meshNodeSample/shaders/grasspixelshader.hlsl new file mode 100644 index 0000000..91fd2cd --- /dev/null +++ b/meshNodeSample/shaders/grasspixelshader.hlsl @@ -0,0 +1,48 @@ +// This file is part of the AMD Work Graph Mesh Node Sample. +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "common.hlsl"
+
+// Deferred pixel shader for grass blades.
+//
+// Writes motion, base color and a softened normal:
+//  - the base color is a fixed grass tone, darkened towards the blade root (self shadowing),
+//    darkened by up to 25% by the woodland biome weight and modulated by low-frequency noise;
+//  - the normal is mostly the (flattened) ground normal with a 25% contribution of the blade normal,
+//    which is flipped for back faces.
+DeferredPixelShaderOutput GrassPixelShader(const GrassVertex input, bool isFrontFace : SV_IsFrontFace)
+{
+    const float2 worldXZ = input.worldSpacePosition.xz;
+
+    // biome weights: y = woodland
+    const float woodlandWeight = GetBiomeWeights(worldXZ).y;
+    const float biomeFactor = lerp(1.0, 0.75, woodlandWeight);
+
+    // 0 at the blade root, 1 at the blade tip.
+    // NOTE(review): for a negative ratio pow() yields NaN and the result relies on the clamp mapping NaN to 0.
+    const float relativeHeight = (input.worldSpacePosition.y - input.rootHeight) / input.height;
+    const float selfshadow = saturate(pow(relativeHeight, 1.5));
+
+    const float3 grassTone = pow(float3(0.41, 0.44, 0.29) * 2, 2.2);
+    const float toneVariation = 0.75 + 0.25 * PerlinNoise2D(0.25 * worldXZ);
+
+    // blade normal, always facing the viewer
+    const float3 unitNormal = normalize(input.worldSpaceNormal);
+    const float3 bladeNormal = isFrontFace ? unitNormal : -unitNormal;
+
+    // ground normal pulled halfway towards straight up
+    const float3 groundNormal = normalize(lerp(normalize(input.worldSpaceGroundNormal), float3(0, 1, 0), 0.5));
+
+    DeferredPixelShaderOutput output;
+    output.motion = input.clipSpaceMotion.xy;
+
+    output.baseColor.rgb = grassTone * selfshadow * biomeFactor;
+    output.baseColor.rgb *= toneVariation;
+    output.baseColor.a = 1;
+
+    output.normal.xyz = normalize(lerp(groundNormal, bladeNormal, 0.25));
+    output.normal.w = 1.0;
+
+    return output;
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/heightmap.hlsl b/meshNodeSample/shaders/heightmap.hlsl
new file mode 100644
index 0000000..9d0235f
--- /dev/null
+++ b/meshNodeSample/shaders/heightmap.hlsl
@@ -0,0 +1,121 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "utils.hlsl"
+
+// Returns the biome weights at a world-space XZ position:
+// x = mountain
+// y = woodland
+// z = grassland
+// NOTE(review): several pow() calls below take noise-derived bases; if PerlinNoise2D can return
+// negative values those calls yield NaN - confirm the noise range in utils.hlsl.
+float3 GetBiomeWeights(in float2 position)
+{
+    // biomes vary on a much larger scale than the terrain itself
+    const float2 biomePos = position * 0.01;
+
+    // three noise octaves, the lowest frequency weighted strongest
+    float mountainNoise = 0;
+    mountainNoise += 1 * PerlinNoise2D(0.5 * biomePos);
+    mountainNoise += 2 * PerlinNoise2D(0.2 * biomePos + float2(38, 23));
+    mountainNoise += 4 * PerlinNoise2D(0.1 * biomePos);
+
+    // woodland band that peaks where the raw mountain noise is around 0.5
+    const float woodlandBand =
+        (1.f - smoothstep(0, 1, 4 * pow(mountainNoise - 0.5, 2))) - pow(4 * PerlinNoise2D(0.1 * biomePos), 4);
+
+    const float mountainWeight = clamp(pow(clamp(mountainNoise, 0, 1), 2), 0, 1);
+
+    // independent woodland noise, pushed back wherever mountains dominate
+    float woodlandNoise = 0;
+    woodlandNoise += 1 * pow(4 * PerlinNoise2D(0.3 * biomePos), 3);
+    woodlandNoise += 5 * pow(1 * PerlinNoise2D(0.1 * biomePos), 2);
+    woodlandNoise = clamp(smoothstep(0, 1, woodlandNoise), 0, 1);
+
+    const float woodlandWeight = smoothstep(0, 1, max(woodlandBand, woodlandNoise - mountainWeight));
+
+    // grassland fills whatever is left
+    const float grasslandWeight = clamp(1 - (mountainWeight + woodlandWeight), 0, 1);
+
+    return float3(mountainWeight, woodlandWeight, grasslandWeight);
+}
+
+// Returns the terrain height at a world-space XZ position.
+float GetTerrainHeight(in float2 pos)
+{
+    const float3 biomeWeights = GetBiomeWeights(pos);
+
+    // low-frequency noise: sample on a heavily scaled-down position
+    const float2 noisePos = pos / 400.0;
+
+    // rolling hills: sum of several noise layers
+    float hills = 0;
+    hills += 1.0 * PerlinNoise2D(1.0 * noisePos + float2(34, 98));
+    hills += 0.35 * PerlinNoise2D(2.0 * noisePos + float2(73, 42));
+    hills += 0.25 * max(PerlinNoise2D(3.2 * noisePos + float2(+0.5, -0.5)),
+                        PerlinNoise2D(3.5 * noisePos + float2(-0.5, +0.5)));
+    hills += 0.15 * PerlinNoise2D(4.0 * noisePos);
+    hills += 0.08 * PerlinNoise2D(8.0 * noisePos);
+    hills += 0.07 * PerlinNoise2D(9.0 * noisePos);
+
+    // squaring makes the hills more pronounced; 140 scales to the final height
+    float terrainHeight = 140.0 * hills * hills;
+
+    // extra relief that only shows up in the mountain biome
+    float peaks = 0;
+    peaks += 0.97 * PerlinNoise2D(1.0 * noisePos);
+    peaks += 0.95 * max(PerlinNoise2D(2.8 * noisePos + float2(+2.3, -4.5)),
+                        PerlinNoise2D(3.1 * noisePos + float2(-6.5, +3.6)));
+    peaks += 0.75 * PerlinNoise2D(2.0 * noisePos + float2(34, 56));
+
+    terrainHeight += 70.0 * peaks * peaks * smoothstep(0.5, 1.0, biomeWeights.x);
+
+    // lift the whole mountain biome
+    terrainHeight += 40.0 * smoothstep(0.0, 1.0, biomeWeights.x);
+
+    return terrainHeight;
+}
+
+// Scalar-argument convenience overload of GetTerrainHeight.
+float GetTerrainHeight(in float x, in float y)
+{
+    const float2 pos = float2(x, y);
+    return GetTerrainHeight(pos);
+}
+
+// Returns the point on the terrain surface above/below (x, z).
+float3 GetTerrainPosition(in float x, in float z)
+{
+    const float y = GetTerrainHeight(float2(x, z));
+    return float3(x, y, z);
+}
+
+// Returns the point on the terrain surface for an XZ position (pos.y is the world Z coordinate).
+float3 GetTerrainPosition(in float2 pos)
+{
+    const float y = GetTerrainHeight(float2(pos.x, pos.y));
+    return float3(pos.x, y, pos.y);
+}
+
+// Returns the terrain normal at (x, z), estimated from forward differences of the height.
+float3 GetTerrainNormal(in float x, in float z)
+{
+    // finite-difference step
+    static const float h = 0.01;
+
+    const float centerHeight = GetTerrainHeight(x, z);
+
+    // height drop when stepping along +x / +z
+    const float dropX = (centerHeight - GetTerrainHeight(x + h, z));
+    const float dropZ = (centerHeight - GetTerrainHeight(x, z + h));
+
+    // surface tangents along x and z
+    const float3 tangentX = normalize(float3(h, -dropX, 0));
+    const float3 tangentZ = normalize(float3(0, -dropZ, h));
+
+    return normalize(cross(tangentZ, tangentX));
+}
+
+// float2 convenience overload of GetTerrainNormal (pos.y is the world Z coordinate).
+float3 GetTerrainNormal(in float2 pos)
+{
+    const float x = pos.x;
+    const float z = pos.y;
+    return GetTerrainNormal(x, z);
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/insectpixelshader.hlsl b/meshNodeSample/shaders/insectpixelshader.hlsl
new file mode 100644
index 0000000..dd3650e --- /dev/null +++ b/meshNodeSample/shaders/insectpixelshader.hlsl @@ -0,0 +1,37 @@ +// This file is part of the AMD Work Graph Mesh Node Sample. +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "common.hlsl"
+
+// Deferred pixel shader for InsectVertex geometry.
+// Passes motion and vertex color through and derives a flat, per-face normal from the
+// screen-space derivatives of the object-space position.
+DeferredPixelShaderOutput InsectPixelShader(const InsectVertex input)
+{
+    // change of the object-space position from one pixel to the next, vertically and horizontally
+    const float3 positionDdy = ddy(input.objectSpacePosition.xyz);
+    const float3 positionDdx = ddx(input.objectSpacePosition.xyz);
+
+    // both derivatives lie in the triangle's plane, so their cross product is the face normal
+    const float3 faceNormal = normalize(cross(positionDdy, positionDdx));
+
+    DeferredPixelShaderOutput output;
+
+    output.normal.xyz = faceNormal;
+    output.normal.w = 1.0;
+
+    output.baseColor.rgb = input.color.rgb;
+    output.baseColor.a = 1.f;
+
+    output.motion = input.clipSpaceMotion.xy;
+
+    return output;
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/mushroommeshshader.hlsl b/meshNodeSample/shaders/mushroommeshshader.hlsl
new file mode 100644
index 0000000..12827ff
--- /dev/null
+++ b/meshNodeSample/shaders/mushroommeshshader.hlsl
@@ -0,0 +1,256 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "common.hlsl"
+
+// Shape and color parameters of one mushroom species.
+struct ShroomType {
+    int hatPoints;   // vertices per hat ring (the hat has a center vertex plus two rings)
+    int stemPoints;  // vertices per stem ring (the stem has two rings)
+
+    float hatRingInnerRadius;
+    float hatRingOuterRadius;
+    float stemRingRadius;
+
+    float hatRingInnerDepth;  // how far the inner hat ring sits below the hat center
+    float hatRingOuterDepth;  // how far the outer hat ring sits below the hat center
+
+    float hatTilt;             // upper bound of the random hat tilt
+    float shroomCountBiasPow;  // exponent applied to the random value that selects the mushroom count
+    float positionNoise;       // amplitude of the random per-vertex jitter applied to hat vertices
+
+    float3 stemColor;
+    float3 hatColor;
+};
+
+// Mushroom definitions
+static const int numShroomTypes = 2;
+static const ShroomType shroomTypes[numShroomTypes] = { //
+    {
+        // brown shroom
+        7,                              // int hatPoints;
+        5,                              // int stemPoints;
+        .25,                            // float hatRingInnerRadius;
+        .4,                             // float hatRingOuterRadius;
+        .1,                             // float stemRingRadius;
+        .075,                           // float hatRingInnerDepth;
+        .25,                            // float hatRingOuterDepth;
+        .65,                            // float hatTilt;
+        1,                              // float shroomCountBiasPow;
+        .025,                           // float positionNoise;
+        float3(0.33, 0.21, 0.14) * .8,  // float3 stemColor
+        float3(0.33, 0.21, 0.14) * .8   // float3 hatColor
+    },
+    {
+        // red shroom
+        8,                            // int hatPoints;
+        5,                            // int stemPoints;
+        .25,                          // float hatRingInnerRadius;
+        .4,                           // float hatRingOuterRadius;
+        .1,                           // float stemRingRadius;
+        .05,                          // float hatRingInnerDepth;
+        .15,                          // float hatRingOuterDepth;
+        .1,                           // float hatTilt;
+        1,                            // float shroomCountBiasPow;
+        .025,                         // float positionNoise;
+        float3(1, 1, 1) * .6,         // float3 stemColor
+        float3(.225, 0.05, 0.) * .5   // float3 hatColor
+    },
+};
+
+// Mushrooms are placed at a distance of radiusMin .. radiusMin + radiusV from the patch center.
+static const float radiusMin = 0.2;
+static const float radiusV = 0.2;
+// Uniform scale applied to the generated mushroom geometry.
+static const float scale = .6;
+
+static const int mushroomGroupSize = 128;
+static const int numOutputVerticesLimit = 256;
+static const int numOutputTrianglesLimit = 192;
+
+// Number of passes each thread needs to cover all output vertices (2) and triangles (2).
+static const int numOutputVertexIterations = (numOutputVerticesLimit + (mushroomGroupSize - 1)) / mushroomGroupSize;
+static const int numOutputTriangleIterations = (numOutputTrianglesLimit + (mushroomGroupSize - 1)) / mushroomGroupSize;
+
+// Mushroom mesh node: one thread group renders one patch of mushrooms around position[gid].
+//
+// Per-mushroom vertex layout: [hat center][inner hat ring][outer hat ring][lower stem ring][upper stem ring].
+// Per-mushroom triangle layout: [3 * hatPoints hat triangles][2 * stemPoints stem triangles].
+//
+// NOTE(review): the template argument of DispatchNodeInputRecord is missing in this text and the
+// record type cannot be determined from this file - restore it from the original source.
+[Shader("node")]
+[NodeLaunch("mesh")]
+[NodeId("DrawMushroomPatch", 0)]
+[NodeMaxDispatchGrid(maxMushroomsPerRecord, 1, 1)]
+// This limit was set through instrumentation and is not required on AMD GPUs.
+// If you wish to change any of the procedural generation parameters,
+// and you are running on a non-AMD GPU, you may need to adjust this limit.
+// You can learn more at:
+// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices
+[NodeMaxInputRecordsPerGraphEntryRecord(50, true)]
+[NumThreads(mushroomGroupSize, 1, 1)]
+[OutputTopology("triangle")]
+void MushroomMeshShader(
+    uint gtid : SV_GroupThreadID,
+    uint gid : SV_GroupID,
+    DispatchNodeInputRecord inputRecord,
+    out indices uint3 tris[numOutputTrianglesLimit],
+    out vertices InsectVertex verts[numOutputVerticesLimit])
+{
+    const float3 patchCenter = inputRecord.Get().position[gid];
+
+    // The seed is derived from the patch position, so a patch always generates the same mushrooms.
+    const int seed = CombineSeed(asuint(patchCenter.x), asuint(patchCenter.z));
+
+    // Clamp the species index: a Random() result of exactly 1.0 would otherwise read past shroomTypes.
+    // NOTE(review): Random() is declared elsewhere; its exact value range is not visible here.
+    const int shroomType = clamp(int(numShroomTypes * Random(seed, 11002)), 0, numShroomTypes - 1);
+
+    const int vertsPerStem = 2 * shroomTypes[shroomType].stemPoints;
+    const int vertsPerHat = 2 * shroomTypes[shroomType].hatPoints + 1;
+    const int vertsPerShroom = vertsPerStem + vertsPerHat;
+
+    const int trisPerStem = 2 * shroomTypes[shroomType].stemPoints;
+    const int trisPerHat = 3 * shroomTypes[shroomType].hatPoints;
+    const int trisPerShroom = trisPerStem + trisPerHat;
+
+    // number of mushrooms of this species that fit into the output limits
+    const int maxNumShrooms = min(numOutputVerticesLimit / vertsPerShroom, numOutputTrianglesLimit / trisPerShroom);
+
+    // Ring radius / height lookup, indexed by (2 * isHat + ring):
+    //   0, 1 : lower and upper stem ring
+    //   2    : hat center
+    //   3, 4 : inner and outer hat ring
+    const float radiusLookup[5] = {
+        shroomTypes[shroomType].stemRingRadius,
+        shroomTypes[shroomType].stemRingRadius,
+        0.,
+        shroomTypes[shroomType].hatRingInnerRadius,
+        shroomTypes[shroomType].hatRingOuterRadius,
+    };
+    const float heightLookup[5] = {
+        0.,
+        0.1,
+        .2,
+        .2 - shroomTypes[shroomType].hatRingInnerDepth,
+        .2 - shroomTypes[shroomType].hatRingOuterDepth,
+    };
+
+
+    const int numShrooms =
+        min(maxNumShrooms, 1 + maxNumShrooms * pow(Random(seed, 99990001), shroomTypes[shroomType].shroomCountBiasPow));
+
+    const int vertexCount = numShrooms * vertsPerShroom;
+    const int triangleCount = numShrooms * trisPerShroom;
+
+    SetMeshOutputCounts(vertexCount, triangleCount);
+
+    // --- triangles: thread `gtid` writes triangle gtid, gtid + mushroomGroupSize, ...
+    [[unroll]]
+    for (int i = 0; i < numOutputTriangleIterations; ++i)
+    {
+        const int triId = gtid + mushroomGroupSize * i;
+
+        if (triId < triangleCount) {
+            const int shroomIdx = triId / trisPerShroom;
+            int ti = triId % trisPerShroom;
+
+            const bool isHat = ti < trisPerHat;
+
+            if (!isHat) {
+                ti -= trisPerHat;
+            }
+
+            const int points = isHat ? shroomTypes[shroomType].hatPoints : shroomTypes[shroomType].stemPoints;
+            // triangle ring: 0..2 for the hat, 0..1 for the stem
+            const int ring = ti / points;
+
+            const int baseVertex = isHat ? (1 + (ring == 2) * points) : (vertsPerHat + ring * points);
+
+            const int vi = ti - ring * points;
+
+            // a and b are neighbours on the same vertex ring, c lies on the adjacent ring
+            const int a = baseVertex + vi;
+            const int b = baseVertex + ((vi + 1) % points);
+            int c = (ring == 1) ? b : a;
+            c += ((ring == 1) ^ !isHat) ? points : -points;
+            // for the innermost hat ring this resolves to vertex 0, the hat center
+            c = max(c, 0);
+
+            tris[triId] = shroomIdx * vertsPerShroom + uint3(a, b, c);
+        }
+    }
+
+    // --- vertices: thread `gtid` writes vertex gtid, gtid + mushroomGroupSize, ...
+    [[unroll]]
+    for (int i = 0; i < numOutputVertexIterations; ++i)
+    {
+        const int vertId = gtid + mushroomGroupSize * i;
+
+        if (vertId < vertexCount) {
+            const int shroomIdx = vertId / vertsPerShroom;
+            int vi = vertId % vertsPerShroom;
+            float3 pos = float3(0, 0, 0);
+            bool isHat = vi < vertsPerHat;
+
+            vi -= (!isHat) * vertsPerHat;
+
+            int points = isHat ? shroomTypes[shroomType].hatPoints : shroomTypes[shroomType].stemPoints;
+
+            // hat: 0 = center vertex, 1 = inner ring, 2 = outer ring; stem: 0 = lower ring, 1 = upper ring
+            int ring = (vi + (shroomTypes[shroomType].hatPoints - 1) * isHat) / points;
+
+            float angle = frac((vi - isHat) / float(points));
+
+            // the outer hat ring is rotated by half a segment against the inner ring
+            angle += (isHat && ring == 2) * -1. / (2 * points);
+
+            angle *= 2 * PI;
+
+            float radius = radiusLookup[2 * isHat + ring];
+
+            pos.x += radius * cos(angle);
+            pos.y += heightLookup[2 * isHat + ring];
+            pos.z += radius * sin(angle);
+
+            // direction from the patch center towards this mushroom; mushrooms are spread around a circle
+            float theta = 2 * PI * shroomIdx / numShrooms + .1 * Random(seed, shroomIdx) * PI * 2;
+            float rad = sqrt(Random(seed, 19, shroomIdx)) * shroomTypes[shroomType].hatTilt;
+
+            float3 offset = float3(sin(theta), 0, cos(theta));
+            // unit-length hat axis, tilted away from straight up in the `offset` direction
+            float3 dir = rad * offset;
+
+            dir.y = sqrt(1 - dir.x * dir.x - dir.z * dir.z);
+
+            // orthonormal basis with `dir` as its second row
+            float3x3 rotation;
+            rotation[0] = normalize(cross(dir, float3(0, 0, 1)));
+            rotation[1] = dir;
+            rotation[2] = normalize(cross(rotation[0], dir));
+            rotation[0] = normalize(cross(dir, rotation[2]));
+
+            if (isHat) {
+                // add a little bit of noise
+                pos.x += shroomTypes[shroomType].positionNoise * (2. * Random(vertId, 0xFEFA, seed) - 1.);
+                pos.y += shroomTypes[shroomType].positionNoise * (2. * Random(vertId, 0xFEFB, seed) - 1.);
+                pos.z += shroomTypes[shroomType].positionNoise * (2. * Random(vertId, 0xFEFC, seed) - 1.);
+
+                pos = mul(transpose(rotation), pos);
+            }
+
+            float r = Random(seed, 877, shroomIdx);
+
+            // mushrooms further away from the patch center are shorter
+            float distance = radiusMin + radiusV * r;
+
+            static const float heightMin = .5;
+            static const float heightV = 1.;
+            // everything except the lower stem ring is raised by the stem height
+            if (isHat || ring == 1) {
+                pos.y += heightMin + heightV * (1 - r);
+            }
+
+            InsectVertex vertex;
+            vertex.color.rgb = isHat ? shroomTypes[shroomType].hatColor : shroomTypes[shroomType].stemColor;
+            // darken the bottom of the stem
+            if (!isHat && ring == 0) {
+                vertex.color.rgb *= .1;
+            }
+            vertex.color.rgb *= 3.5;
+
+            vertex.objectSpacePosition = scale * pos + distance * offset;
+
+            ComputeClipSpacePositionAndMotion(vertex, patchCenter + vertex.objectSpacePosition);
+
+            verts[vertId] = vertex;
+        }
+    }
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/rock.hlsl b/meshNodeSample/shaders/rock.hlsl
new file mode 100644
index 0000000..7354f94
--- /dev/null
+++ b/meshNodeSample/shaders/rock.hlsl
@@ -0,0 +1,92 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "common.hlsl"
+
+// Rock generation is single-threaded, thus we use a coalescing node to generate multiple rocks in parallel.
+// Rocks are rendered with the same spline mesh node as the trees.
+//
+// Every thread turns one input record into one spline with four control points:
+//   0: a single vertex one terrain normal below the surface (closes the bottom),
+//   1: a 5-7 vertex ring at the surface,
+//   2: a narrower 5-7 vertex ring above it,
+//   3: a single-vertex tip.
+// All splines of the thread group are written into one shared output record.
+//
+// NOTE(review): the template arguments of GroupNodeInputRecords, NodeOutput and
+// GroupNodeOutputRecords are missing in this text and cannot be determined from this file -
+// restore them from the original source.
+[Shader("node")]
+[NodeLaunch("coalescing")]
+[NumThreads(maxSplinesPerRecord, 1, 1)]
+void GenerateRock(
+    [MaxRecords(maxSplinesPerRecord)]
+    GroupNodeInputRecords inputRecord,
+
+    uint threadId : SV_GroupThreadID,
+
+    [MaxRecords(1)]
+    [NodeId("DrawSpline")]
+    NodeOutput output)
+{
+    GroupNodeOutputRecords outputRecord = output.GetGroupNodeOutputRecords(1);
+
+    // one dispatch grid entry per rock
+    outputRecord.Get().dispatchGrid = uint3(inputRecord.Count(), 1, 1);
+
+    if (threadId < inputRecord.Count()) {
+        const float2 rockXZ = inputRecord.Get(threadId).position;
+        const uint seed = CombineSeed(asuint(rockXZ.x), asuint(rockXZ.y));
+
+        const float3 rockBase = GetTerrainPosition(rockXZ);
+        const float3 surfaceNormal = GetTerrainNormal(rockXZ);
+        // growth direction: the lerp factor is above 1, i.e. it extrapolates past the terrain normal
+        const float3 growDirection = lerp(float3(0, 1, 0), surfaceNormal, 1 + Random(seed, 456) * 0.5);
+
+        const float rotationAngle = Random(seed, 14658) * 2 * PI;
+
+        // random proportions of the rock
+        const float upScale = lerp(0.5, 1.2, Random(seed, 546));
+        const float sideAspect = 1.05f + Random(seed, 6514);
+        const float2 sideScale = lerp(0.6, 5.0, Random(seed, 9487)) * float2(sideAspect, 1);
+
+        const float tipStretch = 1.05f + Random(seed, 1564);
+        const float upperRingTaper = lerp(0.5, 0.9, Random(seed, 49827));
+
+        // per-spline attributes
+        outputRecord.Get(0).color[threadId] = float3(0.1, 0.1, 0.1) * 3.5;
+        outputRecord.Get(0).rotationOffset[threadId] = rotationAngle;
+        outputRecord.Get(0).windStrength[threadId] = 0;
+        outputRecord.Get(0).controlPointCount[threadId] = 4;
+
+        // slots of this spline's four control points
+        const int bottomPoint = threadId * splineMaxControlPointCount;
+        const int lowerRing = bottomPoint + 1;
+        const int upperRing = bottomPoint + 2;
+        const int tipPoint = bottomPoint + 3;
+
+        // bottom: single vertex below the surface
+        outputRecord.Get(0).controlPointPositions[bottomPoint] = rockBase - surfaceNormal;
+        outputRecord.Get(0).controlPointVertexCounts[bottomPoint] = 1;
+        outputRecord.Get(0).controlPointRadii[bottomPoint] = 0;
+        outputRecord.Get(0).controlPointNoiseAmplitudes[bottomPoint] = 0.0;
+
+        // lower ring at the surface
+        outputRecord.Get(0).controlPointPositions[lowerRing] = rockBase;
+        outputRecord.Get(0).controlPointVertexCounts[lowerRing] = round(lerp(5, 7, Random(seed, 4145)));
+        outputRecord.Get(0).controlPointRadii[lowerRing] = sideScale;
+        outputRecord.Get(0).controlPointNoiseAmplitudes[lowerRing] = 0.5 * upScale;
+
+        // upper, narrower ring
+        outputRecord.Get(0).controlPointPositions[upperRing] = rockBase + upScale * growDirection;
+        outputRecord.Get(0).controlPointVertexCounts[upperRing] = round(lerp(5, 7, Random(seed, 4578)));
+        outputRecord.Get(0).controlPointRadii[upperRing] = upperRingTaper * sideScale;
+        outputRecord.Get(0).controlPointNoiseAmplitudes[upperRing] = 0.5 * upScale;
+
+        // tip: single vertex slightly above the upper ring
+        outputRecord.Get(0).controlPointPositions[tipPoint] = rockBase + tipStretch * upScale * growDirection;
+        outputRecord.Get(0).controlPointVertexCounts[tipPoint] = 1;
+        outputRecord.Get(0).controlPointRadii[tipPoint] = Random(seed, 89514);
+        outputRecord.Get(0).controlPointNoiseAmplitudes[tipPoint] = 0;
+    }
+
+    outputRecord.OutputComplete();
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/shading.hlsl b/meshNodeSample/shaders/shading.hlsl
new file mode 100644
index 0000000..88f39a6
--- /dev/null
+++ b/meshNodeSample/shaders/shading.hlsl
@@ -0,0 +1,74 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "fullscreen.hlsl"
+#include "shadingcommon.h"
+#include "upscaler.h"
+#include "skybox.hlsl"
+
+//--------------------------------------------------------------------------------------
+// Texture definitions
+//--------------------------------------------------------------------------------------
+RWTexture2D RenderTarget : register(u0);
+
+Texture2D BaseColor : register(t0);
+Texture2D Normal : register(t1);
+
+//--------------------------------------------------------------------------------------
+// Main function
+//--------------------------------------------------------------------------------------
+// Deferred shading pass, one thread per pixel.
+// Reads the BaseColor and Normal targets and writes the shaded result to RenderTarget.
+// Pixels whose base color alpha is below 0.5 are filled with the procedural skybox;
+// all other pixels receive a sky-tinted ambient term plus a diffuse term from the global light.
+[numthreads(s_shadingThreadGroupSizeX, s_shadingThreadGroupSizeY, 1)]
+
+void MainCS(uint3 dtID : SV_DispatchThreadID)
+{
+    // Skip threads outside of the render area. Valid pixel indices run from 0 to size - 1,
+    // so an index equal to the size must be rejected as well (">=" instead of ">").
+    // NOTE(review): assumes FullScreenScaleRatio.xy holds the render size in pixels,
+    // as its use as inverse texel size in GetUV below implies - confirm against upscaler.h.
+    if (any(dtID.xy >= UpscalerInfo.FullScreenScaleRatio.xy)) {
+        return;
+    }
+
+    // Reconstruct the view direction of this pixel from its position on the far plane
+    const float2 uv = GetUV(dtID.xy, 1.f / UpscalerInfo.FullScreenScaleRatio.xy);
+    const float3 clip = float3(2 * uv.x - 1, 1 - 2 * uv.y, 1);
+    const float3 viewDirection = normalize(PerspectiveProject(InverseViewProjection, clip) - CameraPosition.xyz);
+
+    const LightingData lightingData = GetLightingData();
+
+    const float4 baseColor = BaseColor[dtID.xy];
+
+    // Base color alpha below 0.5: shade this pixel as sky
+    if (baseColor.a < 0.5) {
+        RenderTarget[dtID.xy] = float4(GetSkyboxColor(viewDirection, lightingData), 1);
+
+        return;
+    }
+
+    const float3 normal = Normal[dtID.xy];
+
+    // Ambient
+    const float sunZenithDot = lightingData.sunDirection.y;
+    // remap [-1, 1] to [0, 1] for the gradient lookup
+    const float sunZenithDot01 = (sunZenithDot + 1.0) * 0.5;
+    const float3 ambientContrib = baseColor.rgb * 0.2 * saturate(pow(dot(normal, -viewDirection), .25)) * 1 *
+                                  (skybox::SunZenith_Gradient(0.5) + skybox::ViewZenith_Gradient(sunZenithDot01));
+
+    // Diffuse
+    const float3 diffuseContrib = saturate(dot(normal, lightingData.globalLightDirection));
+
+    float3 color = ambientContrib + diffuseContrib * baseColor.rgb;
+
+    RenderTarget[dtID.xy] = float4(color.rgb, 1);
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/shadingcommon.h
b/meshNodeSample/shaders/shadingcommon.h new file mode 100644 index 0000000..47ff90b --- /dev/null +++ b/meshNodeSample/shaders/shadingcommon.h @@ -0,0 +1,44 @@ +// This file is part of the AMD Work Graph Mesh Node Sample. +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#pragma once
+
+// This header is compiled both as C++ and as HLSL; the __cplusplus checks select the matching declarations.
+#if __cplusplus
+#include "misc/math.h"
+#endif // __cplusplus
+
+// Thread group size (X and Y) of the shading compute shader
+static const unsigned int s_shadingThreadGroupSizeX = 8;
+static const unsigned int s_shadingThreadGroupSizeY = 8;
+
+#if __cplusplus
+// C++ view of the shading constant buffer; members mirror the HLSL cbuffer below.
+struct ShadingCBData
+{
+    Mat4 InverseViewProjection;
+    Vec4 CameraPosition;
+};
+#else
+// fullscreen.hlsl binds to b0, so constants need to start at b1 if using fullscreen.hlsl
+cbuffer ShadingCBData : register(b1)
+{
+    matrix InverseViewProjection;
+    float4 CameraPosition;
+}
+#endif // __cplusplus
\ No newline at end of file
diff --git a/meshNodeSample/shaders/skybox.hlsl b/meshNodeSample/shaders/skybox.hlsl
new file mode 100644
index 0000000..5693ea8
--- /dev/null
+++ b/meshNodeSample/shaders/skybox.hlsl
@@ -0,0 +1,255 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "shadingcommon.h"
+#include "utils.hlsl"
+
+// namespace contains skybox lookup gradients based on the lookup textures from https://kelvinvanhoorn.com/2022/03/17/skybox-tutorial-part-1/
+//
+// Every *_Gradient_{r,g,b} function evaluates one color channel of a piecewise gradient:
+// xs holds the key positions, ys the key values, and neighbouring keys are blended with smoothstep.
+// Any x that does not fall into [xs[0], xs[count - 1]) - i.e. x >= 0.9921875, but also x < 0 -
+// falls through the loop and returns the last key value.
+namespace skybox {
+
+
+    // Blue channel of the sun-zenith gradient.
+    float SunZenith_Gradient_b(float x)
+    {
+        const int count = 5;
+        const float xs[count] = { 0.0, 0.375, 0.515625, 0.625, 0.9921875 };
+        const float ys[count] = {
+            0.09119905696129081, 0.17743197428764693, 0.5457624421381186, 0.8458267513775317, 0.8470588235294116 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // Green channel of the sun-zenith gradient.
+    float SunZenith_Gradient_g(float x)
+    {
+        const int count = 4;
+        const float xs[count] = { 0.0, 0.3828125, 0.6171875, 0.9921875 };
+        const float ys[count] = { 0.055852483484183674, 0.1324827899848155, 0.678304263482747, 0.5670582583360976 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // Red channel of the sun-zenith gradient.
+    float SunZenith_Gradient_r(float x)
+    {
+        const int count = 4;
+        const float xs[count] = { 0.0, 0.375, 0.6171875, 0.9921875 };
+        const float ys[count] = { 0.05266772050977707, 0.0846283802271616, 0.37306694022972375, 0.2979585185575305 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // RGB sun-zenith gradient at position x.
+    float3 SunZenith_Gradient(float x)
+    {
+        return float3(SunZenith_Gradient_r(x), SunZenith_Gradient_g(x), SunZenith_Gradient_b(x));
+    }
+
+    // Blue channel of the view-zenith gradient.
+    float ViewZenith_Gradient_b(float x)
+    {
+        const int count = 6;
+        const float xs[count] = { 0.0, 0.375, 0.484375, 0.5390625, 0.6484375, 0.9921875 };
+        const float ys[count] = { 0.020563663948531673,
+                                  0.3419597608904451,
+                                  0.04161602600815216,
+                                  0.1095349428231567,
+                                  0.8149446289251109,
+                                  0.9900099167444902 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // Green channel of the view-zenith gradient.
+    float ViewZenith_Gradient_g(float x)
+    {
+        const int count = 5;
+        const float xs[count] = { 0.0, 0.359375, 0.53125, 0.6171875, 0.9921875 };
+        const float ys[count] = {
+            0.010815067628993518, 0.19845916353774884, 0.5742594966049783, 0.7342900528103369, 0.748949825687303 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // Red channel of the view-zenith gradient.
+    float ViewZenith_Gradient_r(float x)
+    {
+        const int count = 6;
+        const float xs[count] = { 0.0, 0.359375, 0.4921875, 0.59375, 0.640625, 0.9921875 };
+        const float ys[count] = { 0.009057957120245073,
+                                  0.15106019459324935,
+                                  0.9180293234405212,
+                                  0.47611197653854354,
+                                  0.5406124274378104,
+                                  0.500948270549165 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // RGB view-zenith gradient at position x.
+    float3 ViewZenith_Gradient(float x)
+    {
+        return float3(ViewZenith_Gradient_r(x), ViewZenith_Gradient_g(x), ViewZenith_Gradient_b(x));
+    }
+
+    // Blue channel of the sun-view gradient.
+    float SunView_Gradient_b(float x)
+    {
+        const int count = 5;
+        const float xs[count] = { 0.0, 0.4765625, 0.6328125, 0.984375, 0.9921875 };
+        const float ys[count] = {
+            0.0, 0.025240814536934993, 0.35683113031269376, 0.6095478205423517, 0.6113290117056305 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // Green channel of the sun-view gradient.
+    float SunView_Gradient_g(float x)
+    {
+        const int count = 5;
+        const float xs[count] = { 0.0, 0.3828125, 0.515625, 0.6171875, 0.9921875 };
+        const float ys[count] = {
+            0.0038584077592145796, 0.11097681754988054, 0.7754699617187375, 0.5145669601324688, 0.6615264387746628 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // Red channel of the sun-view gradient.
+    float SunView_Gradient_r(float x)
+    {
+        const int count = 5;
+        const float xs[count] = { 0.0, 0.3828125, 0.515625, 0.6328125, 0.9921875 };
+        const float ys[count] = {
+            0.005119371666456357, 0.147390195920971, 0.9702326103489829, 0.26112308728542827, 0.39095905970609257 };
+        for (int i = 0; i < (count - 1); ++i) {
+            if (xs[i] <= x && x < xs[i + 1]) {
+                float t = (x - xs[i]) / (xs[i + 1] - xs[i]);
+                return lerp(ys[i], ys[i + 1], smoothstep(0., 1., t));
+            }
+        }
+        return ys[count - 1];
+    }
+
+    // RGB sun-view gradient at position x.
+    float3 SunView_Gradient(float x)
+    {
+        return float3(SunView_Gradient_r(x), SunView_Gradient_g(x), SunView_Gradient_b(x));
+    }
+
+} // namespace skybox
+
+// Directions of the celestial bodies used for shading.
+struct LightingData {
+    float3 sunDirection;
+    float3 moonDirection;
+
+    // direction of whichever body is above the horizon (sun, or moon if sunDirection.y < 0)
+    float3 globalLightDirection;
+};
+
+// Computes sun and moon directions from a time of day, a latitude and a south angle.
+// All three inputs are currently compile-time constants (timeOfDay = 12, latitude = 0, southAngle = 0).
+LightingData GetLightingData()
+{
+    LightingData result;
+
+    const float southAngle = ToRadians(0.0);
+    const float latitude = ToRadians(0.0);
+    const float timeOfDay = 12.0;
+
+    const float3 up = float3(0, 1, 0);
+    const float3 south = float3(cos(southAngle), 0, sin(southAngle));
+
+    const float3 right = normalize(cross(south, up));
+
+    // one full revolution per 24 hours
+    const float sunAngle = (timeOfDay / 24.f) * 2 * PI;
+
+    const float3 sunVector = -cos(sunAngle) * up + -sin(sunAngle) * right;
+    result.sunDirection = normalize(cos(latitude) * sunVector + sin(latitude) * south);
+    // the moon is placed exactly opposite of the sun
+    result.moonDirection = normalize(cos(latitude) * -sunVector + sin(latitude) * -south);
+
+    result.globalLightDirection = (result.sunDirection.y < 0) ? result.moonDirection : result.sunDirection;
+
+    return result;
+}
+
+// Skybox based on https://kelvinvanhoorn.com/2022/03/17/skybox-tutorial-part-1/
+// Returns the sky color seen along `direction`: the sun-zenith base color plus masked view-zenith
+// and sun-view contributions, plus sun and moon discs.
+// All three gradients are looked up with the sun height (sunZenithDot01); the view direction
+// only enters through the masks.
+float3 GetSkyboxColor(in float3 direction, in LightingData lightingData)
+{
+    const float sunViewDot = dot(lightingData.sunDirection, direction);
+    const float moonViewDot = dot(lightingData.moonDirection, direction);
+    const float sunZenithDot = lightingData.sunDirection.y;
+    const float viewZenithDot = direction.y;
+
+    // remap [-1, 1] to [0, 1] for the gradient lookups
+    float sunZenithDot01 = (sunZenithDot + 1.0) * 0.5;
+
+    float3 sunZenithColor = skybox::SunZenith_Gradient(sunZenithDot01);
+
+    float3 viewZenithColor = skybox::ViewZenith_Gradient(sunZenithDot01);
+    // strongest for directions at or below the horizon (viewZenithDot <= 0)
+    float vzMask = pow(saturate(1.0 - viewZenithDot), 4);
+
+    float3 sunViewColor = skybox::SunView_Gradient(sunZenithDot01) * 0.7;
+    // strongest when looking towards the sun
+    float svMask = pow(saturate(sunViewDot), 4);
+
+    float3 skyColor = sunZenithColor + vzMask * viewZenithColor + svMask * sunViewColor;
+
+    // Sun disc; sunVisible ramps from 0 to 1 as sunDirection.y rises from -5 * sunRadius to 0
+    const float sunRadius = 0.05;
+    const float sunMask = step(1 - (sunRadius * sunRadius), sunViewDot);
+    const float sunVisible = clamp((lightingData.sunDirection.y + 5 * sunRadius) / (5 * sunRadius), 0, 1);
+
+    const float3 sunColor = sunMask * sunVisible;
+
+    // Moon disc, same construction as the sun disc
+    const float moonRadius = 0.03;
+    const float moonMask = step(1 - (moonRadius * moonRadius), moonViewDot);
+    const float moonVisible = clamp((lightingData.moonDirection.y + 5 * moonRadius) / (5 * moonRadius), 0, 1);
+
+    const float3 moonColor = moonMask * moonVisible;
+
+    return skyColor + sunColor + moonColor;
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/sparsegrassmeshshader.hlsl b/meshNodeSample/shaders/sparsegrassmeshshader.hlsl
new file mode 100644
index 0000000..c82a7e8
--- /dev/null
+++ b/meshNodeSample/shaders/sparsegrassmeshshader.hlsl
@@ -0,0 +1,145 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "common.hlsl"
+
+static const int sparseGrassGroupSize = 128;
+static const int numOutputVerticesLimit = 128;
+static const int numOutputTrianglesLimit = 64;
+
+// multiple 8x4 patches fill a detailed tile
+static const int2 sparseGrassThreadGroupGridSize = int2(8, 4);
+// (identifier spelling "sprase" kept as-is so that any existing reference keeps compiling)
+static const int2 spraseGrassThreadGroupsPerAxis = grassPatchesPerDetailedTile / sparseGrassThreadGroupGridSize;
+
+// Returns the XZ position of one sparse grass blade.
+//   patch       - blade index inside the thread group's grid (row-major, sparseGrassThreadGroupGridSize)
+//   patchOffset - index of the thread group inside the tile (row-major, spraseGrassThreadGroupsPerAxis)
+//   gridBase    - grid coordinate of the tile origin, in grass patch units
+float2 GetPatchPosition(in int patch, in int patchOffset, in int2 gridBase)
+{
+    const int x = patch % sparseGrassThreadGroupGridSize.x;
+    const int z = patch / sparseGrassThreadGroupGridSize.x;
+
+    const int offsetX = patchOffset % spraseGrassThreadGroupsPerAxis.x;
+    const int offsetZ = patchOffset / spraseGrassThreadGroupsPerAxis.x;
+
+    const int2 grid = int2(x, z) + int2(offsetX, offsetZ) * sparseGrassThreadGroupGridSize;
+
+    return grassSpacing * gridBase +
+           (grid + GetGrassOffset(gridBase + grid)) * grassSpacing * (8.0 / sparseGrassThreadGroupGridSize.x);
+}
+
+
+// Mesh node drawing distant ("sparse") grass as one camera-facing quad per blade.
+// One thread group emits sparseGrassThreadGroupGridSize.x * .y quads (4 vertices, 2 triangles each);
+// gid.x selects the tile position inside the input record, gid.y the thread group inside the tile.
+[Shader("node")]
+[NodeLaunch("mesh")]
+[NodeId("DrawSparseGrassPatch", 0)]
+[NodeMaxDispatchGrid(maxSparseGrassPatchesPerRecord, sparseGrassThreadGroupsPerRecord, 1)]
+// This limit was set through instrumentation and is not required on AMD GPUs.
+// If you wish to change any of the procedural generation parameters,
+// and you are running on a non-AMD GPU, you may need to adjust this limit.
+// You can learn more at:
+// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices
+[NodeMaxInputRecordsPerGraphEntryRecord(100, true)]
+[NumThreads(sparseGrassGroupSize, 1, 1)]
+[OutputTopology("triangle")]
+void SparseGrassMeshShader(
+    uint gtid : SV_GroupThreadID,
+    uint2 gid : SV_GroupID,
+    DispatchNodeInputRecord inputRecord,
+    out indices uint3 tris[numOutputTrianglesLimit],
+    out primitives GrassCullPrimitive prims[numOutputTrianglesLimit],
+    out vertices GrassVertex verts[numOutputVerticesLimit])
+{
+    const int bladeCount = sparseGrassThreadGroupGridSize.x * sparseGrassThreadGroupGridSize.y;
+    // 4 vertices per blade
+    const int vertexCount = bladeCount * 4;
+    // 2 triangles per blade
+    const int triangleCount = bladeCount * 2;
+
+    SetMeshOutputCounts(vertexCount, triangleCount);
+
+    GrassVertex vertex;
+
+    // heights of the lower and upper quad edge above the blade root
+    float low = .05;
+    float high = .45;
+
+    const int2 gridBase = inputRecord.Get().position[gid.x] * grassPatchesPerDetailedTile;
+
+    // One thread per vertex
+    if (gtid < vertexCount) {
+        int vertexId = gtid;
+        int patch = vertexId / 4;
+        int vi = vertexId % 4;
+        // corner layout: 0 = low/right, 1 = low/left, 2 = high/right, 3 = high/left
+        bool isLow = vi == 0 || vi == 1;
+        bool isRight = vi == 0 || vi == 2;
+
+        const float2 pos = GetPatchPosition(patch, gid.y, gridBase);
+
+        float3 patchNormal = GetTerrainNormal(pos);
+
+        float3 center = float3(pos.x, GetTerrainHeight(pos), pos.y);
+
+        // Fade grass into the ground in the distance
+        const float distanceScale = smoothstep(sparseGrassMaxDistance * 0.9, sparseGrassMaxDistance, distance(center, GetCameraPosition()));
+        center.y -= high * distanceScale;
+
+        float3 center2cam = normalize(GetCameraPosition() - center);
+
+        // Build a camera-facing basis; the forward vector's height is capped at 0.2 before normalisation
+        float3 forward = normalize(float3(center2cam.x, min(0.2, center2cam.y), center2cam.z));
+        float3 right = normalize(cross(center2cam, float3(0, 1, 0)));
+
+        float3 up = normalize(cross(right, forward));
+
+        vertex.worldSpacePosition = center;
+        vertex.worldSpacePosition += right * BitSign((uint)isRight, 0) * grassSpacing;
+        vertex.worldSpacePosition += up * (isLow ? (low) : high);
+
+        vertex.worldSpacePosition.xz += center2cam.xz * (length(center2cam) / 1000.);
+        // the final height is set explicitly and replaces the y offset contributed by `up` above
+        vertex.worldSpacePosition.y = center.y + (isLow ? (low + center2cam.y * 0.2) : high);
+
+        vertex.rootHeight = center.y + .08;
+        vertex.height = high;
+        vertex.worldSpaceNormal = isLow ? normalize(float3(center2cam.x, 0, center2cam.z)) : float3(0, 1, 0);
+        vertex.worldSpaceGroundNormal = patchNormal;
+
+        ComputeClipSpacePositionAndMotion(vertex, vertex.worldSpacePosition);
+
+        verts[vertexId] = vertex;
+    }
+
+    // One thread per triangle
+    if (gtid < triangleCount) {
+        const int patch = gtid / 2;
+        const int base = 4 * patch;
+
+        const float2 pos = GetPatchPosition(patch, gid.y, gridBase);
+
+        const float3 terrainNormal = GetTerrainNormal(pos);
+        const float3 biomeWeight = GetBiomeWeights(pos);
+
+        // Cull blades randomly depending on the first biome weight, and on terrain with terrainNormal.y < 0.55.
+        // Both triangles of a blade use the same position and therefore get the same cull decision.
+        const bool cull = (Random(asuint(pos.x), asuint(pos.y), 2378) < biomeWeight.x * 2) ||
+                          (terrainNormal.y < 0.55);
+
+        uint3 tri = (gtid % 2) == 0 ? uint3(base, base + 1, base + 2) : uint3(base + 3, base + 2, base + 1);
+
+        tris[gtid] = tri;
+        prims[gtid].cull = cull;
+    }
+}
\ No newline at end of file
diff --git a/meshNodeSample/shaders/splinerenderer.hlsl b/meshNodeSample/shaders/splinerenderer.hlsl
new file mode 100644
index 0000000..98f7a55
--- /dev/null
+++ b/meshNodeSample/shaders/splinerenderer.hlsl
@@ -0,0 +1,333 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "common.hlsl" + +struct TransformedVertex { + float4 clipSpacePosition : SV_POSITION; + float3 objectSpacePosition : NORMAL0; + float2 clipSpaceMotion : TEXCOORD0; + float3 color : NORMAL1; +}; + +// Returns number of line sections in a vertex ring +int GetRingSectionCount(in int ringVertexCount) { + return ringVertexCount == 1 ? 0 : ringVertexCount; +} + +static const int splineGroupSize = 128; +static const int numOutputVerticesLimit = 64; +static const int numOutputTrianglesLimit = 128; + +[Shader("node")] +[NodeLaunch("mesh")] +[NodeId("DrawSpline", 0)] +[NodeMaxDispatchGrid(maxSplinesPerRecord, 1, 1)] +// This limit was set through instrumentation and is not required on AMD GPUs. +// If you wish to change any of the procedural generation parameters, +// and you are running on a non-AMD GPU, you may need to adjust this limit. 
+// You can learn more at: +// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices +[NodeMaxInputRecordsPerGraphEntryRecord(10000, true)] +[NumThreads(splineGroupSize, 1, 1)] +[OutputTopology("triangle")] +void SplineMeshShader( + uint threadId : SV_GroupThreadID, + uint gid : SV_GroupID, + DispatchNodeInputRecord inputRecord, + out indices uint3 tris[numOutputTrianglesLimit], + out vertices TransformedVertex verts[numOutputVerticesLimit]) +{ + const uint splineControlPointCount = clamp(inputRecord.Get().controlPointCount[gid], 0, splineMaxControlPointCount); + const uint splineSectionCount = clamp(int(splineControlPointCount) - 1, 0, splineMaxControlPointCount - 1); + + const uint controlPointOffset = gid * splineMaxControlPointCount; + + uint vertexOutputCount = 0; + uint primitiveOutputCount = 0; + + // control point for which the current thread will create a vertex + int threadVertexControlPoint = 0; + // index on control point ring which the current thread will generate + int threadVertexControlPointVertex = 0; + + // control point section for which the current thread will create a triangle + int threadPrimitiveSection = 0; + // index on control point ring which the current thread will generate + int threadPrimitiveSectionTriangle = 0; + int threadPrimitiveSectionVertexOffset = 0; + + { + // Number of vertices in last vertex ring + int lastRingVertexCount = 0; + + // count vertices in first ring + { + const int controlPointVertexCount = inputRecord.Get().controlPointVertexCounts[controlPointOffset]; + + // check if current thread will generate a vertex on this ring + if (threadId < controlPointVertexCount) + { + threadVertexControlPoint = 0; + threadVertexControlPointVertex = threadId; + } + + // Count total number of vertices + vertexOutputCount += controlPointVertexCount; + + // Update last ring size + lastRingVertexCount = controlPointVertexCount; + } + + // count vertex & triangles count for every ring + for 
(int ring = 1; ring < splineControlPointCount; ++ring) + { + const int controlPointVertexCount = inputRecord.Get().controlPointVertexCounts[controlPointOffset + ring]; + + if ((vertexOutputCount <= threadId) && ((vertexOutputCount + controlPointVertexCount) > threadId)) + { + threadVertexControlPoint = ring; + threadVertexControlPointVertex = threadId - int(vertexOutputCount); + } + + // lower sections are on last ring + const int lowerSectionCount = GetRingSectionCount(lastRingVertexCount); + // upper sections are on current ring + const int upperSectionCount = GetRingSectionCount(controlPointVertexCount); + + // Count number of regular sections (i.e. one edge of the lower ring maps to one section on the upper ring) + const int sectionCount = min(lowerSectionCount, upperSectionCount); + const int regularTriangleCount = sectionCount * 2; + // Count number of irregular trianlges (i.e. a edge connects with a single vertex on the other ring) + const int irregularTriangleCount = max(lowerSectionCount, upperSectionCount) - sectionCount; + + // Total number of triangles on this ring + const int triangleCount = regularTriangleCount + irregularTriangleCount; + + if ((primitiveOutputCount <= threadId) && ((primitiveOutputCount + triangleCount) > threadId)) + { + // Primitives are generated from lower to upper ring, i.e. 
we need the index of the last ring + threadPrimitiveSection = ring - 1; + threadPrimitiveSectionTriangle = threadId - primitiveOutputCount; + // Get index of first vertex in last ring + threadPrimitiveSectionVertexOffset = vertexOutputCount - lastRingVertexCount; + } + + // Count total number of vertices & triangles + vertexOutputCount += controlPointVertexCount; + primitiveOutputCount += triangleCount; + + // Update last ring size + lastRingVertexCount = controlPointVertexCount; + } + } + + vertexOutputCount = min(vertexOutputCount, numOutputVerticesLimit); + primitiveOutputCount = min(primitiveOutputCount, numOutputTrianglesLimit); + + SetMeshOutputCounts(vertexOutputCount, primitiveOutputCount); + + if (threadId < vertexOutputCount) { + TransformedVertex vertex; + + // Base position to compute object-local positions + const float3 splineBasePosition = inputRecord.Get().controlPointPositions[controlPointOffset]; + + const float3 controlPointPosition = inputRecord.Get().controlPointPositions[controlPointOffset + threadVertexControlPoint]; + const uint controlPointVertexCount = inputRecord.Get().controlPointVertexCounts[controlPointOffset + threadVertexControlPoint]; + + // Compute forward vector based on previous and next control point positions + float3 forward = float3(0, 0, 0); + // Add direction from previous control point + if (threadVertexControlPoint > 0) + { + const float3 previousControlPointPosition = inputRecord.Get().controlPointPositions[controlPointOffset + threadVertexControlPoint - 1]; + forward += controlPointPosition - previousControlPointPosition; + } + // Add direction to next control point + if (threadVertexControlPoint < (splineControlPointCount - 1)) + { + const float3 nextControlPointPosition = inputRecord.Get().controlPointPositions[controlPointOffset + threadVertexControlPoint + 1]; + forward += nextControlPointPosition - controlPointPosition; + } + + forward = normalize(forward); + + // get perpendicular vector to forward + float3 right 
= normalize(cross(forward, float3(1, 0, 0))); + float3 up = normalize(cross(forward, right)); + + const float rotationOffset = inputRecord.Get().rotationOffset[gid]; + const float vertexAlpha = rotationOffset + (threadVertexControlPointVertex / float(controlPointVertexCount)) * 2.f * PI; + + const float2 radius = inputRecord.Get().controlPointRadii[controlPointOffset + threadVertexControlPoint]; + const float noiseAmplitude = inputRecord.Get().controlPointNoiseAmplitudes[controlPointOffset + threadVertexControlPoint]; + + // random noise value in [-noiseAmplitude; noiseAmplitude] + const float noise = (Random(Hash(controlPointPosition), Hash(vertexAlpha)) * 2.0 - 1.0) * noiseAmplitude; + + const float3 worldSpaceBasePosition = + controlPointPosition + // base position + cos(vertexAlpha) * right * radius.y + // position on vertex ring in right direction + sin(vertexAlpha) * up * radius.x + // position on vertex ring in up direction + forward * noise; // random noise offset in spline direction + + // x = wind strength for current spline + // y = factor for how much the actual vertex position in influencing the wind offset + // 0 = wind offset is only determined by control point position + // 1 = wind offset is only determined by vertex position + const float2 windStrength = inputRecord.Get().windStrength[gid]; + const float3 windReferencePosition = lerp(controlPointPosition, worldSpaceBasePosition, windStrength.y); + // Get Height above terrain scaled by wind strength + const float vertexHeight = max(windReferencePosition.y - GetTerrainHeight(windReferencePosition.xz), 0) * windStrength.x; + + // Compute wind offset for current and last frame + const float3 windOffset = + vertexHeight * GetWindStrength() * GetWindOffset(windReferencePosition.xz, GetTime()); + const float3 previousWindOffset = + vertexHeight * GetWindStrength() * GetWindOffset(windReferencePosition.xz, GetPreviousTime()); + + // compute position relative to first control point + // this improve 
floating-point precision of ddx & ddy derivatives in pixel shader + vertex.objectSpacePosition = worldSpaceBasePosition - splineBasePosition + windOffset; + vertex.color = inputRecord.Get().color[gid]; + + ComputeClipSpacePositionAndMotion( + vertex, worldSpaceBasePosition + windOffset, worldSpaceBasePosition + previousWindOffset); + + + verts[threadId] = vertex; + } + + if (threadId < primitiveOutputCount) { + // Get number of vertices in current (lower) and next (upper) ring + const int lowerVertexCount = inputRecord.Get().controlPointVertexCounts[controlPointOffset + threadPrimitiveSection]; + const int upperVertexCount = inputRecord.Get().controlPointVertexCounts[controlPointOffset + threadPrimitiveSection + 1]; + + // Get number of sections in current and next ring + const int lowerSectionCount = GetRingSectionCount(lowerVertexCount); + const int upperSectionCount = GetRingSectionCount(upperVertexCount); + + // Get index of first vertex in current and next ring + const int lowerVertexOffset = threadPrimitiveSectionVertexOffset; + const int upperVertexOffset = lowerVertexOffset + lowerVertexCount; + + // The number of full sections (i.e. one edge in the lower ring corresponds to one edge in the upper ring) + // is determined by the smaller vertex ring. + // Irregular triangles will be generated from the larger ring towards the smaller ring + const bool isLower = lowerSectionCount <= upperSectionCount; + + // Total number of regular sections + const int sectionCount = isLower ? lowerSectionCount : upperSectionCount; + // Ratio between sections in the smaller and larger ring + const float sectionFactor = isLower ? upperSectionCount / float(lowerSectionCount) : lowerSectionCount / float(upperSectionCount); + + const int regularTriangleCount = sectionCount * 2; + // check if current triangle is irregular (i.e. 
connects an edge to a single vertex) + const bool isIrregularTriangle = threadPrimitiveSectionTriangle >= regularTriangleCount; + // index of irregular triangle + const int irregularTriangleIndex = threadPrimitiveSectionTriangle - regularTriangleCount; + + // running counters + int lowerSection = 0; + int upperSection = 0; + int sectionJumpCount = 0; + + // indices for smaller and larger (=other) section + int section = 0; + int otherSection = 0; + for (; section < sectionCount; ++section) + { + // compute which section on the larger ring should match up with the current section + int otherSectionTarget = section * sectionFactor; + + // Generate irregular triangles for skipped sections + for (; otherSection < otherSectionTarget; ++otherSection) + { + if (isIrregularTriangle && (sectionJumpCount == irregularTriangleIndex)) + { + lowerSection = isLower ? section : otherSection; + upperSection = isLower ? otherSection : section; + } + sectionJumpCount++; + } + + // Generate a regular triangles for both sections + if (!isIrregularTriangle && (section == (threadPrimitiveSectionTriangle / 2))) + { + lowerSection = isLower ? section : otherSection; + upperSection = isLower ? otherSection : section; + } + + otherSection++; + } + // Generate remaining irregular triangles to close the ring + for (; otherSection < max(lowerSectionCount, upperSectionCount); ++otherSection) + { + if (isIrregularTriangle && (sectionJumpCount == irregularTriangleIndex)) + { + lowerSection = isLower ? section : otherSection; + upperSection = isLower ? 
// Pixel shader for spline geometry.
// Passes the interpolated vertex color and clip-space motion through to the deferred output and
// builds the normal from the cross product of the screen-space derivatives (ddy x ddx) of the
// interpolated object-space position.
DeferredPixelShaderOutput SplinePixelShader(TransformedVertex input)
{
    // Per-pixel derivatives of the object-space position
    const float3 positionDdx = ddx(input.objectSpacePosition.xyz);
    const float3 positionDdy = ddy(input.objectSpacePosition.xyz);

    DeferredPixelShaderOutput result;

    result.baseColor  = float4(input.color, 1);
    result.motion     = input.clipSpaceMotion;
    result.normal.xyz = normalize(cross(positionDdy, positionDdx));
    result.normal.w   = 1.0;

    return result;
}
// Mesh node that draws one tile of a terrain chunk as a regular grid of
// primitivesPerAxis x primitivesPerAxis quads (two triangles per quad).
//
// Parameters:
//   gtid        - thread index in the group; thread i writes vertex i (when i < vertexCount)
//                 and triangle i (when i < primitiveCount).
//   gid         - thread-group index in the dispatch grid; selects the tile inside the chunk.
//   inputRecord - chunk record; this shader reads levelOfDetail, chunkGridPosition,
//                 levelOfDetailTransition and dispatchGrid from it.
//   tris        - output triangle indices (128 = 8 * 8 * 2).
//   verts       - output vertices (81 = 9 * 9).
//
// NOTE(review): the template argument of DispatchNodeInputRecord is not visible in this view of the
// source; DrawTerrainChunkRecord is taken from the type that inputRecord.Get() is assigned to below.
[Shader("node")]
[NodeLaunch("mesh")]
[NodeId("DrawTerrainChunk", 0)]
[NodeMaxDispatchGrid(8, 8, 1)]
// This limit reflects the maximum dispatch size of the chunk grid and is not required on AMD GPUs.
// If you wish to change any of the procedural generation parameters,
// and you are running on a non-AMD GPU, you may need to adjust this limit.
// You can learn more at:
// https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-tips_tricks_best_practices
[NodeMaxInputRecordsPerGraphEntryRecord(32 * 32, true)]
[NumThreads(128, 1, 1)]
[OutputTopology("triangle")]
void TerrainMeshShader(
    uint  gtid : SV_GroupThreadID,
    uint2 gid : SV_GroupID,
    DispatchNodeInputRecord<DrawTerrainChunkRecord> inputRecord,
    out indices uint3 tris[128],
    out vertices TransformedVertex verts[81])
{
    const DrawTerrainChunkRecord record = inputRecord.Get();

    const int levelOfDetail = record.levelOfDetail;
    // number of thread groups per chunk axis for LOD 0
    const int baseThreadGroupsPerChunkAxis = 8;

    // Every LOD step halves the number of thread groups per chunk axis (never fewer than one) ...
    const int threadGroupsPerChunkAxis =
        baseThreadGroupsPerChunkAxis / clamp(1L << levelOfDetail, 1, baseThreadGroupsPerChunkAxis);
    // ... so each thread group covers a correspondingly larger part of the chunk.
    const int threadGroupIdScale = baseThreadGroupsPerChunkAxis / threadGroupsPerChunkAxis;

    const int   primitivesPerAxis = 8;
    const int   verticesPerAxis   = primitivesPerAxis + 1;
    const float scale             = chunkSize / float(primitivesPerAxis * baseThreadGroupsPerChunkAxis);

    const int vertexCount    = verticesPerAxis * verticesPerAxis;
    const int primitiveCount = primitivesPerAxis * primitivesPerAxis * 2;

    SetMeshOutputCounts(vertexCount, primitiveCount);

    const int2 tile = record.chunkGridPosition * baseThreadGroupsPerChunkAxis + int2(gid.xy) * threadGroupIdScale;

    // A transition flag only applies to thread groups that lie on the matching border of the dispatch grid
    const bool4 localLevelOfDetailTransition =
        bool4(record.levelOfDetailTransition.x && (gid.x == 0),
              record.levelOfDetailTransition.y && (gid.y == 0),
              record.levelOfDetailTransition.z && (gid.x == (record.dispatchGrid.x - 1)),
              record.levelOfDetailTransition.w && (gid.y == (record.dispatchGrid.y - 1)));

    if (gtid < vertexCount) {
        TransformedVertex vertex;

        int2 localVertexIndex = int2(gtid % verticesPerAxis, gtid / verticesPerAxis);

        // collapse vertices along LOD borders:
        // odd vertices on a transition border are snapped to the previous even vertex.
        // The last vertex row/column has index primitivesPerAxis (was a hard-coded 8).
        if (localLevelOfDetailTransition.x && (localVertexIndex.x == 0) && ((localVertexIndex.y % 2) == 1)) {
            localVertexIndex.y = int(localVertexIndex.y / 2) * 2;
        }
        if (localLevelOfDetailTransition.y && (localVertexIndex.y == 0) && ((localVertexIndex.x % 2) == 1)) {
            localVertexIndex.x = int(localVertexIndex.x / 2) * 2;
        }
        if (localLevelOfDetailTransition.z && (localVertexIndex.x == primitivesPerAxis) &&
            ((localVertexIndex.y % 2) == 1)) {
            localVertexIndex.y = int(localVertexIndex.y / 2) * 2;
        }
        if (localLevelOfDetailTransition.w && (localVertexIndex.y == primitivesPerAxis) &&
            ((localVertexIndex.x % 2) == 1)) {
            localVertexIndex.x = int(localVertexIndex.x / 2) * 2;
        }

        localVertexIndex *= threadGroupIdScale;

        const int2 globalVertexIndex = tile * primitivesPerAxis + localVertexIndex;

        const float2 globalVertexPosition = globalVertexIndex * scale;

        const float3 worldSpacePosition =
            float3(globalVertexPosition.x, GetTerrainHeight(globalVertexPosition), globalVertexPosition.y);

        vertex.normal             = GetTerrainNormal(worldSpacePosition.xz);
        vertex.worldSpacePosition = worldSpacePosition;
        ComputeClipSpacePositionAndMotion(vertex, worldSpacePosition);

        verts[gtid] = vertex;
    }

    // One triangle per thread. With 128 threads and 128 primitives the guard is always true today;
    // it keeps the write in bounds if the grid constants above are ever changed.
    if (gtid < primitiveCount) {
        tris[gtid] = GetPrimitive(gtid, primitivesPerAxis);
    }
}
a/meshNodeSample/shaders/tree.hlsl b/meshNodeSample/shaders/tree.hlsl new file mode 100644 index 0000000..b45bf90 --- /dev/null +++ b/meshNodeSample/shaders/tree.hlsl @@ -0,0 +1,300 @@ +// This file is part of the AMD Work Graph Mesh Node Sample. +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "common.hlsl" + +// Oak tree generation is single-threaded, so we use a coalescing node to generate multiple trees at once. +// Each oak tree consists of three splines: the trunk, a branch and the leaves. 
// Coalescing node that turns up to maxSplinesPerRecord tree positions into oak trees.
// Thread i handles input record i and fills spline slot i of three shared "DrawSpline" output
// records: record 0 holds the trunks, record 1 the branches and record 2 the leaves.
//
// NOTE(review): the record template arguments of GroupNodeInputRecords / NodeOutput /
// GroupNodeOutputRecords are not visible in this view of the source - confirm against the real file.
[Shader("node")]
[NodeId("GenerateTree", 0)]
[NodeLaunch("coalescing")]
[NumThreads(maxSplinesPerRecord, 1, 1)]
void GenerateOakTree(
    [MaxRecords(maxSplinesPerRecord)]
    GroupNodeInputRecords inputRecord,

    uint threadId : SV_GroupThreadID,

    [MaxRecords(3)]
    [NodeId("DrawSpline")]
    NodeOutput output)
{
    GroupNodeOutputRecords outputRecord = output.GetGroupNodeOutputRecords(3);

    if (threadId < inputRecord.Count()) {
        const float2 rootXZ = inputRecord.Get(threadId).position;
        const float3 root   = GetTerrainPosition(rootXZ);

        const uint seed = CombineSeed(asuint(rootXZ.x), asuint(rootXZ.y));

        // Random heading of the tree; "up" leans slightly (10%) towards the terrain normal
        const float  rotationAngle = Random(seed, 78923) * 2 * PI;
        const float3 forward       = float3(sin(rotationAngle), 0, cos(rotationAngle));
        const float3 up            = lerp(float3(0, 1, 0), GetTerrainNormal(rootXZ), 0.1);
        const float3 side          = normalize(cross(forward, up));

        // Random per-tree height and width factors
        const float upScale   = lerp(0.5, 1.2, Random(seed, 546));
        const float sideScale = lerp(0.6, 1.0, Random(seed, 9487));

        // Spline slot of this tree and the index of its first control point (same in all three records)
        const int slot = threadId;
        const int cp   = slot * splineMaxControlPointCount;

        const float3 barkColor = float3(0.18, 0.12, 0.10) * 6;

        // ---- Record 0: trunk, 5 control points ----
        // The dispatch grid is set to the number of splines (= trees) in the record
        outputRecord.Get(0).dispatchGrid            = uint3(inputRecord.Count(), 1, 1);
        outputRecord.Get(0).color[slot]             = barkColor;
        outputRecord.Get(0).rotationOffset[slot]    = 0;
        outputRecord.Get(0).windStrength[slot]      = float2(0, 0);
        outputRecord.Get(0).controlPointCount[slot] = 5;

        outputRecord.Get(0).controlPointPositions[cp + 0]       = root - up;
        outputRecord.Get(0).controlPointVertexCounts[cp + 0]    = 5;
        outputRecord.Get(0).controlPointRadii[cp + 0]           = 0.5 * sideScale;
        outputRecord.Get(0).controlPointNoiseAmplitudes[cp + 0] = 0.0;

        outputRecord.Get(0).controlPointPositions[cp + 1]       = root + 2 * upScale * up;
        outputRecord.Get(0).controlPointVertexCounts[cp + 1]    = 4;
        outputRecord.Get(0).controlPointRadii[cp + 1]           = 0.35 * sideScale;
        outputRecord.Get(0).controlPointNoiseAmplitudes[cp + 1] = 0.5;

        outputRecord.Get(0).controlPointPositions[cp + 2]       = root + 4 * upScale * up + 1 * sideScale * forward;
        outputRecord.Get(0).controlPointVertexCounts[cp + 2]    = 3;
        outputRecord.Get(0).controlPointRadii[cp + 2]           = 0.25 * sideScale;
        outputRecord.Get(0).controlPointNoiseAmplitudes[cp + 2] = 0.0;

        outputRecord.Get(0).controlPointPositions[cp + 3] =
            root + 4.5 * upScale * up + 1.5 * sideScale * forward + 0.5 * sideScale * side;
        outputRecord.Get(0).controlPointVertexCounts[cp + 3]    = 2;
        outputRecord.Get(0).controlPointRadii[cp + 3]           = 0.3 * sideScale;
        outputRecord.Get(0).controlPointNoiseAmplitudes[cp + 3] = 0.0;

        outputRecord.Get(0).controlPointPositions[cp + 4] =
            root + 5.5 * upScale * up + 2 * sideScale * forward + 1 * sideScale * side;
        outputRecord.Get(0).controlPointVertexCounts[cp + 4]    = 1;
        outputRecord.Get(0).controlPointRadii[cp + 4]           = 0.0;
        outputRecord.Get(0).controlPointNoiseAmplitudes[cp + 4] = 0.0;

        // ---- Record 1: branch, 3 control points ----
        outputRecord.Get(1).dispatchGrid            = uint3(inputRecord.Count(), 1, 1);
        outputRecord.Get(1).color[slot]             = barkColor;
        outputRecord.Get(1).rotationOffset[slot]    = 0;
        outputRecord.Get(1).windStrength[slot]      = float2(0, 0);
        outputRecord.Get(1).controlPointCount[slot] = 3;

        outputRecord.Get(1).controlPointPositions[cp + 0]       = root + 3 * upScale * up + 0.5 * sideScale * forward;
        outputRecord.Get(1).controlPointVertexCounts[cp + 0]    = 4;
        outputRecord.Get(1).controlPointRadii[cp + 0]           = 0.25 * sideScale;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 0] = 0.0;

        outputRecord.Get(1).controlPointPositions[cp + 1]       = root + 4 * upScale * up - 0.5 * sideScale * forward;
        outputRecord.Get(1).controlPointVertexCounts[cp + 1]    = 3;
        outputRecord.Get(1).controlPointRadii[cp + 1]           = 0.2 * sideScale;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 1] = 0.25;

        outputRecord.Get(1).controlPointPositions[cp + 2]       = root + 5 * upScale * up - 1 * sideScale * forward;
        outputRecord.Get(1).controlPointVertexCounts[cp + 2]    = 1;
        outputRecord.Get(1).controlPointRadii[cp + 2]           = 0.0;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 2] = 0.0;

        // ---- Record 2: leaves, 4 control points; the only oak spline with a non-zero wind strength ----
        outputRecord.Get(2).dispatchGrid            = uint3(inputRecord.Count(), 1, 1);
        outputRecord.Get(2).color[slot]             = float3(0.3, 0.3, 0.0) * lerp(0.7, 1.3, Random(seed, 1456));
        outputRecord.Get(2).rotationOffset[slot]    = rotationAngle;
        outputRecord.Get(2).windStrength[slot]      = float2(0.125, 0.5);
        outputRecord.Get(2).controlPointCount[slot] = 4;

        outputRecord.Get(2).controlPointPositions[cp + 0]       = root + 4 * upScale * up + 0.5 * sideScale * forward;
        outputRecord.Get(2).controlPointVertexCounts[cp + 0]    = 1;
        outputRecord.Get(2).controlPointRadii[cp + 0]           = 0.0;
        outputRecord.Get(2).controlPointNoiseAmplitudes[cp + 0] = 0.0;

        outputRecord.Get(2).controlPointPositions[cp + 1]       = root + 5 * upScale * up + 0.5 * sideScale * forward;
        outputRecord.Get(2).controlPointVertexCounts[cp + 1]    = round(lerp(5, 7, Random(seed, 2156)));
        outputRecord.Get(2).controlPointRadii[cp + 1]           = float2(2.5, 4) * sideScale;
        outputRecord.Get(2).controlPointNoiseAmplitudes[cp + 1] = 0.7 * upScale;

        outputRecord.Get(2).controlPointPositions[cp + 2]       = root + 6.5 * upScale * up + 0.5 * sideScale * forward;
        outputRecord.Get(2).controlPointVertexCounts[cp + 2]    = round(lerp(3, 5, Random(seed, 458)));
        outputRecord.Get(2).controlPointRadii[cp + 2]           = 3.5 * sideScale;
        outputRecord.Get(2).controlPointNoiseAmplitudes[cp + 2] = 0.7 * upScale;

        outputRecord.Get(2).controlPointPositions[cp + 3] =
            root + 8.5 * upScale * up + 0.5 * sideScale * forward + 0.5 * sideScale * side;
        outputRecord.Get(2).controlPointVertexCounts[cp + 3]    = 1;
        outputRecord.Get(2).controlPointRadii[cp + 3]           = 0.0;
        outputRecord.Get(2).controlPointNoiseAmplitudes[cp + 3] = 0.0;
    }

    // Called by every thread of the group, including those without an input record
    outputRecord.OutputComplete();
}
// Coalescing node that turns up to maxSplinesPerRecord tree positions into pine trees.
// Thread i handles input record i and fills spline slot i of two shared "DrawSpline" output
// records: record 0 holds the trunks, record 1 the leaf cones.
//
// NOTE(review): the record template arguments of GroupNodeInputRecords / NodeOutput /
// GroupNodeOutputRecords are not visible in this view of the source - confirm against the real file.
[Shader("node")]
[NodeId("GenerateTree", 1)]
[NodeLaunch("coalescing")]
[NumThreads(maxSplinesPerRecord, 1, 1)]
void GeneratePineTree(
    [MaxRecords(maxSplinesPerRecord)]
    GroupNodeInputRecords inputRecord,

    uint threadId : SV_GroupThreadID,

    [MaxRecords(2)]
    [NodeId("DrawSpline")]
    NodeOutput output)
{
    GroupNodeOutputRecords outputRecord = output.GetGroupNodeOutputRecords(2);

    if (threadId < inputRecord.Count()) {
        const float2 rootXZ        = inputRecord.Get(threadId).position;
        const float3 root          = GetTerrainPosition(rootXZ);
        const float3 terrainNormal = GetTerrainNormal(rootXZ);
        // "up" leans slightly (10%) towards the terrain normal
        const float3 rootUp = lerp(float3(0, 1, 0), terrainNormal, 0.1);

        const uint seed = CombineSeed(asuint(rootXZ.x), asuint(rootXZ.y));

        // Grows from 1.0 (terrainNormal.y >= 1.0) to 1.5 (terrainNormal.y <= 0.6)
        const float stemTerrainFactor = 1.f + (1.f - smoothstep(0.6, 1.0, terrainNormal.y)) * 0.5;

        const float rotationAngle    = Random(seed, 14658) * 2 * PI;
        const float stemHeight       = 1 + Random(seed, 2384) * 2 * stemTerrainFactor;
        const float leafRadiusScale  = 1.5 + Random(seed, 3827);
        const float leafSectionScale = 1.5 + Random(seed, 78934) * 2 * stemTerrainFactor;

        // Spline slot of this tree and the index of its first control point (same in both records)
        const int slot = threadId;
        const int cp   = slot * splineMaxControlPointCount;

        // ---- Record 0: trunk, 2 control points ----
        outputRecord.Get(0).dispatchGrid            = uint3(inputRecord.Count(), 1, 1);
        outputRecord.Get(0).color[slot]             = float3(1.08, 0.72, 0.6);
        outputRecord.Get(0).rotationOffset[slot]    = rotationAngle;
        outputRecord.Get(0).windStrength[slot]      = float2(0.125, 0);
        outputRecord.Get(0).controlPointCount[slot] = 2;

        outputRecord.Get(0).controlPointPositions[cp + 0]       = root - rootUp * 4.f;
        outputRecord.Get(0).controlPointVertexCounts[cp + 0]    = 5;
        outputRecord.Get(0).controlPointRadii[cp + 0]           = 0.4;
        outputRecord.Get(0).controlPointNoiseAmplitudes[cp + 0] = 0.0;

        outputRecord.Get(0).controlPointPositions[cp + 1]       = root + float3(0, stemHeight + 0.5, 0);
        outputRecord.Get(0).controlPointVertexCounts[cp + 1]    = 4;
        outputRecord.Get(0).controlPointRadii[cp + 1]           = 0.3;
        outputRecord.Get(0).controlPointNoiseAmplitudes[cp + 1] = 0;

        // ---- Record 1: leaves, 7 control points ----
        // Leaf color varies with two Perlin noise lookups at the tree position
        const float  green      = saturate(PerlinNoise2D(0.05 * rootXZ));
        const float  brightness = PerlinNoise2D(0.4 * rootXZ + float2(498, 345));
        const float3 leafColor  = float3(0.24, 0.25 + green * 0.15, 0.0) * (1.0 + brightness * 0.4);

        // Heights above the terrain position at which the leaf sections start / end
        const float ringHeight0 = stemHeight;
        const float ringHeight1 = stemHeight + 1 * leafSectionScale;
        const float ringHeight2 = stemHeight + 2 * leafSectionScale;
        const float ringHeight3 = stemHeight + 3 * leafSectionScale;

        outputRecord.Get(1).dispatchGrid            = uint3(inputRecord.Count(), 1, 1);
        outputRecord.Get(1).color[slot]             = leafColor;
        outputRecord.Get(1).rotationOffset[slot]    = rotationAngle;
        outputRecord.Get(1).windStrength[slot]      = float2(0.125, 0.5);
        outputRecord.Get(1).controlPointCount[slot] = 7;

        // closed bottom (single vertex)
        outputRecord.Get(1).controlPointPositions[cp + 0]       = root + float3(0, ringHeight0, 0);
        outputRecord.Get(1).controlPointVertexCounts[cp + 0]    = 1;
        outputRecord.Get(1).controlPointRadii[cp + 0]           = 0.0;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 0] = 0.0;

        outputRecord.Get(1).controlPointPositions[cp + 1]       = root + float3(0, ringHeight0 + 0.5, 0);
        outputRecord.Get(1).controlPointVertexCounts[cp + 1]    = 7;
        outputRecord.Get(1).controlPointRadii[cp + 1]           = leafRadiusScale;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 1] = 0.2;

        outputRecord.Get(1).controlPointPositions[cp + 2]       = root + float3(0, ringHeight1, 0);
        outputRecord.Get(1).controlPointVertexCounts[cp + 2]    = 7;
        outputRecord.Get(1).controlPointRadii[cp + 2]           = leafRadiusScale * 0.3;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 2] = 0.1;

        outputRecord.Get(1).controlPointPositions[cp + 3]       = root + float3(0, ringHeight1 + 0.5, 0);
        outputRecord.Get(1).controlPointVertexCounts[cp + 3]    = 7;
        outputRecord.Get(1).controlPointRadii[cp + 3]           = leafRadiusScale * 0.8;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 3] = 0.2;

        outputRecord.Get(1).controlPointPositions[cp + 4]       = root + float3(0, ringHeight2, 0);
        outputRecord.Get(1).controlPointVertexCounts[cp + 4]    = 7;
        outputRecord.Get(1).controlPointRadii[cp + 4]           = leafRadiusScale * 0.3;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 4] = 0.1;

        outputRecord.Get(1).controlPointPositions[cp + 5]       = root + float3(0, ringHeight2 + 0.5, 0);
        outputRecord.Get(1).controlPointVertexCounts[cp + 5]    = 7;
        outputRecord.Get(1).controlPointRadii[cp + 5]           = leafRadiusScale * 0.6;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 5] = 0.2;

        // closed tip (single vertex)
        outputRecord.Get(1).controlPointPositions[cp + 6]       = root + float3(0, ringHeight3, 0);
        outputRecord.Get(1).controlPointVertexCounts[cp + 6]    = 1;
        outputRecord.Get(1).controlPointRadii[cp + 6]           = 0.0;
        outputRecord.Get(1).controlPointNoiseAmplitudes[cp + 6] = 0.0;
    }

    // Called by every thread of the group, including those without an input record
    outputRecord.OutputComplete();
}
// Divides the xyz components of a homogeneous vector by its w component.
float3 PerspectiveDivision(in float4 vec)
{
    const float3 numerator   = vec.xyz;
    const float  denominator = vec.w;

    return numerator / denominator;
}

// Transforms a homogeneous vector with projectionMatrix (mul(matrix, vector)) and applies the
// perspective division to the result.
float3 PerspectiveProject(in float4x4 projectionMatrix, in float4 vec)
{
    const float4 projected = mul(projectionMatrix, vec);

    return PerspectiveDivision(projected);
}

// Same as above for a 3D point: the point is extended with w = 1 before it is projected.
float3 PerspectiveProject(in float4x4 projectionMatrix, in float3 vec)
{
    const float4 homogeneous = float4(vec, 1);

    return PerspectiveProject(projectionMatrix, homogeneous);
}
// Integer hash: scrambles a 32-bit value with a fixed sequence of xor-shift and multiply steps.
uint Hash(uint seed)
{
    uint h = seed;

    h = (h ^ 61u) ^ (h >> 16u);
    h *= 9u;
    h ^= h >> 4u;
    h *= 0x27d4eb2du;
    h ^= h >> 15u;

    return h;
}

// Mixes value b into seed a.
// Addition binds tighter than xor, so the whole sum is xor-ed onto a; the parentheses only make
// that existing evaluation order explicit.
uint CombineSeed(uint a, uint b)
{
    return a ^ (Hash(b) + 0x9e3779b9 + (a << 6) + (a >> 2));
}

// Mixes three values by folding them left to right.
uint CombineSeed(uint a, uint b, uint c)
{
    const uint ab = CombineSeed(a, b);

    return CombineSeed(ab, c);
}

// Mixes four values by folding them left to right.
uint CombineSeed(uint a, uint b, uint c, uint d)
{
    const uint ab = CombineSeed(a, b);

    return CombineSeed(ab, c, d);
}
// Returns the identity matrix of matrix type T.
//
// The generic version casts a float4x4 identity matrix to T; float3x3 and float4x4 have explicit
// specializations below that construct the matrix directly.
//
// NOTE(review): the template parameter list of the generic version is not visible in this view of
// the source (it reads as a bare "template"); "<typename T>" is reconstructed from the use of T.
template <typename T>
T IdentityMatrix()
{
    // float4x4 identity matrix should(TM) convert to identity matrix for smaller matrices
    return (T)float4x4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1);
}

template <>
float3x3 IdentityMatrix()
{
    // Fixed: the last diagonal element was 0, which zeroed the z component of every
    // vector transformed with the "identity" matrix.
    return float3x3(1, 0, 0,  //
                    0, 1, 0,  //
                    0, 0, 1);
}

template <>
float4x4 IdentityMatrix()
{
    return float4x4(1, 0, 0, 0,  //
                    0, 1, 0, 0,  //
                    0, 0, 1, 0,  //
                    0, 0, 0, 1);
}
// Axis-aligned box given by its minimum and maximum corner, with helpers to transform the box and
// to test it against six clip planes.
struct AxisAlignedBoundingBox {
    float3 min;
    float3 max;

    // Replaces the box with an axis-aligned box around the transformed box:
    // the center is transformed as a point (w = 1), the half size is multiplied with the
    // component-wise absolute value of the upper-left 3x3 part of the matrix.
    void Transform(const in float4x4 transform)
    {
        const float3 boxCenter   = (max + min) * 0.5;
        const float3 boxHalfSize = max - boxCenter;

        const float3x3 absLinearPart = abs((float3x3)transform);

        const float3 newCenter   = mul(transform, float4(boxCenter, 1.0)).xyz;
        const float3 newHalfSize = mul(absLinearPart, boxHalfSize);

        min = newCenter - newHalfSize;
        max = newCenter + newHalfSize;
    }

    // Returns false as soon as the box lies completely on the negative side of one of the planes.
    bool IsVisible(const in float4 clipPlanes[6])
    {
        for (int planeIndex = 0; planeIndex < 6; ++planeIndex) {
            const float4 plane = clipPlanes[planeIndex];

            // Pick, per axis, the box coordinate that gives the largest plane value
            float3 bestCorner;
            bestCorner.x = plane.x < 0.f ? min.x : max.x;
            bestCorner.y = plane.y < 0.f ? min.y : max.y;
            bestCorner.z = plane.z < 0.f ? min.z : max.z;

            const float planeValue = dot(plane.xyz, bestCorner) + plane.w;

            // Even the best corner is behind this plane, so the whole box is
            if (planeValue < 0.0f) {
                return false;
            }
        }

        return true;
    }

    // Convenience overload that takes the planes wrapped in a ClipPlanes struct.
    bool IsVisible(const in ClipPlanes clipPlanes)
    {
        return IsVisible(clipPlanes.planes);
    }

    // Tests a transformed copy of the box; the box itself is left unchanged.
    bool IsVisible(const in float4x4 transform, const in float4 clipPlanes[6])
    {
        AxisAlignedBoundingBox transformedBox;
        transformedBox.min = min;
        transformedBox.max = max;

        transformedBox.Transform(transform);

        return transformedBox.IsVisible(clipPlanes);
    }

    // Convenience overload that takes the planes wrapped in a ClipPlanes struct.
    bool IsVisible(const in float4x4 transform, const in ClipPlanes clipPlanes)
    {
        return IsVisible(transform, clipPlanes.planes);
    }
};
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#if __cplusplus
+#include "misc/math.h"
+#endif  // __cplusplus
+
+#if __cplusplus
+struct WorkGraphCBData {  // C++ view of the constant buffer; keep member order in sync with the cbuffer in the #else branch
+    Mat4 ViewProjection;
+    Mat4 PreviousViewProjection;
+    Mat4 InverseViewProjection;
+    Vec4 CameraPosition;
+    Vec4 PreviousCameraPosition;
+    uint32_t ShaderTime;          // filled from the render module's millisecond shader clock
+    uint32_t PreviousShaderTime;  // value of that clock before the current frame's increment
+    float WindStrength;
+    float WindDirection;          // the render module converts the UI value with DEG_TO_RAD before upload
+};
+#else
+cbuffer WorkGraphCBData : register(b0)  // shader view of the same data, bound at b0
+{
+    matrix ViewProjection;
+    matrix PreviousViewProjection;
+    matrix InverseViewProjection;
+    float4 CameraPosition;
+    float4 PreviousCameraPosition;
+    uint ShaderTime;
+    uint PreviousShaderTime;
+    float WindStrength;
+    float WindDirection;
+}
+#endif  // __cplusplus
\ No newline at end of file
diff --git a/meshNodeSample/shaders/world.hlsl b/meshNodeSample/shaders/world.hlsl
new file mode 100644
index 0000000..17fbe81
--- /dev/null
+++ b/meshNodeSample/shaders/world.hlsl
@@ -0,0 +1,176 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "common.hlsl"
+
+// Record for launching a grid of chunks
+// grid size & offset are computed based on current camera view
+struct ChunkGridRecord {
+    uint2 grid : SV_DispatchGrid;  // number of chunk thread groups to launch in x/y
+    int2 offset;                   // chunk coordinate of the first group; added to the group id by the ChunkGrid node
+};
+
+float2 ComputeFarPlaneCorner(in float clipX, in float clipY)  // xz position of one far-plane frustum corner, limited to worldGridMaxDistance from the camera
+{
+    // compute position of frustum corner on far plane
+    const float3 cornerWorldPosition = PerspectiveProject(InverseViewProjection, float3(clipX, clipY, 1.f));
+
+    const float2 viewVector = cornerWorldPosition.xz - GetCameraPosition().xz;
+    const float viewVectorLength = length(viewVector);
+    // limit view vector to maximum terrain distance (scale is capped at 1, so the vector is only ever shortened)
+    const float viewVectorScale = min(worldGridMaxDistance / viewVectorLength, 1.f);
+
+    return GetCameraPosition().xz + viewVector * viewVectorScale;
+}
+
+void minmax(inout float2 minTerrainPosition, inout float2 maxTerrainPosition, in float2 position)  // grows the [min, max] rectangle to include position
+{
+    minTerrainPosition = min(minTerrainPosition, position);
+    maxTerrainPosition = max(maxTerrainPosition, position);
+}
+
+[Shader("node")]
+[NodeLaunch("thread")]
+void World(
+    [MaxRecords(1)]
+    [NodeId("ChunkGrid")]
+    NodeOutput<ChunkGridRecord> chunkGridOutput)  // record type restored: the record written below has the .grid/.offset fields of ChunkGridRecord
+{
+    // This node computes the world-space extents of the chunk grid based on the current camera view frustum
+
+    // Compute bounding box of view frustum
+    // Start with camera position
+    float2 minTerrainPosition = GetCameraPosition().xz;
+    float2 maxTerrainPosition = minTerrainPosition;
+
+    // Add far plane corners to view frustum bounding box
+    minmax(minTerrainPosition, 
maxTerrainPosition, ComputeFarPlaneCorner(-1, -1));
+    minmax(minTerrainPosition, maxTerrainPosition, ComputeFarPlaneCorner(-1, +1));
+    minmax(minTerrainPosition, maxTerrainPosition, ComputeFarPlaneCorner(+1, -1));
+    minmax(minTerrainPosition, maxTerrainPosition, ComputeFarPlaneCorner(+1, +1));
+
+    // Compute & round chunk coordinates (min rounds down, max rounds up, so partially covered chunks are included)
+    const int2 minChunkPosition = floor(minTerrainPosition / chunkSize);
+    const int2 maxChunkPosition = ceil(maxTerrainPosition / chunkSize);
+
+    // Dispatch one thread group per chunk
+    ThreadNodeOutputRecords<ChunkGridRecord> chunkGridRecord = chunkGridOutput.GetThreadNodeOutputRecords(1);
+
+    chunkGridRecord.Get().grid = clamp(maxChunkPosition - minChunkPosition, 0, 32);  // upper bound matches [NodeMaxDispatchGrid(32, 32, 1)] on ChunkGrid
+    chunkGridRecord.Get().offset = minChunkPosition;
+
+    chunkGridRecord.OutputComplete();
+}
+
+int GetTerrainChunkLevelOfDetail(in int2 chunkGridPosition)  // level of detail 0..3, one step per 3 * chunkSize of distance from the camera to the chunk center
+{
+    const float2 chunkWorldPosition = chunkGridPosition * chunkSize;
+    const float3 chunkWorldCenterPosition = GetTerrainPosition(chunkWorldPosition + chunkSize * 0.5);
+    const float distanceToCamera = distance(GetCameraPosition(), chunkWorldCenterPosition);
+
+    return clamp(distanceToCamera / (3 * chunkSize), 0, 3);
+}
+
+[Shader("node")]
+[NodeLaunch("broadcasting")]
+[NodeMaxDispatchGrid(32, 32, 1)]
+// each thread corresponds to one tile
+[NumThreads(tilesPerChunk, tilesPerChunk, 1)]
+void ChunkGrid(
+    DispatchNodeInputRecord<ChunkGridRecord> inputRecord,  // record type restored: the body reads it into a ChunkGridRecord
+
+    int2 groupId : SV_GroupId,
+    int2 groupThreadId : SV_GroupThreadID,
+
+    [MaxRecords(1)]
+    [NodeId("DrawTerrainChunk")]
+    NodeOutput terrainOutput,  // NOTE(review): <RecordType> argument was stripped in extraction; the record struct is not declared in this file - restore from common.hlsl
+
+    [MaxRecords(tilesPerChunk * tilesPerChunk)]
+    [NodeId("Tile")]
+    [NodeArraySize(3)]
+    NodeOutputArray tileOutput)  // NOTE(review): <RecordType> argument was stripped in extraction; restore from common.hlsl
+{
+    const ChunkGridRecord input = inputRecord.Get();
+    const int2 chunkGridPosition = input.offset + groupId;
+    const float2 chunkWorldPosition = chunkGridPosition * chunkSize;
+
+    const ClipPlanes clipPlanes = ComputeClipPlanes();
+
+    const AxisAlignedBoundingBox chunkBoundingBox = GetGridBoundingBox(chunkGridPosition, 
chunkSize, -100, 300);
+    const bool isChunkVisible = chunkBoundingBox.IsVisible(clipPlanes);  // computed from the group id only, so it is the same for every thread of the group
+
+    // Terrain output: at most one record per thread group, requested only for visible chunks
+    {
+        const bool hasTerrainOutput = isChunkVisible;
+
+        GroupNodeOutputRecords terrainOutputRecord =  // NOTE(review): <RecordType> argument was stripped in extraction; restore from common.hlsl
+            terrainOutput.GetGroupNodeOutputRecords(hasTerrainOutput);
+
+        if (hasTerrainOutput) {
+            const int levelOfDetail = GetTerrainChunkLevelOfDetail(chunkGridPosition);
+            const uint dispatchSize = 8 / clamp(1U << levelOfDetail, 1, 8);  // 8, 4, 2, 1 for level of detail 0..3
+
+            terrainOutputRecord.Get().dispatchGrid = uint3(dispatchSize, dispatchSize, 1);
+            terrainOutputRecord.Get().chunkGridPosition = chunkGridPosition;
+            terrainOutputRecord.Get().levelOfDetail = levelOfDetail;
+
+            terrainOutputRecord.Get().levelOfDetailTransition.x =  // true where the neighbouring chunk at (-1, 0) has a higher level-of-detail index
+                GetTerrainChunkLevelOfDetail(chunkGridPosition + int2(-1, 0)) > levelOfDetail;
+            terrainOutputRecord.Get().levelOfDetailTransition.y =  // neighbour at (0, -1)
+                GetTerrainChunkLevelOfDetail(chunkGridPosition + int2(0, -1)) > levelOfDetail;
+            terrainOutputRecord.Get().levelOfDetailTransition.z =  // neighbour at (1, 0)
+                GetTerrainChunkLevelOfDetail(chunkGridPosition + int2(1, 0)) > levelOfDetail;
+            terrainOutputRecord.Get().levelOfDetailTransition.w =  // neighbour at (0, 1)
+                GetTerrainChunkLevelOfDetail(chunkGridPosition + int2(0, 1)) > levelOfDetail;
+        }
+
+        terrainOutputRecord.OutputComplete();
+    }
+
+    // Tile output: one candidate record per thread, i.e. per tile of the chunk
+    if (isChunkVisible)
+    {
+        const int2 threadGridPosition = chunkGridPosition * tilesPerChunk + groupThreadId;
+        const float2 threadWorldPosition = threadGridPosition * tileSize;
+
+        const AxisAlignedBoundingBox tileBoundingBox = GetGridBoundingBox(threadGridPosition, tileSize, -100, 300);
+
+        const bool hasTileOutput = tileBoundingBox.IsVisible(clipPlanes);
+
+        // Get biome weights in center of tile
+        const float3 biomeWeights = GetBiomeWeights(threadWorldPosition + tileSize * 0.5);
+
+        // Classify biome tile to launch by dominant biome (index of the largest weight; the result selects the output array slot)
+        const uint biome = biomeWeights.x > biomeWeights.y ? (biomeWeights.x > biomeWeights.z ? 0 : 2)
+                                                           : (biomeWeights.y > biomeWeights.z ? 
1 : 2);
+
+        ThreadNodeOutputRecords tileOutputRecord =  // NOTE(review): <RecordType> argument was stripped in extraction; restore from common.hlsl
+            tileOutput[biome].GetThreadNodeOutputRecords(hasTileOutput);
+
+        if (hasTileOutput) {
+
+            tileOutputRecord.Get().position = chunkGridPosition * tilesPerChunk + groupThreadId;  // same expression as threadGridPosition above
+        }
+
+        tileOutputRecord.OutputComplete();
+    }
+}
diff --git a/meshNodeSample/workgraphrendermodule.cpp b/meshNodeSample/workgraphrendermodule.cpp
new file mode 100644
index 0000000..fc2176e
--- /dev/null
+++ b/meshNodeSample/workgraphrendermodule.cpp
@@ -0,0 +1,538 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE. 
+
+#include "workgraphrendermodule.h"
+
+#include "core/framework.h"
+#include "core/scene.h"
+#include "core/uimanager.h"
+#include "misc/assert.h"
+
+// Render components
+#include "render/buffer.h"
+#include "render/device.h"
+#include "render/dynamicresourcepool.h"
+#include "render/parameterset.h"
+#include "render/pipelinedesc.h"
+#include "render/pipelineobject.h"
+#include "render/profiler.h"
+#include "render/rasterview.h"
+#include "render/rootsignature.h"
+#include "render/rootsignaturedesc.h"
+#include "render/texture.h"
+
+// D3D12 Cauldron implementation
+#include "render/dx12/buffer_dx12.h"
+#include "render/dx12/commandlist_dx12.h"
+#include "render/dx12/device_dx12.h"
+#include "render/dx12/gpuresource_dx12.h"
+#include "render/dx12/rootsignature_dx12.h"
+
+// common files with shaders
+#include "shaders/shadingcommon.h"
+#include "shaders/workgraphcommon.h"
+
+// shader compiler
+#include "shadercompiler.h"
+
+#include  // NOTE(review): the <header> name was stripped in extraction; this file uses std::vector and std::swap - restore from the upstream sample
+
+using namespace cauldron;
+
+// Name for work graph program inside the state object
+static const wchar_t* WorkGraphProgramName = L"WorkGraph";
+
+WorkGraphRenderModule::WorkGraphRenderModule()
+    : RenderModule(L"WorkGraphRenderModule")
+{
+}
+
+WorkGraphRenderModule::~WorkGraphRenderModule()
+{
+    // Delete work graph (the state object is a COM object and is released; the Cauldron objects are deleted)
+    if (m_pWorkGraphStateObject)
+        m_pWorkGraphStateObject->Release();
+    if (m_pWorkGraphParameterSet)
+        delete m_pWorkGraphParameterSet;
+    if (m_pWorkGraphRootSignature)
+        delete m_pWorkGraphRootSignature;
+    if (m_pWorkGraphBackingMemoryBuffer)
+        delete m_pWorkGraphBackingMemoryBuffer;
+
+    // Delete shading pipeline
+    if (m_pShadingPipeline)
+        delete m_pShadingPipeline;
+    if (m_pShadingRootSignature)
+        delete m_pShadingRootSignature;
+    if (m_pShadingParameterSet)
+        delete m_pShadingParameterSet;
+}
+
+void WorkGraphRenderModule::Init(const json& initData)
+{
+    InitTextures();  // runs first: the two calls below read the texture pointers it sets
+    InitWorkGraphProgram();
+    InitShadingPipeline();
+
+    cauldron::UISection uiSection = {};
+    uiSection.SectionName = "Procedural Generation";
+
+    uiSection.AddFloatSlider("Wind Strength", &m_WindStrength, 0.f, 2.5f);
+    uiSection.AddFloatSlider("Wind Direction", &m_WindDirection, 0.f, 360.f, nullptr, nullptr, false, "%.1f");  // degrees; converted with DEG_TO_RAD in Execute
+
+    GetUIManager()->RegisterUIElements(uiSection);
+
+    SetModuleReady(true);
+}
+
+void WorkGraphRenderModule::Execute(double deltaTime, cauldron::CommandList* pCmdList)
+{
+    const auto previousShaderTime = m_shaderTime;
+
+    // Increment shader time (deltaTime * 1000 is accumulated into the uint32_t millisecond counter)
+    m_shaderTime += static_cast<uint32_t>(deltaTime * 1000.0);
+
+    // Get render resolution based on upscaler state
+    const auto upscaleState = GetFramework()->GetUpscalingState();
+    const auto& resInfo = GetFramework()->GetResolutionInfo();
+
+    uint32_t width, height;
+    if (upscaleState == UpscalerState::None || upscaleState == UpscalerState::PostUpscale)
+    {
+        width = resInfo.DisplayWidth;
+        height = resInfo.DisplayHeight;
+    }
+    else
+    {
+        width = resInfo.RenderWidth;
+        height = resInfo.RenderHeight;
+    }
+
+    {
+        GPUScopedProfileCapture workGraphMarker(pCmdList, L"Work Graph");
+
+        std::vector<Barrier> barriers;  // element type restored: holds the results of Barrier::Transition; reused (swapped) after the dispatch
+        barriers.push_back(Barrier::Transition(m_pGBufferColorOutput->GetResource(),
+                                               ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource,
+                                               ResourceState::RenderTargetResource));
+        barriers.push_back(Barrier::Transition(m_pGBufferNormalOutput->GetResource(),
+                                               ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource,
+                                               ResourceState::RenderTargetResource));
+        barriers.push_back(Barrier::Transition(m_pGBufferMotionOutput->GetResource(),
+                                               ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource,
+                                               ResourceState::RenderTargetResource));
+        barriers.push_back(Barrier::Transition(
+            m_pGBufferDepthOutput->GetResource(), ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource, ResourceState::DepthWrite));
+
+        ResourceBarrier(pCmdList, static_cast<uint32_t>(barriers.size()), barriers.data());  // cast target restored as uint32_t - confirm against ResourceBarrier's count parameter
+
+        // Clear color targets
+        float clearColor[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+        for (const auto* rasterView 
: m_pGBufferRasterViews)
+        {
+            ClearRenderTarget(pCmdList, &rasterView->GetResourceView(), clearColor);
+        }
+
+        // Clear depth target
+        ClearDepthStencil(pCmdList, &m_pGBufferDepthRasterView->GetResourceView(), 0);
+
+        // Begin raster with render targets
+        BeginRaster(pCmdList, static_cast<uint32_t>(m_pGBufferRasterViews.size()), m_pGBufferRasterViews.data(), m_pGBufferDepthRasterView, nullptr);  // cast target restored as uint32_t - confirm against BeginRaster
+        SetViewportScissorRect(pCmdList, 0, 0, width, height, 0.f, 1.f);
+
+        const auto* currentCamera = GetScene()->GetCurrentCamera();
+
+        WorkGraphCBData workGraphData = {};
+        workGraphData.ViewProjection = currentCamera->GetProjectionJittered() * currentCamera->GetView();
+        workGraphData.PreviousViewProjection = currentCamera->GetPrevProjectionJittered() * currentCamera->GetPreviousView();
+        workGraphData.InverseViewProjection = InverseMatrix(workGraphData.ViewProjection);
+        workGraphData.CameraPosition = currentCamera->GetCameraTranslation();
+        workGraphData.PreviousCameraPosition = InverseMatrix(currentCamera->GetPreviousView()).getCol3();  // last column of the inverted previous view matrix
+        workGraphData.ShaderTime = m_shaderTime;
+        workGraphData.PreviousShaderTime = previousShaderTime;
+        workGraphData.WindStrength = m_WindStrength;
+        workGraphData.WindDirection = DEG_TO_RAD(m_WindDirection);  // UI slider value is in degrees
+
+        BufferAddressInfo workGraphDataInfo = GetDynamicBufferPool()->AllocConstantBuffer(sizeof(WorkGraphCBData), &workGraphData);
+        m_pWorkGraphParameterSet->UpdateRootConstantBuffer(&workGraphDataInfo, 0);
+
+        // Bind all the parameters
+        m_pWorkGraphParameterSet->Bind(pCmdList, nullptr);
+
+        // Dispatch the work graph
+        {
+            D3D12_DISPATCH_GRAPH_DESC dispatchDesc = {};
+            dispatchDesc.Mode = D3D12_DISPATCH_MODE_NODE_CPU_INPUT;
+            dispatchDesc.NodeCPUInput = {};
+            dispatchDesc.NodeCPUInput.EntrypointIndex = m_WorkGraphEntryPointIndex;
+            // Launch graph with one record
+            dispatchDesc.NodeCPUInput.NumRecords = 1;
+            // Record does not contain any data
+            dispatchDesc.NodeCPUInput.RecordStrideInBytes = 0;
+            dispatchDesc.NodeCPUInput.pRecords = nullptr; 
+
+            // Get ID3D12GraphicsCommandList10 from Cauldron command list
+            ID3D12GraphicsCommandList10* commandList = nullptr;  // initialised like d3dDevice in InitWorkGraphProgram; QueryInterface fills it in
+            CauldronThrowOnFail(pCmdList->GetImpl()->DX12CmdList()->QueryInterface(IID_PPV_ARGS(&commandList)));
+
+            commandList->SetProgram(&m_WorkGraphProgramDesc);
+            commandList->DispatchGraph(&dispatchDesc);
+
+            // Release command list (only releases additional reference created by QueryInterface)
+            commandList->Release();
+
+            // Clear backing memory initialization flag, as the graph has run at least once now
+            m_WorkGraphProgramDesc.WorkGraph.Flags &= ~D3D12_SET_WORK_GRAPH_FLAG_INITIALIZE;
+        }
+
+        EndRaster(pCmdList, nullptr);
+
+        // Transition render targets back to readable state (reuses the barriers from above with source and destination swapped)
+        for (auto& barrier : barriers)
+        {
+            std::swap(barrier.DestState, barrier.SourceState);
+        }
+
+        ResourceBarrier(pCmdList, static_cast<uint32_t>(barriers.size()), barriers.data());  // cast target restored as uint32_t - confirm against ResourceBarrier's count parameter
+    }
+
+    {
+        GPUScopedProfileCapture shadingMarker(pCmdList, L"Shading");
+
+        // Render modules expect resources coming in/going out to be in a shader read state
+        Barrier barrier = Barrier::Transition(
+            m_pShadingOutput->GetResource(), ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource, ResourceState::UnorderedAccess);
+        ResourceBarrier(pCmdList, 1, &barrier);
+
+        BufferAddressInfo upscaleInfo =
+            GetDynamicBufferPool()->AllocConstantBuffer(sizeof(UpscalerInformation), &GetScene()->GetSceneInfo().UpscalerInfo.FullScreenScaleRatio);
+        m_pShadingParameterSet->UpdateRootConstantBuffer(&upscaleInfo, 0);
+
+        const auto* currentCamera = GetScene()->GetCurrentCamera();
+
+        ShadingCBData shadingData = {};
+        shadingData.InverseViewProjection = InverseMatrix(currentCamera->GetProjectionJittered() * currentCamera->GetView());
+        shadingData.CameraPosition = currentCamera->GetCameraTranslation();
+
+        BufferAddressInfo shadingInfo = GetDynamicBufferPool()->AllocConstantBuffer(sizeof(ShadingCBData), &shadingData);
+        m_pShadingParameterSet->UpdateRootConstantBuffer(&shadingInfo, 1);
+
+        // Bind all the 
parameters
+        m_pShadingParameterSet->Bind(pCmdList, m_pShadingPipeline);
+
+        SetPipelineState(pCmdList, m_pShadingPipeline);
+
+        const uint32_t numGroupX = DivideRoundingUp(width, s_shadingThreadGroupSizeX);  // enough thread groups to cover the full width
+        const uint32_t numGroupY = DivideRoundingUp(height, s_shadingThreadGroupSizeY);
+        Dispatch(pCmdList, numGroupX, numGroupY, 1);
+
+        // Render modules expect resources coming in/going out to be in a shader read state
+        barrier = Barrier::Transition(
+            m_pShadingOutput->GetResource(), ResourceState::UnorderedAccess, ResourceState::NonPixelShaderResource | ResourceState::PixelShaderResource);
+        ResourceBarrier(pCmdList, 1, &barrier);
+    }
+}
+
+void WorkGraphRenderModule::OnResize(const cauldron::ResolutionInfo& resInfo)  // empty: this module does nothing on resize
+{
+}
+
+void WorkGraphRenderModule::InitTextures()  // looks up the framework's output and G-buffer textures and requests raster views for them
+{
+    m_pShadingOutput = GetFramework()->GetColorTargetForCallback(GetName());
+    CauldronAssert(ASSERT_CRITICAL, m_pShadingOutput != nullptr, L"Couldn't find or create the render target of WorkGraphRenderModule.");
+
+    m_pGBufferColorOutput = GetFramework()->GetRenderTexture(L"GBufferColorTarget");
+    m_pGBufferNormalOutput = GetFramework()->GetRenderTexture(L"GBufferNormalTarget");
+    m_pGBufferMotionOutput = GetFramework()->GetRenderTexture(L"GBufferMotionVectorTarget");
+    m_pGBufferDepthOutput = GetFramework()->GetRenderTexture(L"GBufferDepthTarget");
+
+    m_pGBufferRasterViews[0] = GetRasterViewAllocator()->RequestRasterView(m_pGBufferColorOutput, ViewDimension::Texture2D);  // slot order must match the render target formats set in InitWorkGraphProgram
+    m_pGBufferRasterViews[1] = GetRasterViewAllocator()->RequestRasterView(m_pGBufferNormalOutput, ViewDimension::Texture2D);
+    m_pGBufferRasterViews[2] = GetRasterViewAllocator()->RequestRasterView(m_pGBufferMotionOutput, ViewDimension::Texture2D);
+
+    m_pGBufferDepthRasterView = GetRasterViewAllocator()->RequestRasterView(m_pGBufferDepthOutput, ViewDimension::Texture2D);
+}
+
+void WorkGraphRenderModule::InitWorkGraphProgram()
+{
+    // Create root signature for work graph
+    RootSignatureDesc workGraphRootSigDesc;
+
workGraphRootSigDesc.AddConstantBufferView(0, ShaderBindStage::Compute, 1);
+    // Work graphs with mesh nodes use graphics root signature instead of compute root signature
+    workGraphRootSigDesc.m_PipelineType = PipelineType::Graphics;
+
+    m_pWorkGraphRootSignature = RootSignature::CreateRootSignature(L"MeshNodeSample_WorkGraphRootSignature", workGraphRootSigDesc);
+
+    // Create parameter set for root signature
+    m_pWorkGraphParameterSet = ParameterSet::CreateParameterSet(m_pWorkGraphRootSignature);
+    m_pWorkGraphParameterSet->SetRootConstantBufferResource(GetDynamicBufferPool()->GetResource(), sizeof(WorkGraphCBData), 0);
+
+    // Get D3D12 device
+    // CreateStateObject is only available on ID3D12Device9
+    ID3D12Device9* d3dDevice = nullptr;
+    CauldronThrowOnFail(GetDevice()->GetImpl()->DX12Device()->QueryInterface(IID_PPV_ARGS(&d3dDevice)));
+
+    // Check if mesh nodes are supported
+    {
+        D3D12_FEATURE_DATA_D3D12_OPTIONS21 options = {};
+        CauldronThrowOnFail(d3dDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS21, &options, sizeof(options)));
+
+        // check if work graphs tier 1.1 (mesh nodes) is supported
+        if (options.WorkGraphsTier < D3D12_WORK_GRAPHS_TIER_1_1)
+        {
+            CauldronCritical(L"Work graphs tier 1.1 (mesh nodes) are not supported on the current device.");
+        }
+    }
+
+    // Create work graph
+    CD3DX12_STATE_OBJECT_DESC stateObjectDesc(D3D12_STATE_OBJECT_TYPE_EXECUTABLE);
+
+    // configure draw nodes to use graphics root signature
+    auto configSubobject = stateObjectDesc.CreateSubobject<CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT>();  // subobject type restored (d3dx12 helper providing SetFlags)
+    configSubobject->SetFlags(D3D12_STATE_OBJECT_FLAG_WORK_GRAPHS_USE_GRAPHICS_STATE_FOR_GLOBAL_ROOT_SIGNATURE);
+
+    // set root signature for work graph
+    auto rootSignatureSubobject = stateObjectDesc.CreateSubobject<CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT>();  // subobject type restored (d3dx12 helper providing SetRootSignature)
+    rootSignatureSubobject->SetRootSignature(m_pWorkGraphRootSignature->GetImpl()->DX12RootSignature());
+
+    auto workgraphSubobject = stateObjectDesc.CreateSubobject<CD3DX12_WORK_GRAPH_SUBOBJECT>();  // subobject type restored (d3dx12 helper providing IncludeAllAvailableNodes/SetProgramName)
+    workgraphSubobject->IncludeAllAvailableNodes();
+
workgraphSubobject->SetProgramName(WorkGraphProgramName);
+
+    // add DXIL shader libraries
+    ShaderCompiler shaderCompiler;
+
+    // list of compiled shaders to be released once the work graph is created
+    std::vector compiledShaders;  // NOTE(review): <element type> was stripped in extraction; it must be the pointer type returned by ShaderCompiler::CompileShader
+
+    // Helper function for adding a shader library to the work graph state object
+    const auto AddShaderLibrary = [&](const wchar_t* shaderFileName) {
+        // compile shader as library
+        auto* blob = shaderCompiler.CompileShader(shaderFileName, L"lib_6_9", nullptr);  // NOTE(review): dereferenced unchecked below, while the release loop later null-checks each blob - confirm CompileShader cannot return null
+        auto shaderBytecode = CD3DX12_SHADER_BYTECODE(blob->GetBufferPointer(), blob->GetBufferSize());
+
+        // add blob to state object
+        auto librarySubobject = stateObjectDesc.CreateSubobject<CD3DX12_DXIL_LIBRARY_SUBOBJECT>();  // subobject type restored (d3dx12 helper providing SetDXILLibrary)
+        librarySubobject->SetDXILLibrary(&shaderBytecode);
+
+        // add shader blob to be released later
+        compiledShaders.push_back(blob);
+    };
+
+    // Helper function for adding a pixel shader to the work graph state object
+    // Pixel shaders need to be compiled with "ps" target and as such the DXIL library object needs to specify a name
+    // for the pixel shader (exportName) with which the generic program can reference the pixel shader
+    const auto AddPixelShader = [&](const wchar_t* shaderFileName, const wchar_t* entryPoint) {
+        // compile shader as pixel shader
+        auto* blob = shaderCompiler.CompileShader(shaderFileName, L"ps_6_9", entryPoint);
+        auto shaderBytecode = CD3DX12_SHADER_BYTECODE(blob->GetBufferPointer(), blob->GetBufferSize());
+
+        // add blob to state object
+        auto librarySubobject = stateObjectDesc.CreateSubobject<CD3DX12_DXIL_LIBRARY_SUBOBJECT>();  // subobject type restored (d3dx12 helper providing SetDXILLibrary)
+        librarySubobject->SetDXILLibrary(&shaderBytecode);  // NOTE(review): the comment above mentions naming the export, but no export is defined here - verify against the upstream sample
+
+        // add shader blob to be released later
+        compiledShaders.push_back(blob);
+    };
+
+    // ===================================================================
+    // State object for graphics PSO state description in generic programs
+
+    // Rasterizer state configuration without culling
+    auto rasterizerNoCullingSubobject = stateObjectDesc.CreateSubobject<CD3DX12_RASTERIZER_SUBOBJECT>();  // subobject type restored (d3dx12 helper providing SetFillMode/SetCullMode)
+
rasterizerNoCullingSubobject->SetFrontCounterClockwise(true);
+    rasterizerNoCullingSubobject->SetFillMode(D3D12_FILL_MODE_SOLID);
+    rasterizerNoCullingSubobject->SetCullMode(D3D12_CULL_MODE_NONE);
+
+    // Rasterizer state configuration with backface culling
+    auto rasterizerBackfaceCullingSubobject = stateObjectDesc.CreateSubobject<CD3DX12_RASTERIZER_SUBOBJECT>();  // subobject type restored
+    rasterizerBackfaceCullingSubobject->SetFrontCounterClockwise(true);
+    rasterizerBackfaceCullingSubobject->SetFillMode(D3D12_FILL_MODE_SOLID);
+    rasterizerBackfaceCullingSubobject->SetCullMode(D3D12_CULL_MODE_BACK);
+
+    // Primitive topology configuration
+    auto primitiveTopologySubobject = stateObjectDesc.CreateSubobject<CD3DX12_PRIMITIVE_TOPOLOGY_SUBOBJECT>();  // subobject type restored
+    primitiveTopologySubobject->SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE);
+
+    // Depth stencil format configuration
+    auto depthStencilFormatSubobject = stateObjectDesc.CreateSubobject<CD3DX12_DEPTH_STENCIL_FORMAT_SUBOBJECT>();  // subobject type restored
+    depthStencilFormatSubobject->SetDepthStencilFormat(GetDXGIFormat(m_pGBufferDepthOutput->GetFormat()));
+
+    // Render target format configuration (slot order matches m_pGBufferRasterViews: color, normal, motion)
+    auto renderTargetFormatSubobject = stateObjectDesc.CreateSubobject<CD3DX12_RENDER_TARGET_FORMATS_SUBOBJECT>();  // subobject type restored
+    renderTargetFormatSubobject->SetNumRenderTargets(3);
+    renderTargetFormatSubobject->SetRenderTargetFormat(0, GetDXGIFormat(m_pGBufferColorOutput->GetFormat()));
+    renderTargetFormatSubobject->SetRenderTargetFormat(1, GetDXGIFormat(m_pGBufferNormalOutput->GetFormat()));
+    renderTargetFormatSubobject->SetRenderTargetFormat(2, GetDXGIFormat(m_pGBufferMotionOutput->GetFormat()));
+
+    // =============================
+    // Generic programs (mesh nodes)
+
+    // Helper function to add a mesh node generic program subobject
+    const auto AddMeshNode = [&](const wchar_t* meshShaderExportName, const wchar_t* pixelShaderExportName, bool backfaceCulling) {
+        auto genericProgramSubobject = stateObjectDesc.CreateSubobject<CD3DX12_GENERIC_PROGRAM_SUBOBJECT>();  // subobject type restored (d3dx12 helper providing AddExport/AddSubobject)
+        // add mesh shader
+        genericProgramSubobject->AddExport(meshShaderExportName);
+        // add pixel shader
+        genericProgramSubobject->AddExport(pixelShaderExportName);
+
+        // add graphics 
state subobjects
+        if (backfaceCulling)
+        {
+            genericProgramSubobject->AddSubobject(*rasterizerBackfaceCullingSubobject);
+        }
+        else
+        {
+            genericProgramSubobject->AddSubobject(*rasterizerNoCullingSubobject);
+        }
+        genericProgramSubobject->AddSubobject(*primitiveTopologySubobject);
+        genericProgramSubobject->AddSubobject(*depthStencilFormatSubobject);
+        genericProgramSubobject->AddSubobject(*renderTargetFormatSubobject);
+    };
+
+    // ===================================
+    // Add shader libraries and mesh nodes
+
+    // Shader libraries for procedural world generation
+    AddShaderLibrary(L"world.hlsl");
+    AddShaderLibrary(L"biomes.hlsl");
+    AddShaderLibrary(L"tree.hlsl");
+    AddShaderLibrary(L"rock.hlsl");
+
+    // Terrain Mesh Node (backface culling enabled)
+    AddShaderLibrary(L"terrainrenderer.hlsl");
+    AddPixelShader(L"terrainrenderer.hlsl", L"TerrainPixelShader");
+    AddMeshNode(L"TerrainMeshShader", L"TerrainPixelShader", true);
+
+    // Spline Mesh Node for trees & rocks (backface culling enabled)
+    AddShaderLibrary(L"splinerenderer.hlsl");
+    AddPixelShader(L"splinerenderer.hlsl", L"SplinePixelShader");
+    AddMeshNode(L"SplineMeshShader", L"SplinePixelShader", true);
+
+    // Grass Nodes (both mesh shaders share one pixel shader; no culling)
+    AddShaderLibrary(L"densegrassmeshshader.hlsl");
+    AddShaderLibrary(L"sparsegrassmeshshader.hlsl");
+    AddPixelShader(L"grasspixelshader.hlsl", L"GrassPixelShader");
+    AddMeshNode(L"DenseGrassMeshShader", L"GrassPixelShader", false);
+    AddMeshNode(L"SparseGrassMeshShader", L"GrassPixelShader", false);
+
+    // Flowers, Insects & Mushroom Nodes (all share InsectPixelShader; no culling)
+    AddShaderLibrary(L"beemeshshader.hlsl");
+    AddShaderLibrary(L"butterflymeshshader.hlsl");
+    AddShaderLibrary(L"flowermeshshader.hlsl");
+    AddShaderLibrary(L"mushroommeshshader.hlsl");
+    AddPixelShader(L"insectpixelshader.hlsl", L"InsectPixelShader");
+    AddMeshNode(L"BeeMeshShader", L"InsectPixelShader", false);
+    AddMeshNode(L"ButterflyMeshShader", L"InsectPixelShader", false);
+    AddMeshNode(L"FlowerMeshShader", L"InsectPixelShader", false);
+    AddMeshNode(L"SparseFlowerMeshShader", L"InsectPixelShader", 
false);
+    AddMeshNode(L"MushroomMeshShader", L"InsectPixelShader", false);
+
+    // Create work graph state object
+    CauldronThrowOnFail(d3dDevice->CreateStateObject(stateObjectDesc, IID_PPV_ARGS(&m_pWorkGraphStateObject)));
+
+    // release all compiled shaders
+    for (auto* shader : compiledShaders)
+    {
+        if (shader)
+        {
+            shader->Release();
+        }
+    }
+
+    // Get work graph properties (both interfaces are queried from the same state object and released at the end of this function)
+    ID3D12StateObjectProperties1* stateObjectProperties = nullptr;  // initialised like d3dDevice above
+    ID3D12WorkGraphProperties1* workGraphProperties = nullptr;
+
+    CauldronThrowOnFail(m_pWorkGraphStateObject->QueryInterface(IID_PPV_ARGS(&stateObjectProperties)));
+    CauldronThrowOnFail(m_pWorkGraphStateObject->QueryInterface(IID_PPV_ARGS(&workGraphProperties)));
+
+    // Get the index of our work graph inside the state object (state object can contain multiple work graphs)
+    const auto workGraphIndex = workGraphProperties->GetWorkGraphIndex(WorkGraphProgramName);
+
+    // Set the input record limit. This is required for work graphs with mesh nodes.
+    // In this case we'll only have a single input record
+    workGraphProperties->SetMaximumInputRecords(workGraphIndex, 1, 1);
+
+    // Create backing memory buffer (skipped when the graph reports a zero-size requirement)
+    D3D12_WORK_GRAPH_MEMORY_REQUIREMENTS memoryRequirements = {};
+    workGraphProperties->GetWorkGraphMemoryRequirements(workGraphIndex, &memoryRequirements);
+    if (memoryRequirements.MaxSizeInBytes > 0)
+    {
+        BufferDesc bufferDesc = BufferDesc::Data(L"MeshNodeSample_WorkGraphBackingMemory",
+                                                 static_cast<uint32_t>(memoryRequirements.MaxSizeInBytes),  // cast target restored as uint32_t - confirm against BufferDesc::Data's size parameter
+                                                 1,
+                                                 D3D12_WORK_GRAPHS_BACKING_MEMORY_ALIGNMENT_IN_BYTES,
+                                                 ResourceFlags::AllowUnorderedAccess);
+
+        m_pWorkGraphBackingMemoryBuffer = Buffer::CreateBufferResource(&bufferDesc, ResourceState::UnorderedAccess);
+    }
+
+    // Prepare work graph desc
+    m_WorkGraphProgramDesc.Type = D3D12_PROGRAM_TYPE_WORK_GRAPH;
+    m_WorkGraphProgramDesc.WorkGraph.ProgramIdentifier = stateObjectProperties->GetProgramIdentifier(WorkGraphProgramName);
+    // Set flag to initialize backing memory. 
+    // We'll clear this flag once we've run the work graph for the first time (done in Execute after DispatchGraph).
+    m_WorkGraphProgramDesc.WorkGraph.Flags = D3D12_SET_WORK_GRAPH_FLAG_INITIALIZE;
+    // Set backing memory (left zeroed when no backing buffer was created)
+    if (m_pWorkGraphBackingMemoryBuffer)
+    {
+        const auto addressInfo = m_pWorkGraphBackingMemoryBuffer->GetAddressInfo();
+        m_WorkGraphProgramDesc.WorkGraph.BackingMemory.StartAddress = addressInfo.GetImpl()->GPUBufferView;
+        m_WorkGraphProgramDesc.WorkGraph.BackingMemory.SizeInBytes = addressInfo.GetImpl()->SizeInBytes;
+    }
+
+    // Query entry point index (node "World", array index 0)
+    m_WorkGraphEntryPointIndex = workGraphProperties->GetEntrypointIndex(workGraphIndex, {L"World", 0});
+
+    // Release state object properties
+    stateObjectProperties->Release();
+    workGraphProperties->Release();
+
+    // Release ID3D12Device9 (only releases additional reference created by QueryInterface)
+    d3dDevice->Release();
+}
+
+void WorkGraphRenderModule::InitShadingPipeline()  // builds the compute root signature, pipeline and parameter set used by the shading pass in Execute
+{
+    RootSignatureDesc shadingRootSigDesc;
+    shadingRootSigDesc.AddConstantBufferView(0, ShaderBindStage::Compute, 1);  // slot 0: UpscalerInformation
+    shadingRootSigDesc.AddConstantBufferView(1, ShaderBindStage::Compute, 1);  // slot 1: ShadingCBData
+    shadingRootSigDesc.AddTextureSRVSet(0, ShaderBindStage::Compute, 2);       // two SRVs: G-buffer color and normal
+    shadingRootSigDesc.AddTextureUAVSet(0, ShaderBindStage::Compute, 1);       // one UAV: shading output
+
+    m_pShadingRootSignature = RootSignature::CreateRootSignature(L"MeshNodeSample_ShadingRootSignature", shadingRootSigDesc);
+
+    PipelineDesc shadingPsoDesc;
+    shadingPsoDesc.SetRootSignature(m_pShadingRootSignature);
+    shadingPsoDesc.AddShaderDesc(ShaderBuildDesc::Compute(L"shading.hlsl", L"MainCS", ShaderModel::SM6_0));
+
+    m_pShadingPipeline = PipelineObject::CreatePipelineObject(L"MeshNodeSample_ShadingPipeline", shadingPsoDesc);
+
+    m_pShadingParameterSet = ParameterSet::CreateParameterSet(m_pShadingRootSignature);
+
+    m_pShadingParameterSet->SetRootConstantBufferResource(GetDynamicBufferPool()->GetResource(), sizeof(UpscalerInformation), 0);
+
m_pShadingParameterSet->SetRootConstantBufferResource(GetDynamicBufferPool()->GetResource(), sizeof(ShadingCBData), 1);
+    m_pShadingParameterSet->SetTextureSRV(m_pGBufferColorOutput, ViewDimension::Texture2D, 0);   // SRV slot 0
+    m_pShadingParameterSet->SetTextureSRV(m_pGBufferNormalOutput, ViewDimension::Texture2D, 1);  // SRV slot 1
+    m_pShadingParameterSet->SetTextureUAV(m_pShadingOutput, ViewDimension::Texture2D, 0);        // UAV slot 0
+}
diff --git a/meshNodeSample/workgraphrendermodule.h b/meshNodeSample/workgraphrendermodule.h
new file mode 100644
index 0000000..aa09dc1
--- /dev/null
+++ b/meshNodeSample/workgraphrendermodule.h
@@ -0,0 +1,104 @@
+// This file is part of the AMD Work Graph Mesh Node Sample.
+//
+// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE. 
+ +#pragma once + +#include "render/rendermodule.h" +#include "render/shaderbuilder.h" + +// d3dx12 for work graphs +#include "d3dx12/d3dx12.h" + +// Forward declaration of Cauldron classes +namespace cauldron +{ + class Buffer; + class ParameterSet; + class PipelineObject; + class RasterView; + class RootSignature; + class Texture; +} // namespace cauldron + +class WorkGraphRenderModule : public cauldron::RenderModule +{ +public: + WorkGraphRenderModule(); + virtual ~WorkGraphRenderModule(); + + /** + * @brief Initialize work graphs, UI & other contexts + */ + void Init(const json& initData) override; + + /** + * @brief Execute the work graph. + */ + void Execute(double deltaTime, cauldron::CommandList* pCmdList) override; + + /** + * @brief Called by the framework when resolution changes. + */ + void OnResize(const cauldron::ResolutionInfo& resInfo) override; + +private: + /** + * @brief Create and initialize textures required for rendering and shading. + */ + void InitTextures(); + /** + * @brief Create and initialize the work graph program with mesh nodes. + */ + void InitWorkGraphProgram(); + /** + * @brief Create and initialize the shading compute pipeline. 
+ */ + void InitShadingPipeline(); + + // time variable for shader animations in milliseconds + uint32_t m_shaderTime = 0; + + // UI controlled settings + float m_WindStrength = 1.f; + float m_WindDirection = 0.f; + + const cauldron::Texture* m_pGBufferDepthOutput = nullptr; + const cauldron::RasterView* m_pGBufferDepthRasterView = nullptr; + const cauldron::Texture* m_pGBufferColorOutput = nullptr; + const cauldron::Texture* m_pGBufferNormalOutput = nullptr; + const cauldron::Texture* m_pGBufferMotionOutput = nullptr; + std::array m_pGBufferRasterViews; + + cauldron::RootSignature* m_pWorkGraphRootSignature = nullptr; + cauldron::ParameterSet* m_pWorkGraphParameterSet = nullptr; + ID3D12StateObject* m_pWorkGraphStateObject = nullptr; + cauldron::Buffer* m_pWorkGraphBackingMemoryBuffer = nullptr; + // Program description for binding the work graph + // contains work graph identifier & backing memory + D3D12_SET_PROGRAM_DESC m_WorkGraphProgramDesc = {}; + // Index of entry point node + UINT m_WorkGraphEntryPointIndex = 0; + + const cauldron::Texture* m_pShadingOutput = nullptr; + cauldron::RootSignature* m_pShadingRootSignature = nullptr; + cauldron::ParameterSet* m_pShadingParameterSet = nullptr; + cauldron::PipelineObject* m_pShadingPipeline = nullptr; +}; \ No newline at end of file diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..7434d99 --- /dev/null +++ b/readme.md @@ -0,0 +1,60 @@ +# Work Graphs Mesh Node Sample + +![](./screenshot.jpg) + +This sample shows fully GPU-driven rendering through work graphs and mesh nodes in action. +If you wish to learn more about this sample, work graphs or mesh nodes, you can check out our accompanying blog post on [GPUOpen](https://gpuopen.com/learn/work_graphs_mesh_nodes). 
+
+## Building the sample
+
+### Prerequisites
+
+To build the Work Graphs Mesh Node Sample, you must first install the following tools:
+
+- [CMake 3.17](https://cmake.org/download/)
+- [Visual Studio 2019](https://visualstudio.microsoft.com/downloads/)
+- [Windows 10 SDK 10.0.18362.0](https://developer.microsoft.com/en-us/windows/downloads/windows-10-sdk)
+- [Vulkan SDK 1.3.239](https://vulkan.lunarg.com/) (build dependency of Cauldron)
+
+You will also need a mesh node compatible driver. Information on driver availability can be found [here](https://gpuopen.com/learn/work_graphs_mesh_nodes/work_graphs_mesh_nodes-getting_started).
+
+### Getting up and running
+
+Clone the repository
+```
+git clone https://github.com/GPUOpen-LibrariesAndSDKs/WorkGraphsMeshNodeSample.git
+```
+
+Inside the cloned repository, run
+```
+cmake -B build .
+```
+This will download the [FidelityFX SDK](https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/tree/release-FSR3-3.0.4), [Agility SDK](https://www.nuget.org/packages/Microsoft.Direct3D.D3D12) and [DirectX Shader Compiler](https://www.nuget.org/packages/Microsoft.Direct3D.DXC) and put them all together with the sample project.
+You can find the scripts for this in the [`imported`](./imported/) folder.
+
+Open the generated Visual Studio project with
+```
+cmake --open build
+```
+
+Build & run the `MeshNodeSample` project.
+
+### Controls
+
+| Key                   | Action                                                                          |
+| ----------------------|---------------------------------------------------------------------------------|
+| **Left Mouse + Drag** | Rotates the camera view direction.                                              |
+| **Mouse wheel**       | Increase/Decrease camera movement speed.                                        |
+| **A**                 | Strafe the camera to the left.                                                  |
+| **W**                 | Move the camera forward.                                                        |
+| **S**                 | Move the camera backward.                                                       |
+| **D**                 | Strafe the camera to the right.                                                 |
+| **Q**                 | Move the camera upwards.                                                        |
+| **E**                 | Move the camera downwards.                                                      |
+| **F1**                | Toggles the main UI on/off.                                                     |
+| **F2**                | Toggles the performance UI on/off.                                              |
+| **F3**                | Toggles the message log UI on/off. (Defaults to on in debug and off in release) |
+| **M**                 | Toggles magnifying glass.                                                       |
+| **L**                 | Toggles magnifying glass lock when enabled.                                     |
+| **ESC**               | Shuts down and quits the sample.                                                |
+| **Alt-Enter**         | Toggles fullscreen mode.                                                        |
\ No newline at end of file
diff --git a/screenshot.jpg b/screenshot.jpg
new file mode 100644
index 0000000..c9615e4
Binary files /dev/null and b/screenshot.jpg differ