From e4068856bc962a68021da094ec4214ee89d7068c Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 8 Jan 2025 16:09:15 +0100 Subject: [PATCH 01/12] implement framebuffer fetch --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 107 ++++++++++-------- .../LegacyShaderDecompiler/LatteDecompiler.h | 1 + .../LatteDecompilerAnalyzer.cpp | 36 ++++++ .../LatteDecompilerEmitMSL.cpp | 48 ++++++-- .../LatteDecompilerEmitMSLHeader.hpp | 78 +++++++------ src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h | 2 +- 6 files changed, 179 insertions(+), 93 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index f46c68d4a..7ad258840 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -504,11 +504,31 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, uint64 vsHash2 = 0; _calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2); uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); + + uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F; + vsHash += tmp; + + auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE(); + // TODO: include always in the hash in case of geometry shader or rect shader + if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS) + { + vsHash += 13ULL; + } + else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS) + { + // required for Vulkan since we have to write the pointsize in the shader + vsHash += 71ULL; + } + vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0); + // halfZ + if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) + vsHash += 0x1537; + if (g_renderer->GetType() == RendererAPI::Metal) { if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually) { - for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++) + for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++) { LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g]; uint32 bufferIndex = group.attributeBufferIndex; @@ -522,47 +542,28 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, if (!usesGeometryShader) { - // Rasterization - bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + // Rasterization + bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); - // HACK - if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) - rasterizationEnabled = true; + // HACK + if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; - const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; - uint32 cullFront = polygonControlReg.get_CULL_FRONT(); - uint32 cullBack = polygonControlReg.get_CULL_BACK(); - if (cullFront && cullBack) - rasterizationEnabled = false; + const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; - if (rasterizationEnabled) - vsHash += 51ULL; + if (rasterizationEnabled) + vsHash += 51ULL; - // Vertex fetch - if (_activeFetchShader->mtlFetchVertexManually) - vsHash += 349ULL; + // Vertex fetch + if (_activeFetchShader->mtlFetchVertexManually) + vsHash += 349ULL; } } - uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F; - vsHash += tmp; - - auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE(); - // TODO: include always in the hash in case of geometry shader or rect shader - if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS) - { - vsHash += 13ULL; - } - else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS) - { - // required for Vulkan since we have to write the pointsize in the shader - vsHash += 71ULL; - } - vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0); - // halfZ - if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) - vsHash += 0x1537; - _shaderBaseHash_vs = vsHash; } @@ -589,19 +590,6 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b // get vertex shader uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL); -#if ENABLE_METAL - if (g_renderer->GetType() == RendererAPI::Metal) - { - for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) - { - auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew); - uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType; - psHash += (uint64)dataType; - psHash = std::rotl(psHash, 7); - } - } -#endif - _shaderBaseHash_ps = psHash; } @@ -635,6 +623,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont auxHashTex += 0x333; } } + return auxHash + auxHashTex; } @@ -668,6 +657,28 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte auxHash = (auxHash << 3) | (auxHash >> 61); auxHash += (uint64)dim; } + + // Textures as render targets + for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++) + { + uint8 t = pixelShader->textureUnitList[i]; + auxHash = std::rotl(auxHash, 11); + auxHash += (uint64)pixelShader->textureRenderTargetIndex[t]; + } + +#if ENABLE_METAL + if (g_renderer->GetType() == RendererAPI::Metal) + { + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew); + uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType; + auxHash = std::rotl(auxHash, 7); + auxHash += (uint64)dataType; + } + } +#endif + return auxHash; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 2812facc7..64aa1413d 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -183,6 +183,7 @@ struct LatteDecompilerShader std::bitset textureUnitMask2; uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{}; + uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS] = {255}; // analyzer stage (pixel outputs) uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index b9ca7b5d4..2d0c7f762 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -850,6 +850,42 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD shader->textureUnitList[shader->textureUnitListCount] = i; shader->textureUnitListCount++; } + shader->textureRenderTargetIndex[i] = 255; + } + // check if textures are used as render targets + if (shader->shaderType == LatteConst::ShaderType::Pixel) + { + for (sint32 i = 0; i < shader->textureUnitListCount; i++) + { + sint32 textureIndex = shader->textureUnitList[i]; + const auto& texRegister = texRegs[textureIndex]; + + // get physical address of texture data + MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8); + if (physAddr == MPTR_NULL) + continue; // invalid data + + for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++) + { + uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + j); + uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this + uint32 regColorSize = colorBufferRegBase[mmCB_COLOR0_SIZE - mmCB_COLOR0_BASE]; + uint32 regColorInfo = colorBufferRegBase[mmCB_COLOR0_INFO - mmCB_COLOR0_BASE]; + uint32 regColorView = colorBufferRegBase[mmCB_COLOR0_VIEW - mmCB_COLOR0_BASE]; + // decode color buffer reg info + Latte::E_HWTILEMODE colorBufferTileMode = (Latte::E_HWTILEMODE)((regColorInfo >> 8) & 0xF); + uint32 numberType = (regColorInfo >> 12) & 7; + Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew); + + MPTR colorBufferPhysMem = regColorBufferBase; + + if (physAddr == colorBufferPhysMem) + { + shader->textureRenderTargetIndex[i] = j; + break; + } + } + } } // for geometry shaders check the copy shader for stream writes if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index b925f862c..22c511ba1 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2260,6 +2260,22 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex return; } + // Do a framebuffer fetch if possible + // TODO: filter out more? + uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex]; + if (renderTargetIndex != 255) + { + src->addFmt("col{}.", renderTargetIndex); + // TODO: clean up + std::string components[] = {"x", "y", "z", "w"}; + for (sint32 i = 0; i < numWrittenElements; i++) + { + src->addFmt("{}", components[i]); + } + src->add(");" _CRLF); + return; + } + if (emulateCompare) { cemu_assert_debug(!isGather); @@ -2630,20 +2646,28 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo // todo - mip index parameter? - auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; - - if (texDim == Latte::E_DIM::DIM_1D) - src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex); - else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) - src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); - else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) - src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); - else if (texDim == Latte::E_DIM::DIM_2D_ARRAY) - src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + if (shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255) + { + // TODO: use the render target size + src->addFmt(" = int4(1920, 1080, 1, 1)."); + } else { - cemu_assert_debug(false); - src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; + + if (texDim == Latte::E_DIM::DIM_1D) + src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex); + else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + else if (texDim == Latte::E_DIM::DIM_2D_ARRAY) + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + else + { + cemu_assert_debug(false); + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + } } for(sint32 f=0; f<4; f++) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 2d871d99d..40e704554 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -448,6 +448,8 @@ namespace LatteDecompiler static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext) { + bool renderTargetIndexUsed[LATTE_NUM_COLOR_TARGET] = {false}; + auto src = shaderContext->shaderSource; // texture sampler definition for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++) @@ -455,44 +457,56 @@ namespace LatteDecompiler if (!shaderContext->output->textureUnitMask[i]) continue; - src->add(", "); + uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[i]; + if (renderTargetIndex == 255) + { + src->add(", "); - // Only 2D and 2D array textures can be used with comparison samplers - if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i])) - src->add("depth"); - else - src->add("texture"); + // Only 2D and 2D array textures can be used with comparison samplers + if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i])) + src->add("depth"); + else + src->add("texture"); - if (shaderContext->shader->textureIsIntegerFormat[i]) - { - // integer samplers - if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) - src->add("1d"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) - src->add("2d"); - else - cemu_assert_unimplemented(); + if (shaderContext->shader->textureIsIntegerFormat[i]) + { + // integer samplers + if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) + src->add("1d"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) + src->add("2d"); + else + cemu_assert_unimplemented(); + } + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) + src->add("2d"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) + src->add("1d"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY) + src->add("2d_array"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP) + src->add("cube_array"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D) + src->add("3d"); + else + { + cemu_assert_unimplemented(); + } + + uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i]; + //uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31; + //uint32 samplerBinding = textureBinding % 16; + src->addFmt(" tex{} [[texture({})]]", i, binding); + src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding); } - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) - src->add("2d"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) - src->add("1d"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY) - src->add("2d_array"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP) - src->add("cube_array"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D) - src->add("3d"); else { - cemu_assert_unimplemented(); + if (!renderTargetIndexUsed[renderTargetIndex]) + { + src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex); + renderTargetIndexUsed[renderTargetIndex] = true; + } } - - uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i]; - //uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31; - //uint32 samplerBinding = textureBinding % 16; - src->addFmt(" tex{} [[texture({})]]", i, binding); - src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index 7544ceed9..ef25ca5d5 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -55,7 +55,7 @@ inline const char* GetDataTypeStr(MetalDataType dataType) return "float4"; default: cemu_assert_suspicious(); - return ""; + return "INVALID"; } } From 68d328b0947eb0c23dec734fc28f577958f24401 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 8 Jan 2025 16:44:54 +0100 Subject: [PATCH 02/12] mask out color attachments --- .../HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h | 2 +- .../LatteDecompilerAnalyzer.cpp | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 64aa1413d..21f6d2b2b 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -183,7 +183,7 @@ struct LatteDecompilerShader std::bitset textureUnitMask2; uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{}; - uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS] = {255}; + uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS]; // analyzer stage (pixel outputs) uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 2d0c7f762..b1ede036a 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -855,6 +855,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD // check if textures are used as render targets if (shader->shaderType == LatteConst::ShaderType::Pixel) { + uint8 colorBufferMask = LatteMRT::GetActiveColorBufferMask(shader, *shaderContext->contextRegistersNew); for (sint32 i = 0; i < shader->textureUnitListCount; i++) { sint32 textureIndex = shader->textureUnitList[i]; @@ -867,15 +868,11 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++) { + if (((colorBufferMask) & (1 << j)) == 0) + continue; // color buffer not enabled + uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + j); uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this - uint32 regColorSize = colorBufferRegBase[mmCB_COLOR0_SIZE - mmCB_COLOR0_BASE]; - uint32 regColorInfo = colorBufferRegBase[mmCB_COLOR0_INFO - mmCB_COLOR0_BASE]; - uint32 regColorView = colorBufferRegBase[mmCB_COLOR0_VIEW - mmCB_COLOR0_BASE]; - // decode color buffer reg info - Latte::E_HWTILEMODE colorBufferTileMode = (Latte::E_HWTILEMODE)((regColorInfo >> 8) & 0xF); - uint32 numberType = (regColorInfo >> 12) & 7; - Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew); MPTR colorBufferPhysMem = regColorBufferBase; From 27a31fedabf24d90b0c56b1fa770fe29ce729296 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 8 Jan 2025 16:58:06 +0100 Subject: [PATCH 03/12] clean up accurate barriers --- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 23 ++++++++++++------- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 3 +-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index c5bdd335c..d9850f071 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1031,7 +1031,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader); if (endRenderPass) + { EndEncoding(); + // TODO: only log in debug? + cemuLog_logOnce(LogType::Force, "Ending render pass due to render target self-dependency\n"); + } } // Primitive type @@ -1871,6 +1875,11 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader) const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i); auto hostTextureUnit = relative_textureUnit; auto textureDim = shader->textureUnitDim[relative_textureUnit]; + + // Texture is accessed as a framebuffer fetch, therefore there is no need to flush it + if (shader->textureRenderTargetIndex[relative_textureUnit] != 255) + continue; + auto texUnitRegIndex = hostTextureUnit * 7; switch (shader->shaderType) { @@ -1895,15 +1904,13 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader) continue; LatteTexture* baseTexture = textureView->baseTexture; - if (!m_state.m_isFirstDrawInRenderPass) + + // If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) { - // If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture - for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) - { - auto colorTarget = m_state.m_activeFBO.m_fbo->colorBuffer[i].texture; - if (colorTarget && colorTarget->baseTexture == baseTexture) - return true; - } + auto colorTarget = m_state.m_activeFBO.m_fbo->colorBuffer[i].texture; + if (colorTarget && colorTarget->baseTexture == baseTexture) + return true; } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 49aa40b10..c3898f3a6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -125,8 +125,7 @@ struct MetalState MetalActiveFBOState m_lastUsedFBO; size_t m_vertexBufferOffsets[MAX_MTL_VERTEX_BUFFERS]; - // TODO: find out what is the max number of bound textures on the Wii U - class LatteTextureViewMtl* m_textures[64] = {nullptr}; + class LatteTextureViewMtl* m_textures[LATTE_NUM_MAX_TEX_UNITS] = {nullptr}; size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS]; MTL::Viewport m_viewport; From 6d1d739de5713497fdf72d5de58fa04f1a6df08b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 9 Jan 2025 16:03:53 +0100 Subject: [PATCH 04/12] limit framebuffer fetch to 2D textures --- .../LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index b1ede036a..0f9f28c61 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -866,6 +866,13 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD if (physAddr == MPTR_NULL) continue; // invalid data + // Check for dimension + auto dim = shader->textureUnitDim[textureIndex]; + // TODO: 2D arrays could technically be supported as well + if (dim != Latte::E_DIM::DIM_2D) + continue; + + // Check if the texture is used as render target for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++) { if (((colorBufferMask) & (1 << j)) == 0) @@ -876,6 +883,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD MPTR colorBufferPhysMem = regColorBufferBase; + // TODO: check if mip matches as well? if (physAddr == colorBufferPhysMem) { shader->textureRenderTargetIndex[i] = j; From 03ec23140b8de7ce8839aa84b3f186e58e8fc7f5 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 9 Jan 2025 16:09:02 +0100 Subject: [PATCH 05/12] don't bind textures when framebuffer fetched --- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index d9850f071..176ae6df1 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1926,6 +1926,11 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE { const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i); auto hostTextureUnit = relative_textureUnit; + + // Don't bind textures that are accessed with a framebuffer fetch + if (shader->textureRenderTargetIndex[relative_textureUnit] != 255) + continue; + auto textureDim = shader->textureUnitDim[relative_textureUnit]; auto texUnitRegIndex = hostTextureUnit * 7; switch (shader->shaderType) From 3fae686f21a69ca93044ebfa030b6862fd29b847 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 9 Jan 2025 16:11:52 +0100 Subject: [PATCH 06/12] remove the accurate barriers option --- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 4 ++++ src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 176ae6df1..6bb7964cb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1008,6 +1008,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); const auto fetchShader = LatteSHRC_GetActiveFetchShader(); + /* bool neverSkipAccurateBarrier = false; // "Accurate barriers" is usually enabled globally but since the CPU cost is substantial we allow users to disable it (debug -> 'Accurate barriers' option) @@ -1037,6 +1038,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 cemuLog_logOnce(LogType::Force, "Ending render pass due to render target self-dependency\n"); } } + */ // Primitive type const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); @@ -1867,6 +1869,7 @@ bool MetalRenderer::AcquireDrawable(bool mainWindow) return layer.AcquireDrawable(); } +/* bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader) { sint32 textureCount = shader->resourceMapping.getTextureCount(); @@ -1916,6 +1919,7 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader) return false; } +*/ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader) { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index c3898f3a6..60fb8e034 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -362,7 +362,7 @@ class MetalRenderer : public Renderer bool AcquireDrawable(bool mainWindow); - bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader); + //bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader); void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader); void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a); From a0239cb75648e96267f8df2a9352cae3d0e2d0f1 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 9 Jan 2025 16:27:45 +0100 Subject: [PATCH 07/12] check for framebuffer fetch support --- .../LatteDecompilerEmitMSL.cpp | 33 ++++++++++--------- .../LatteDecompilerEmitMSLHeader.hpp | 24 +++++++------- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 26 ++++++++------- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 6 ++++ 4 files changed, 50 insertions(+), 39 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 22c511ba1..a9e3184c9 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -10,7 +10,7 @@ #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" -#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "config/ActiveSettings.h" #include "util/helpers/StringBuf.h" @@ -2261,19 +2261,22 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex } // Do a framebuffer fetch if possible - // TODO: filter out more? - uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex]; - if (renderTargetIndex != 255) - { - src->addFmt("col{}.", renderTargetIndex); - // TODO: clean up - std::string components[] = {"x", "y", "z", "w"}; - for (sint32 i = 0; i < numWrittenElements; i++) - { - src->addFmt("{}", components[i]); - } - src->add(");" _CRLF); - return; + if (static_cast(g_renderer.get())->SupportsFramebufferFetch()) + { + // TODO: filter out more? + uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex]; + if (renderTargetIndex != 255) + { + src->addFmt("col{}.", renderTargetIndex); + // TODO: clean up + std::string components[] = {"x", "y", "z", "w"}; + for (sint32 i = 0; i < numWrittenElements; i++) + { + src->addFmt("{}", components[i]); + } + src->add(");" _CRLF); + return; + } } if (emulateCompare) @@ -2646,7 +2649,7 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo // todo - mip index parameter? - if (shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255) + if (static_cast(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255) { // TODO: use the render target size src->addFmt(" = int4(1920, 1080, 1, 1)."); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 40e704554..84722a24b 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -1,7 +1,7 @@ #pragma once #include "Common/precompiled.h" -#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "HW/Latte/Core/LatteShader.h" namespace LatteDecompiler @@ -458,11 +458,19 @@ namespace LatteDecompiler continue; uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[i]; - if (renderTargetIndex == 255) + if (static_cast(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255) { - src->add(", "); + if (!renderTargetIndexUsed[renderTargetIndex]) + { + src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex); + renderTargetIndexUsed[renderTargetIndex] = true; + } + } + else + { + src->add(", "); - // Only 2D and 2D array textures can be used with comparison samplers + // Only certain texture dimensions can be used with comparison samplers if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i])) src->add("depth"); else @@ -499,14 +507,6 @@ namespace LatteDecompiler src->addFmt(" tex{} [[texture({})]]", i, binding); src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding); } - else - { - if (!renderTargetIndexUsed[renderTargetIndex]) - { - src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex); - renderTargetIndexUsed[renderTargetIndex] = true; - } - } } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 6bb7964cb..bd6f93158 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -87,6 +87,7 @@ MetalRenderer::MetalRenderer() // Feature support m_isAppleGPU = m_device->supportsFamily(MTL::GPUFamilyApple1); + m_supportsFramebufferFetch = m_device->supportsFamily(MTL::GPUFamilyApple2); m_hasUnifiedMemory = m_device->hasUnifiedMemory(); m_supportsMetal3 = m_device->supportsFamily(MTL::GPUFamilyMetal3); m_recommendedMaxVRAMUsage = m_device->recommendedMaxWorkingSetSize(); @@ -584,21 +585,22 @@ void MetalRenderer::DeleteFontTextures() void MetalRenderer::AppendOverlayDebugInfo() { ImGui::Text("--- GPU info ---"); - ImGui::Text("GPU %s", m_device->name()->utf8String()); - ImGui::Text("Is Apple GPU %s", (m_isAppleGPU ? "yes" : "no")); - ImGui::Text("Has unified memory %s", (m_hasUnifiedMemory ? "yes" : "no")); - ImGui::Text("Supports Metal3 %s", (m_supportsMetal3 ? "yes" : "no")); + ImGui::Text("GPU %s", m_device->name()->utf8String()); + ImGui::Text("Is Apple GPU %s", (m_isAppleGPU ? "yes" : "no")); + ImGui::Text("Supports framebuffer fetch %s", (m_supportsFramebufferFetch ? "yes" : "no")); + ImGui::Text("Has unified memory %s", (m_hasUnifiedMemory ? "yes" : "no")); + ImGui::Text("Supports Metal3 %s", (m_supportsMetal3 ? "yes" : "no")); ImGui::Text("--- Metal info ---"); - ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize()); - ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024); + ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize()); + ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024); ImGui::Text("--- Metal info (per frame) ---"); - ImGui::Text("Command buffers %u", m_performanceMonitor.m_commandBuffers); - ImGui::Text("Render passes %u", m_performanceMonitor.m_renderPasses); - ImGui::Text("Clears %u", m_performanceMonitor.m_clears); - ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws); - ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans); + ImGui::Text("Command buffers %u", m_performanceMonitor.m_commandBuffers); + ImGui::Text("Render passes %u", m_performanceMonitor.m_renderPasses); + ImGui::Text("Clears %u", m_performanceMonitor.m_clears); + ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws); + ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans); } void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ) @@ -1932,7 +1934,7 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE auto hostTextureUnit = relative_textureUnit; // Don't bind textures that are accessed with a framebuffer fetch - if (shader->textureRenderTargetIndex[relative_textureUnit] != 255) + if (m_supportsFramebufferFetch && shader->textureRenderTargetIndex[relative_textureUnit] != 255) continue; auto textureDim = shader->textureUnitDim[relative_textureUnit]; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 60fb8e034..db79471d0 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -375,6 +375,11 @@ class MetalRenderer : public Renderer return m_isAppleGPU; } + bool SupportsFramebufferFetch() const + { + return m_supportsFramebufferFetch; + } + bool HasUnifiedMemory() const { return m_hasUnifiedMemory; @@ -477,6 +482,7 @@ class MetalRenderer : public Renderer // Feature support bool m_isAppleGPU; + bool m_supportsFramebufferFetch; bool m_hasUnifiedMemory; bool m_supportsMetal3; uint32 m_recommendedMaxVRAMUsage; From d9bf99cb208beed8601675bcc3a56757cf995b5f Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 9 Jan 2025 20:34:49 +0100 Subject: [PATCH 08/12] fix: a typo in texture index --- .../LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 0f9f28c61..9a40d7436 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -872,6 +872,14 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD if (dim != Latte::E_DIM::DIM_2D) continue; + // Check for mip level + // TODO: uncomment? + /* + auto lastMip = texRegister.word5.get_LAST_LEVEL(); + if (lastMip != 0) + continue; + */ + // Check if the texture is used as render target for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++) { @@ -886,7 +894,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD // TODO: check if mip matches as well? if (physAddr == colorBufferPhysMem) { - shader->textureRenderTargetIndex[i] = j; + shader->textureRenderTargetIndex[textureIndex] = j; break; } } From 4479584eb644daa5f6fc3ad52c5269b46c9c77ca Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 10 Jan 2025 09:55:35 +0100 Subject: [PATCH 09/12] support texture lod query for framebuffer fetch --- .../LatteDecompilerEmitMSL.cpp | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index a9e3184c9..bcfe2d3d2 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2261,9 +2261,9 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex } // Do a framebuffer fetch if possible + // TODO: support comparison samplers if (static_cast(g_renderer.get())->SupportsFramebufferFetch()) { - // TODO: filter out more? uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex]; if (renderTargetIndex != 255) { @@ -2720,24 +2720,31 @@ static void _emitTEXGetCompTexLodCode(LatteDecompilerShaderContext* shaderContex src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); - if( shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP ) + if (static_cast(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255) { - // 3 coordinates - if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); - else - src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast({}.{}{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); + // We assume that textures accessed as framebuffer fetch are always sampled at pixel coordinates, therefore the lod would always be 0.0 + src->add("float4(0.0, 0.0, 0.0, 0.0)"); } else { - if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); - else - src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast({}.{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); - debugBreakpoint(); + if (shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP) + { + // 3 coordinates + if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) + src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}{}), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); + else + src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast({}.{}{}{})), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); + } + else + { + if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) + src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); + else + src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast({}.{}{})), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); + debugBreakpoint(); + } } - _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); src->add("."); From ca3fe9610434a4fe4b61acca9cc5f8c4f3cf9db5 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 10 Jan 2025 15:40:19 +0100 Subject: [PATCH 10/12] fix: component indexing for framebuffer fetch --- .../LatteDecompilerEmitMSL.cpp | 482 +++++++++--------- 1 file changed, 237 insertions(+), 245 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index bcfe2d3d2..ba0180dc9 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2261,282 +2261,274 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex } // Do a framebuffer fetch if possible - // TODO: support comparison samplers - if (static_cast(g_renderer.get())->SupportsFramebufferFetch()) + uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex]; + if (static_cast(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255) { - uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex]; - if (renderTargetIndex != 255) - { - src->addFmt("col{}.", renderTargetIndex); - // TODO: clean up - std::string components[] = {"x", "y", "z", "w"}; - for (sint32 i = 0; i < numWrittenElements; i++) - { - src->addFmt("{}", components[i]); - } - src->add(");" _CRLF); - return; - } + // TODO: support comparison samplers + src->addFmt("col{}", renderTargetIndex); } - - if (emulateCompare) + else { - cemu_assert_debug(!isGather); + if (emulateCompare) + { + cemu_assert_debug(!isGather); - src->add("sampleCompareEmulate("); - } + src->add("sampleCompareEmulate("); + } - src->addFmt("tex{}", texInstruction->textureFetch.textureIndex); - if (!emulateCompare) - { - src->add("."); - if (isRead) + src->addFmt("tex{}", texInstruction->textureFetch.textureIndex); + if (!emulateCompare) { - if (hasOffset) - cemu_assert_unimplemented(); - src->add("read("); - unnormalizationHandled = true; - useTexelCoordinates = true; + src->add("."); + if (isRead) + { + if (hasOffset) + cemu_assert_unimplemented(); + src->add("read("); + unnormalizationHandled = true; + useTexelCoordinates = true; + } + else + { + if (isGather) + src->add("gather"); + else + src->add("sample"); + if (isCompare) + src->add("_compare"); + src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex); + } } else { - if (isGather) - src->add("gather"); - else - src->add("sample"); - if (isCompare) - src->add("_compare"); - src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex); + src->addFmt(", samplr{}, ", texInstruction->textureFetch.textureIndex); } - } - else - { - src->addFmt(", samplr{}, ", texInstruction->textureFetch.textureIndex); - } - - // for textureGather() add shift (todo: depends on rounding mode set in sampler registers?) - if (texOpcode == GPU7_TEX_INST_FETCH4) - { - if (texDim == Latte::E_DIM::DIM_2D) - { - //src->addFmt2("(vec2(-0.1) / vec2(textureSize(tex{},0).xy)) + ", texInstruction->textureIndex); - // vec2(-0.00001) is minimum to break Nvidia - // vec2(0.0001) is minimum to fix shadows on Intel, also fixes it on AMD (Windows and Linux) + // for textureGather() add shift (todo: depends on rounding mode set in sampler registers?) + if (texOpcode == GPU7_TEX_INST_FETCH4) + { + if (texDim == Latte::E_DIM::DIM_2D) + { + //src->addFmt2("(vec2(-0.1) / vec2(textureSize(tex{},0).xy)) + ", texInstruction->textureIndex); - // todo - emulating coordinate rounding mode correctly is tricky - // GX2 supports two modes: Truncate or rounding according to DX9 rules - // Vulkan uses truncate mode when point sampling (min and mag is both nearest) otherwise it uses rounding + // vec2(-0.00001) is minimum to break Nvidia + // vec2(0.0001) is minimum to fix shadows on Intel, also fixes it on AMD (Windows and Linux) - // adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation - src->addFmt("float2(0.0001) + "); - } - } + // todo - emulating coordinate rounding mode correctly is tricky + // GX2 supports two modes: Truncate or rounding according to DX9 rules + // Vulkan uses truncate mode when point sampling (min and mag is both nearest) otherwise it uses rounding - const sint32 texCoordDataType = (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT; - if(useTexelCoordinates) - { - // handle integer coordinates for texelFetch - if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) - { - src->add("uint2("); - src->add("float2("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType); - src->addFmt(", "); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType); + // adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation + src->addFmt("float2(0.0001) + "); + } + } - src->addFmt(")*supportBuffer.tex{}Scale", texInstruction->textureFetch.textureIndex); // close float2 and scale + const sint32 texCoordDataType = (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT; + if(useTexelCoordinates) + { + // handle integer coordinates for texelFetch + if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) + { + src->add("uint2("); + src->add("float2("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType); + src->addFmt(", "); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType); - src->add("), 0"); // close int2 and lod param - // todo - lod - } - else if (texDim == Latte::E_DIM::DIM_1D) - { - // VC DS games forget to initialize textures and use texel fetch on an uninitialized texture (a dim of 0 maps to 1D) - src->add("uint("); - src->add("float("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT); - src->addFmt(")*supportBuffer.tex{}Scale.x", texInstruction->textureFetch.textureIndex); - src->add("), 0"); - // todo - lod - } - else - cemu_assert_debug(false); - } - else /* useTexelCoordinates == false */ - { - // float coordinates - if ( (texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_L || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) ) - { - // shadow sampler - if (texDim == Latte::E_DIM::DIM_2D_ARRAY) - { - // 3 coords + compare value - src->add("float2("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(", "); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add("), uint(rint("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add("))"); + src->addFmt(")*supportBuffer.tex{}Scale", texInstruction->textureFetch.textureIndex); // close float2 and scale - src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); - } - else if (texDim == Latte::E_DIM::DIM_CUBEMAP) - { - // 2 coords + faceId - if (texInstruction->textureFetch.srcSel[0] >= 4 || texInstruction->textureFetch.srcSel[1] >= 4) - { - debugBreakpoint(); - } - src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); - src->addFmt(")"); - src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index - } - else if (texDim == Latte::E_DIM::DIM_1D) - { - // 1 coord + 1 unused coord (per spec) + compare value - if (texInstruction->textureFetch.srcSel[0] >= 4) - { - debugBreakpoint(); - } - src->addFmt("{}, {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); - } - else - { - // 2 coords + compare value (as float3) - if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4) - { - debugBreakpoint(); - } - src->addFmt("float2({}), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); - } - } - else if(texDim == Latte::E_DIM::DIM_2D_ARRAY) - { - // 3 coords - src->add("float2("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(", "); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add("), uint(rint("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add("))"); - } - else if(texDim == Latte::E_DIM::DIM_3D) - { - // 3 coords - src->add("float3("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(", "); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(", "); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(")"); - } - else if( texDim == Latte::E_DIM::DIM_CUBEMAP ) - { - // 2 coords + faceId - cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4); - cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4); - src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); - src->add(")"); - src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index - } - else if( texDim == Latte::E_DIM::DIM_1D ) - { - // 1 coord - src->add(_getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0)); - } - else - { - // 2 coords - src->add("float2("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(","); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(")"); - // avoid truncate to effectively round downwards on texel edges - if (ActiveSettings::ForceSamplerRoundToPrecision()) - src->addFmt("+ float2(1.0)/float2(tex{}.get_width(), tex{}.get_height())/512.0", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); - } - // lod or lod bias parameter - // 1D textures don't support lod - if (texDim != Latte::E_DIM::DIM_1D && texDim != Latte::E_DIM::DIM_1D_ARRAY) - { - if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L) + src->add("), 0"); // close int2 and lod param + // todo - lod + } + else if (texDim == Latte::E_DIM::DIM_1D) + { + // VC DS games forget to initialize textures and use texel fetch on an uninitialized texture (a dim of 0 maps to 1D) + src->add("uint("); + src->add("float("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT); + src->addFmt(")*supportBuffer.tex{}Scale.x", texInstruction->textureFetch.textureIndex); + src->add("), 0"); + // todo - lod + } + else + cemu_assert_debug(false); + } + else /* useTexelCoordinates == false */ + { + // float coordinates + if ( (texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_L || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) ) { - src->add(", "); - if (texOpcode == GPU7_TEX_INST_SAMPLE_LB) + // shadow sampler + if (texDim == Latte::E_DIM::DIM_2D_ARRAY) { - src->addFmt("bias({})", _FormatFloatAsConstant((float)texInstruction->textureFetch.lodBias / 16.0f)); + // 3 coords + compare value + src->add("float2("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(", "); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add("), uint(rint("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add("))"); + + src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); + } + else if (texDim == Latte::E_DIM::DIM_CUBEMAP) + { + // 2 coords + faceId + if (texInstruction->textureFetch.srcSel[0] >= 4 || texInstruction->textureFetch.srcSel[1] >= 4) + { + debugBreakpoint(); + } + src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); + src->addFmt(")"); + src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index + } + else if (texDim == Latte::E_DIM::DIM_1D) + { + // 1 coord + 1 unused coord (per spec) + compare value + if (texInstruction->textureFetch.srcSel[0] >= 4) + { + debugBreakpoint(); + } + src->addFmt("{}, {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } else { - // TODO: is this correct? - src->add("level("); - _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add(")"); + // 2 coords + compare value (as float3) + if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4) + { + debugBreakpoint(); + } + src->addFmt("float2({}), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } } - else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) + else if(texDim == Latte::E_DIM::DIM_2D_ARRAY) { - src->add(", level(0.0)"); + // 3 coords + src->add("float2("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(", "); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add("), uint(rint("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add("))"); } - } - } - // gradient parameters - if (texOpcode == GPU7_TEX_INST_SAMPLE_G) - { - if (texDim == Latte::E_DIM::DIM_2D || - texDim == Latte::E_DIM::DIM_1D) - { - src->add(", gradient2d(gradH.xy, gradV.xy)"); - } - else - { - cemu_assert_unimplemented(); - } - } + else if(texDim == Latte::E_DIM::DIM_3D) + { + // 3 coords + src->add("float3("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(", "); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(", "); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(")"); + } + else if( texDim == Latte::E_DIM::DIM_CUBEMAP ) + { + // 2 coords + faceId + cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4); + cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4); + src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); + src->add(")"); + src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index + } + else if( texDim == Latte::E_DIM::DIM_1D ) + { + // 1 coord + src->add(_getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0)); + } + else + { + // 2 coords + src->add("float2("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(","); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(")"); + // avoid truncate to effectively round downwards on texel edges + if (ActiveSettings::ForceSamplerRoundToPrecision()) + src->addFmt("+ float2(1.0)/float2(tex{}.get_width(), tex{}.get_height())/512.0", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + } + // lod or lod bias parameter + // 1D textures don't support lod + if (texDim != Latte::E_DIM::DIM_1D && texDim != Latte::E_DIM::DIM_1D_ARRAY) + { + if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L) + { + src->add(", "); + if (texOpcode == GPU7_TEX_INST_SAMPLE_LB) + { + src->addFmt("bias({})", _FormatFloatAsConstant((float)texInstruction->textureFetch.lodBias / 16.0f)); + } + else + { + // TODO: is this correct? + src->add("level("); + _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT); + src->add(")"); + } + } + else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) + { + src->add(", level(0.0)"); + } + } + } + // gradient parameters + if (texOpcode == GPU7_TEX_INST_SAMPLE_G) + { + if (texDim == Latte::E_DIM::DIM_2D || + texDim == Latte::E_DIM::DIM_1D) + { + src->add(", gradient2d(gradH.xy, gradV.xy)"); + } + else + { + cemu_assert_unimplemented(); + } + } - // offset - if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ || texOpcode == GPU7_TEX_INST_SAMPLE || texOpcode == GPU7_TEX_INST_SAMPLE_C ) - { - if( hasOffset ) - { - uint8 offsetComponentCount = 0; - if( texDim == Latte::E_DIM::DIM_1D ) - offsetComponentCount = 1; - else if( texDim == Latte::E_DIM::DIM_2D ) - offsetComponentCount = 2; - else if( texDim == Latte::E_DIM::DIM_3D ) - offsetComponentCount = 3; - else if( texDim == Latte::E_DIM::DIM_2D_ARRAY ) - offsetComponentCount = 2; - else - cemu_assert_unimplemented(); + // offset + if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ || texOpcode == GPU7_TEX_INST_SAMPLE || texOpcode == GPU7_TEX_INST_SAMPLE_C ) + { + if( hasOffset ) + { + uint8 offsetComponentCount = 0; + if( texDim == Latte::E_DIM::DIM_1D ) + offsetComponentCount = 1; + else if( texDim == Latte::E_DIM::DIM_2D ) + offsetComponentCount = 2; + else if( texDim == Latte::E_DIM::DIM_3D ) + offsetComponentCount = 3; + else if( texDim == Latte::E_DIM::DIM_2D_ARRAY ) + offsetComponentCount = 2; + else + cemu_assert_unimplemented(); - if( (texInstruction->textureFetch.offsetX&1) ) - cemu_assert_unimplemented(); - if( (texInstruction->textureFetch.offsetY&1) ) - cemu_assert_unimplemented(); - if ((texInstruction->textureFetch.offsetZ & 1)) - cemu_assert_unimplemented(); + if( (texInstruction->textureFetch.offsetX&1) ) + cemu_assert_unimplemented(); + if( (texInstruction->textureFetch.offsetY&1) ) + cemu_assert_unimplemented(); + if ((texInstruction->textureFetch.offsetZ & 1)) + cemu_assert_unimplemented(); - if( offsetComponentCount == 1 ) - src->addFmt(",{}", texInstruction->textureFetch.offsetX/2); - else if( offsetComponentCount == 2 ) - src->addFmt(",int2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); - else if( offsetComponentCount == 3 ) - src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); - } - } + if( offsetComponentCount == 1 ) + src->addFmt(",{}", texInstruction->textureFetch.offsetX/2); + else if( offsetComponentCount == 2 ) + src->addFmt(",int2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); + else if( offsetComponentCount == 3 ) + src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); + } + } - // lod bias (TODO: wht?) + // lod bias (TODO: wht?) + + src->add(")"); + } - src->add(")"); // sample_compare doesn't return a float if (!isCompare) { From 217e2edda3df57e4b5e9d328b201f2676c62f5d0 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 11 Jan 2025 10:26:35 +0100 Subject: [PATCH 11/12] check if pixel formats match for framebuffer fetch --- .../LatteDecompilerAnalyzer.cpp | 9 ++++- .../LatteDecompilerEmitMSL.cpp | 36 +++++++++---------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 9a40d7436..4d924e94e 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -9,6 +9,9 @@ #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" +// Defined in LatteTextureLegacy.cpp +Latte::E_GX2SURFFMT LatteTexture_ReconstructGX2Format(const Latte::LATTE_SQ_TEX_RESOURCE_WORD1_N& texUnitWord1, const Latte::LATTE_SQ_TEX_RESOURCE_WORD4_N& texUnitWord4); + /* * Return index of used color attachment based on shader pixel export index (0-7) */ @@ -876,10 +879,13 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD // TODO: uncomment? /* auto lastMip = texRegister.word5.get_LAST_LEVEL(); + // TODO: multiple mip levels could technically be supported as well if (lastMip != 0) continue; */ + Latte::E_GX2SURFFMT format = LatteTexture_ReconstructGX2Format(texRegister.word1, texRegister.word4); + // Check if the texture is used as render target for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++) { @@ -890,9 +896,10 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this MPTR colorBufferPhysMem = regColorBufferBase; + Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew); // TODO: check if mip matches as well? - if (physAddr == colorBufferPhysMem) + if (physAddr == colorBufferPhysMem && format == colorBufferFormat) { shader->textureRenderTargetIndex[textureIndex] = j; break; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index ba0180dc9..488cc2d58 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2293,7 +2293,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex if (isGather) src->add("gather"); else - src->add("sample"); + src->add("sample"); if (isCompare) src->add("_compare"); src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex); @@ -2456,25 +2456,25 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex // 1D textures don't support lod if (texDim != Latte::E_DIM::DIM_1D && texDim != Latte::E_DIM::DIM_1D_ARRAY) { - if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L) - { - src->add(", "); - if (texOpcode == GPU7_TEX_INST_SAMPLE_LB) - { - src->addFmt("bias({})", _FormatFloatAsConstant((float)texInstruction->textureFetch.lodBias / 16.0f)); - } - else - { - // TODO: is this correct? - src->add("level("); + if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L) + { + src->add(", "); + if (texOpcode == GPU7_TEX_INST_SAMPLE_LB) + { + src->addFmt("bias({})", _FormatFloatAsConstant((float)texInstruction->textureFetch.lodBias / 16.0f)); + } + else + { + // TODO: is this correct? + src->add("level("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); - } - } - else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) - { - src->add(", level(0.0)"); - } + } + } + else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) + { + src->add(", level(0.0)"); + } } } // gradient parameters From f4985c481efe0676f7a77d7f634aab941504e12a Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 11 Jan 2025 10:59:28 +0100 Subject: [PATCH 12/12] add an option to disable framebuffer fetch --- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 2 +- src/config/CemuConfig.cpp | 4 +++- src/config/CemuConfig.h | 3 ++- src/gui/GeneralSettings2.cpp | 14 ++++++++++++++ src/gui/GeneralSettings2.h | 1 + 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index bd6f93158..dc3b8ae04 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -87,7 +87,7 @@ MetalRenderer::MetalRenderer() // Feature support m_isAppleGPU = m_device->supportsFamily(MTL::GPUFamilyApple1); - m_supportsFramebufferFetch = m_device->supportsFamily(MTL::GPUFamilyApple2); + m_supportsFramebufferFetch = GetConfig().framebuffer_fetch.GetValue() ? m_device->supportsFamily(MTL::GPUFamilyApple2) : false; m_hasUnifiedMemory = m_device->hasUnifiedMemory(); m_supportsMetal3 = m_device->supportsFamily(MTL::GPUFamilyMetal3); m_recommendedMaxVRAMUsage = m_device->recommendedMaxWorkingSetSize(); diff --git a/src/config/CemuConfig.cpp b/src/config/CemuConfig.cpp index c22d71503..dc38647a3 100644 --- a/src/config/CemuConfig.cpp +++ b/src/config/CemuConfig.cpp @@ -338,6 +338,7 @@ void CemuConfig::Load(XMLConfigParser& parser) #endif gdb_port = debug.get("GDBPort", 1337); gpu_capture_dir = debug.get("GPUCaptureDir", ""); + framebuffer_fetch = debug.get("FramebufferFetch", true); // input auto input = parser.get("Input"); @@ -540,7 +541,8 @@ void CemuConfig::Save(XMLConfigParser& parser) debug.set("CrashDumpUnix", crash_dump.GetValue()); #endif debug.set("GDBPort", gdb_port); - debug.set("GPUCaptureDir", gpu_capture_dir.GetValue()); + debug.set("GPUCaptureDir", gpu_capture_dir); + debug.set("FramebufferFetch", framebuffer_fetch); // input auto input = config.set("Input"); diff --git a/src/config/CemuConfig.h b/src/config/CemuConfig.h index 56af04652..0990c6523 100644 --- a/src/config/CemuConfig.h +++ b/src/config/CemuConfig.h @@ -527,7 +527,8 @@ struct CemuConfig // debug ConfigValueBounds crash_dump{ CrashDump::Disabled }; ConfigValue gdb_port{ 1337 }; - ConfigValue gpu_capture_dir{}; + ConfigValue gpu_capture_dir{ "" }; + ConfigValue framebuffer_fetch{ true }; void Load(XMLConfigParser& parser); void Save(XMLConfigParser& parser); diff --git a/src/gui/GeneralSettings2.cpp b/src/gui/GeneralSettings2.cpp index c5bc974d7..31d164819 100644 --- a/src/gui/GeneralSettings2.cpp +++ b/src/gui/GeneralSettings2.cpp @@ -910,6 +910,18 @@ wxPanel* GeneralSettings2::AddDebugPage(wxNotebook* notebook) debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5); } + { + auto* debug_row = new wxFlexGridSizer(0, 2, 0, 0); + debug_row->SetFlexibleDirection(wxBOTH); + debug_row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED); + + m_framebuffer_fetch = new wxCheckBox(panel, wxID_ANY, _("Framebuffer fetch")); + m_framebuffer_fetch->SetToolTip(_("Enable framebuffer fetch for eligible textures on supported devices.")); + + debug_row->Add(m_framebuffer_fetch, 0, wxALL | wxEXPAND, 5); + debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5); + } + panel->SetSizerAndFit(debug_panel_sizer); return panel; @@ -1121,6 +1133,7 @@ void GeneralSettings2::StoreConfig() config.crash_dump = (CrashDump)m_crash_dump->GetSelection(); config.gdb_port = m_gdb_port->GetValue(); config.gpu_capture_dir = m_gpu_capture_dir->GetValue().utf8_string(); + config.framebuffer_fetch = m_framebuffer_fetch->IsChecked(); g_config.Save(); } @@ -1816,6 +1829,7 @@ void GeneralSettings2::ApplyConfig() m_crash_dump->SetSelection((int)config.crash_dump.GetValue()); m_gdb_port->SetValue(config.gdb_port.GetValue()); m_gpu_capture_dir->SetValue(wxHelper::FromUtf8(config.gpu_capture_dir.GetValue())); + m_framebuffer_fetch->SetValue(config.framebuffer_fetch); } void GeneralSettings2::OnAudioAPISelected(wxCommandEvent& event) diff --git a/src/gui/GeneralSettings2.h b/src/gui/GeneralSettings2.h index 54a782544..58459e958 100644 --- a/src/gui/GeneralSettings2.h +++ b/src/gui/GeneralSettings2.h @@ -80,6 +80,7 @@ class GeneralSettings2 : public wxDialog wxChoice* m_crash_dump; wxSpinCtrl* m_gdb_port; wxTextCtrl* m_gpu_capture_dir; + wxCheckBox* m_framebuffer_fetch; void OnAccountCreate(wxCommandEvent& event); void OnAccountDelete(wxCommandEvent& event);