Skip to content

Commit

Permalink
Merge pull request #16 from SamoZ256/metal-accurate-barriers
Browse files (browse the repository at this point in the history)
Framebuffer fetch
  • Loading branch information
SamoZ256 authored Jan 12, 2025
2 parents 68aa405 + f4985c4 commit 159a10e
Show file tree
Hide file tree
Showing 12 changed files with 510 additions and 361 deletions.
107 changes: 59 additions & 48 deletions src/Cafe/HW/Latte/Core/LatteShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,11 +504,31 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash2 = 0;
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);

uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp;

auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
// TODO: include always in the hash in case of geometry shader or rect shader
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
{
vsHash += 13ULL;
}
else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS)
{
// required for Vulkan since we have to write the pointsize in the shader
vsHash += 71ULL;
}
vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0);
// halfZ
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
vsHash += 0x1537;

if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
{
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
Expand All @@ -522,47 +542,28 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,

if (!usesGeometryShader)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();

// HACK
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
// HACK
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;

const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;

if (rasterizationEnabled)
vsHash += 51ULL;
if (rasterizationEnabled)
vsHash += 51ULL;

// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}

uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp;

auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
// TODO: include always in the hash in case of geometry shader or rect shader
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
{
vsHash += 13ULL;
}
else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS)
{
// required for Vulkan since we have to write the pointsize in the shader
vsHash += 71ULL;
}
vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0);
// halfZ
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
vsHash += 0x1537;

_shaderBaseHash_vs = vsHash;
}

Expand All @@ -589,19 +590,6 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
// get vertex shader
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);

#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
psHash += (uint64)dataType;
psHash = std::rotl<uint64>(psHash, 7);
}
}
#endif

_shaderBaseHash_ps = psHash;
}

Expand Down Expand Up @@ -635,6 +623,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
auxHashTex += 0x333;
}
}

return auxHash + auxHashTex;
}

Expand Down Expand Up @@ -668,6 +657,28 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte
auxHash = (auxHash << 3) | (auxHash >> 61);
auxHash += (uint64)dim;
}

// Textures as render targets
for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++)
{
uint8 t = pixelShader->textureUnitList[i];
auxHash = std::rotl<uint64>(auxHash, 11);
auxHash += (uint64)pixelShader->textureRenderTargetIndex[t];
}

#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
auxHash = std::rotl<uint64>(auxHash, 7);
auxHash += (uint64)dataType;
}
}
#endif

return auxHash;
}

Expand Down
1 change: 1 addition & 0 deletions src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ struct LatteDecompilerShader
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS];

// analyzer stage (pixel outputs)
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"

// Defined in LatteTextureLegacy.cpp
Latte::E_GX2SURFFMT LatteTexture_ReconstructGX2Format(const Latte::LATTE_SQ_TEX_RESOURCE_WORD1_N& texUnitWord1, const Latte::LATTE_SQ_TEX_RESOURCE_WORD4_N& texUnitWord4);

/*
* Return index of used color attachment based on shader pixel export index (0-7)
*/
Expand Down Expand Up @@ -850,6 +853,59 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
shader->textureUnitList[shader->textureUnitListCount] = i;
shader->textureUnitListCount++;
}
shader->textureRenderTargetIndex[i] = 255;
}
// check if textures are used as render targets
if (shader->shaderType == LatteConst::ShaderType::Pixel)
{
uint8 colorBufferMask = LatteMRT::GetActiveColorBufferMask(shader, *shaderContext->contextRegistersNew);
for (sint32 i = 0; i < shader->textureUnitListCount; i++)
{
sint32 textureIndex = shader->textureUnitList[i];
const auto& texRegister = texRegs[textureIndex];

// get physical address of texture data
MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8);
if (physAddr == MPTR_NULL)
continue; // invalid data

// Check for dimension
auto dim = shader->textureUnitDim[textureIndex];
// TODO: 2D arrays could technically be supported as well
if (dim != Latte::E_DIM::DIM_2D)
continue;

// Check for mip level
// TODO: uncomment?
/*
auto lastMip = texRegister.word5.get_LAST_LEVEL();
// TODO: multiple mip levels could technically be supported as well
if (lastMip != 0)
continue;
*/

Latte::E_GX2SURFFMT format = LatteTexture_ReconstructGX2Format(texRegister.word1, texRegister.word4);

// Check if the texture is used as render target
for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++)
{
if (((colorBufferMask) & (1 << j)) == 0)
continue; // color buffer not enabled

uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + j);
uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this

MPTR colorBufferPhysMem = regColorBufferBase;
Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew);

// TODO: check if mip matches as well?
if (physAddr == colorBufferPhysMem && format == colorBufferFormat)
{
shader->textureRenderTargetIndex[textureIndex] = j;
break;
}
}
}
}
// for geometry shaders check the copy shader for stream writes
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)
Expand Down
Loading

0 comments on commit 159a10e

Please sign in to comment.