diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 65227339e4..be6f14d610 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "Common/ChunkFile.h" #include "Common/CommonTypes.h" @@ -485,20 +486,16 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t) // matrix multiply - results might overflow, but we don't care since we only use the lower 24 bits // of the result. - const int indmtxid = indirect.mid & 3; - if (indmtxid) + if (indirect.matrix_index != IndMtxIndex::Off) { - const IND_MTX& indmtx = bpmem.indmtx[indmtxid - 1]; - const int scale = - ((u32)indmtx.col0.s0 << 0) | ((u32)indmtx.col1.s1 << 2) | ((u32)indmtx.col2.s2 << 4); + const IND_MTX& indmtx = bpmem.indmtx[static_cast(indirect.matrix_index.Value()) - 1]; - int shift; + const int shift = 17 - indmtx.GetScale(); - switch (indirect.mid & 12) + switch (indirect.matrix_id) { - case 0: + case IndMtxId::Indirect: // matrix values are S0.10, output format is S17.7, so divide by 8 - shift = (17 - scale); indtevtrans[0] = (indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2]) >> 3; @@ -506,25 +503,29 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t) indmtx.col2.mf * indcoord[2]) >> 3; break; - case 4: // s matrix + case IndMtxId::S: // s is S17.7, matrix elements are divided by 256, output is S17.7, so divide by 256. - TODO: // Maybe, since s is actually stored as S24, we should divide by 256*64? - shift = (17 - scale); indtevtrans[0] = s * indcoord[0] / 256; indtevtrans[1] = t * indcoord[0] / 256; break; - case 8: // t matrix - shift = (17 - scale); + case IndMtxId::T: indtevtrans[0] = s * indcoord[1] / 256; indtevtrans[1] = t * indcoord[1] / 256; break; default: + PanicAlertFmt("Invalid indirect matrix ID {}", indirect.matrix_id); return; } indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift; indtevtrans[1] = shift >= 0 ? indtevtrans[1] >> shift : indtevtrans[1] << -shift; } + else + { + // If matrix_index is Off (0), matrix_id should be Indirect (0) + ASSERT(indirect.matrix_id == IndMtxId::Indirect); + } if (indirect.fb_addprev) { @@ -559,9 +560,16 @@ void Tev::Draw() const int stageNum2 = stageNum >> 1; const int stageOdd = stageNum & 1; - const u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum); + u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum); const u32 texmap = bpmem.tevindref.getTexMap(stageNum); + // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does + // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console). + // This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set + // the number of tex gens to 2 (bug 11462). + if (texcoordSel >= bpmem.genMode.numtexgens) + texcoordSel = 0; + const TEXSCALE& texscale = bpmem.texscale[stageNum2]; const s32 scaleS = stageOdd ? texscale.ss1 : texscale.ss0; const s32 scaleT = stageOdd ? texscale.ts1 : texscale.ts0; @@ -592,8 +600,13 @@ void Tev::Draw() const TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stageNum].colorC; const TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stageNum].alphaC; - const int texcoordSel = order.getTexCoord(stageOdd); - const int texmap = order.getTexMap(stageOdd); + u32 texcoordSel = order.getTexCoord(stageOdd); + const u32 texmap = order.getTexMap(stageOdd); + + // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does + // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console). + if (texcoordSel >= bpmem.genMode.numtexgens) + texcoordSel = 0; Indirect(stageNum, Uv[texcoordSel].s, Uv[texcoordSel].t); @@ -603,8 +616,17 @@ void Tev::Draw() // RGBA u8 texel[4]; - TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], TextureLinear[stageNum], - texmap, texel); + if (bpmem.genMode.numtexgens > 0) + { + TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], + TextureLinear[stageNum], texmap, texel); + } + else + { + // It seems like the result is always black when no tex coords are enabled, but further + // hardware testing is needed. + std::memset(texel, 0, 4); + } #if ALLOW_TEV_DUMPS if (g_ActiveConfig.bDumpTevTextureFetches) diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index f22ceda2cd..569b1747f3 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -300,6 +300,31 @@ struct fmt::formatter : EnumFormatter formatter() : EnumFormatter({"None", "S", "T", "ST", "U", "SU", "TU", "STU"}) {} }; +enum class IndMtxIndex : u32 +{ + Off = 0, + Matrix0 = 1, + Matrix1 = 2, + Matrix2 = 3, +}; +template <> +struct fmt::formatter : EnumFormatter +{ + formatter() : EnumFormatter({"Off", "Matrix 0", "Matrix 1", "Matrix 2"}) {} +}; + +enum class IndMtxId : u32 +{ + Indirect = 0, + S = 1, + T = 2, +}; +template <> +struct fmt::formatter : EnumFormatter +{ + formatter() : EnumFormatter({"Indirect", "S", "T"}) {} +}; + // Indirect texture bump alpha enum class IndTexBumpAlpha : u32 { @@ -335,7 +360,7 @@ union IND_MTXA { BitField<0, 11, s32> ma; BitField<11, 11, s32> mb; - BitField<22, 2, u32> s0; // bits 0-1 of scale factor + BitField<22, 2, u8, u32> s0; // bits 0-1 of scale factor u32 hex; }; @@ -343,7 +368,7 @@ union IND_MTXB { BitField<0, 11, s32> mc; BitField<11, 11, s32> md; - BitField<22, 2, u32> s1; // bits 2-3 of scale factor + BitField<22, 2, u8, u32> s1; // bits 2-3 of scale factor u32 hex; }; @@ -351,7 +376,7 @@ union IND_MTXC { BitField<0, 11, s32> me; BitField<11, 11, s32> mf; - BitField<22, 2, u32> s2; // bits 4-5 of scale factor + BitField<22, 2, u8, u32> s2; // bits 4-5 of scale factor u32 hex; }; @@ -360,6 +385,7 @@ struct IND_MTX IND_MTXA col0; IND_MTXB col1; IND_MTXC col2; + u8 GetScale() const { return (col0.s0 << 0) | (col1.s1 << 2) | (col2.s2 << 4); } }; union IND_IMASK @@ -475,8 +501,12 @@ union TevStageIndirect BitField<4, 1, bool, u32> bias_s; BitField<5, 1, bool, u32> bias_t; BitField<6, 1, bool, u32> bias_u; - BitField<7, 2, IndTexBumpAlpha> bs; // Indicates which coordinate will become the 'bump alpha' - BitField<9, 4, u32> mid; // Matrix ID to multiply offsets with + BitField<7, 2, IndTexBumpAlpha> bs; // Indicates which coordinate will become the 'bump alpha' + // Indicates which indirect matrix is used when matrix_id is Indirect. + // Also always indicates which indirect matrix to use for the scale factor, even with S or T. + BitField<9, 2, IndMtxIndex> matrix_index; + // Should be set to Indirect (0) if matrix_index is Off (0) + BitField<11, 2, IndMtxId> matrix_id; BitField<13, 3, IndTexWrap> sw; // Wrapping factor for S of regular coord BitField<16, 3, IndTexWrap> tw; // Wrapping factor for T of regular coord BitField<19, 1, bool, u32> lb_utclod; // Use modified or unmodified texture @@ -492,9 +522,9 @@ union TevStageIndirect u32 fullhex; - // If bs and mid are zero, the result of the stage is independent of + // If bs and matrix are zero, the result of the stage is independent of // the texture sample data, so we can skip sampling the texture. - bool IsActive() const { return bs != IndTexBumpAlpha::Off || mid != 0; } + bool IsActive() const { return bs != IndTexBumpAlpha::Off || matrix_index != IndMtxIndex::Off; } }; template <> struct fmt::formatter @@ -508,13 +538,15 @@ struct fmt::formatter "Format: {}\n" "Bias: {}\n" "Bump alpha: {}\n" + "Offset matrix index: {}\n" "Offset matrix ID: {}\n" "Regular coord S wrapping factor: {}\n" "Regular coord T wrapping factor: {}\n" "Use modified texture coordinates for LOD computation: {}\n" "Add texture coordinates from previous TEV stage: {}", - tevind.bt, tevind.fmt, tevind.bias, tevind.bs, tevind.mid, tevind.sw, - tevind.tw, tevind.lb_utclod ? "Yes" : "No", tevind.fb_addprev ? "Yes" : "No"); + tevind.bt, tevind.fmt, tevind.bias, tevind.bs, tevind.matrix_index, + tevind.matrix_id, tevind.sw, tevind.tw, tevind.lb_utclod ? "Yes" : "No", + tevind.fb_addprev ? "Yes" : "No"); } }; @@ -600,13 +632,13 @@ struct fmt::formatter union RAS1_IREF { BitField<0, 3, u32> bi0; // Indirect tex stage 0 ntexmap - BitField<3, 3, u32> bc0; // Indirect tex stage 0 ntexmap + BitField<3, 3, u32> bc0; // Indirect tex stage 0 ntexcoord BitField<6, 3, u32> bi1; BitField<9, 3, u32> bc1; BitField<12, 3, u32> bi2; - BitField<15, 3, u32> bc3; // Typo? - BitField<18, 3, u32> bi4; - BitField<21, 3, u32> bc4; + BitField<15, 3, u32> bc2; + BitField<18, 3, u32> bi3; + BitField<21, 3, u32> bc3; u32 hex; u32 getTexCoord(int i) const { return (hex >> (6 * i + 3)) & 7; } @@ -625,8 +657,8 @@ struct fmt::formatter "Stage 1 ntexmap: {}\nStage 1 ntexcoord: {}\n" "Stage 2 ntexmap: {}\nStage 2 ntexcoord: {}\n" "Stage 3 ntexmap: {}\nStage 3 ntexcoord: {}", - indref.bi0, indref.bc0, indref.bi1, indref.bc1, indref.bi2, indref.bc3, - indref.bi4, indref.bc4); + indref.bi0, indref.bc0, indref.bi1, indref.bc1, indref.bi2, indref.bc2, + indref.bi3, indref.bc3); } }; @@ -911,6 +943,8 @@ union GenMode BitField<7, 1, u32> unused; // 1 bit unused? BitField<8, 1, bool, u32> flat_shading; // unconfirmed BitField<9, 1, bool, u32> multisampling; + // This value is 1 less than the actual number (0-15 map to 1-16). + // In other words there is always at least 1 tev stage BitField<10, 4, u32> numtevstages; BitField<14, 2, CullMode> cullmode; BitField<16, 3, u32> numindstages; @@ -937,7 +971,7 @@ struct fmt::formatter "ZFreeze: {}", mode.numtexgens, mode.numcolchans, mode.unused, mode.flat_shading ? "Yes" : "No", mode.multisampling ? "Yes" : "No", - mode.numtevstages, mode.cullmode, mode.numindstages, + mode.numtevstages + 1, mode.cullmode, mode.numindstages, mode.zfreeze ? "Yes" : "No"); } }; @@ -1912,7 +1946,7 @@ struct BPMemory GenMode genMode; u32 display_copy_filter[4]; // 01-04 u32 unknown; // 05 - // indirect matrices (set by GXSetIndTexMtx, selected by TevStageIndirect::mid) + // indirect matrices (set by GXSetIndTexMtx, selected by TevStageIndirect::matrix_index) // abc form a 2x3 offset matrix, there's 3 such matrices // the 3 offset matrices can either be indirect type, S-type, or T-type // 6bit scale factor s is distributed across IND_MTXA/B/C. diff --git a/Source/Core/VideoCommon/GXPipelineTypes.h b/Source/Core/VideoCommon/GXPipelineTypes.h index 1ddd853597..8892d386f5 100644 --- a/Source/Core/VideoCommon/GXPipelineTypes.h +++ b/Source/Core/VideoCommon/GXPipelineTypes.h @@ -20,6 +20,7 @@ namespace VideoCommon // As pipelines encompass both shader UIDs and render states, changes to either of these should // also increment the pipeline UID version. Incrementing the UID version will cause all UID // caches to be invalidated. +// TODO: Remove PixelShaderUid hasindstage on the next UID version bump constexpr u32 GX_PIPELINE_UID_VERSION = 2; // Last changed in PR 9122 struct GXPipelineUid diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index ab59bfd52a..636bf2be7f 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -220,13 +220,10 @@ PixelShaderUid GetPixelShaderUid() // indirect texture map lookup int nIndirectStagesUsed = 0; - if (uid_data->genMode_numindstages > 0) + for (unsigned int i = 0; i < numStages; ++i) { - for (unsigned int i = 0; i < numStages; ++i) - { - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages) - nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; - } + if (bpmem.tevind[i].IsActive()) + nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt; } uid_data->nIndirectStagesUsed = nIndirectStagesUsed; @@ -238,16 +235,14 @@ PixelShaderUid GetPixelShaderUid() for (unsigned int n = 0; n < numStages; n++) { - int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); - bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens; - // HACK to handle cases where the tex gen is not enabled - if (!bHasTexCoord) - texcoord = bpmem.genMode.numtexgens; + uid_data->stagehash[n].tevorders_texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1); + // hasindstage previously was used as a criterion to set tevind to 0, but there are variables in + // tevind that are used even if the indirect stage is disabled, so now it is only left in to + // avoid breaking existing UIDs (in most cases, games will have 0 in tevind anyways) + // TODO: Remove hasindstage on the next UID version bump uid_data->stagehash[n].hasindstage = bpmem.tevind[n].bt < bpmem.genMode.numindstages; - uid_data->stagehash[n].tevorders_texcoord = texcoord; - if (uid_data->stagehash[n].hasindstage) - uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; + uid_data->stagehash[n].tevind = bpmem.tevind[n].hex; TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC; TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC; @@ -361,7 +356,7 @@ void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& hos uid_data->bounding_box &= host_config.bounding_box & host_config.backend_bbox; } -void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_texgens, +void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, const ShaderHostConfig& host_config, bool bounding_box) { // dot product for integer vectors @@ -546,8 +541,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos uid_data->genMode_numtexgens, uid_data->genMode_numindstages); // Stuff that is shared between ubershaders and pixelgen. - WritePixelShaderCommonHeader(out, api_type, uid_data->genMode_numtexgens, host_config, - uid_data->bounding_box); + WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box); if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ) { @@ -775,9 +769,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos out.Write("col1 = float4(0.0, 0.0, 0.0, 0.0);\n"); } - // HACK to handle cases where the tex gen is not enabled if (uid_data->genMode_numtexgens == 0) { + // TODO: This is a hack to ensure that shaders still compile when setting out of bounds tex + // coord indices to 0. Ideally, it shouldn't exist at all, but the exact behavior hasn't been + // tested. out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n"); } else @@ -796,24 +792,34 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos { if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0) { - const u32 texcoord = uid_data->GetTevindirefCoord(i); + u32 texcoord = uid_data->GetTevindirefCoord(i); const u32 texmap = uid_data->GetTevindirefMap(i); - if (texcoord < uid_data->genMode_numtexgens) - { - out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2); - out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2, - (i & 1) != 0 ? "zw" : "xy"); - } - else - { - out.Write("\ttempcoord = int2(0, 0);\n"); - } + // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does + // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console). + // This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set + // the number of tex gens to 2 (bug 11462). + if (texcoord >= uid_data->genMode_numtexgens) + texcoord = 0; + + out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2); + out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2, + (i & 1) ? "zw" : "xy"); out.Write("\tint3 iindtex{} = ", i); SampleTexture(out, "float2(tempcoord)", "abg", texmap, stereo, api_type); } } + for (u32 i = uid_data->genMode_numindstages; i < 4; i++) + { + // Referencing a stage above the number of ind stages is undefined behavior, + // and on console produces a noise pattern (details unknown). + // TODO: This behavior is nowhere near that, but it ensures the shader still compiles. + if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0) + { + out.Write("\tint3 iindtex{} = int3(0, 0, 0); // Undefined behavior on console\n", i); + } + } for (u32 i = 0; i < numStages; i++) { @@ -950,17 +956,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i const auto& stage = uid_data->stagehash[n]; out.Write("\n\t// TEV stage {}\n", n); - // HACK to handle cases where the tex gen is not enabled + // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does not + // exist), then tex coord 0 is used (though sometimes glitchy effects happen on console). u32 texcoord = stage.tevorders_texcoord; const bool has_tex_coord = texcoord < uid_data->genMode_numtexgens; if (!has_tex_coord) texcoord = 0; - if (stage.hasindstage) { - TevStageIndirect tevind; - tevind.hex = stage.tevind; - + const TevStageIndirect tevind{.hex = stage.tevind}; out.Write("\t// indirect op\n"); // Perform the indirect op on the incoming regular coordinates @@ -991,7 +995,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i // TODO: Should we reset alphabump to 0 here? } - if (tevind.mid != 0) + if (tevind.matrix_index != IndMtxIndex::Off) { // format static constexpr std::array tev_ind_fmt_mask{ @@ -1038,11 +1042,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i tev_ind_bias_add[u32(tevind.fmt.Value())]); } + // Multiplied by 2 because each matrix has two rows. + // Note also that the 4th column of the matrix contains the scale factor. + const u32 mtxidx = 2 * (static_cast(tevind.matrix_index.Value()) - 1); + // multiply by offset matrix and scale - calculations are likely to overflow badly, // yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result - if (tevind.mid <= 3) + if (tevind.matrix_id == IndMtxId::Indirect) { - const u32 mtxidx = 2 * (tevind.mid - 1); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans{} = int2(idot(" I_INDTEXMTX @@ -1064,10 +1071,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx); } } - else if (tevind.mid <= 7 && has_tex_coord) - { // s matrix - ASSERT(tevind.mid >= 5); - const u32 mtxidx = 2 * (tevind.mid - 5); + else if (tevind.matrix_id == IndMtxId::S) + { + ASSERT(has_tex_coord); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.xx) >> 8;\n", n, @@ -1086,10 +1092,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx); } } - else if (tevind.mid <= 11 && has_tex_coord) - { // t matrix - ASSERT(tevind.mid >= 9); - const u32 mtxidx = 2 * (tevind.mid - 9); + else if (tevind.matrix_id == IndMtxId::T) + { + ASSERT(has_tex_coord); out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx); out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.yy) >> 8;\n", n, @@ -1112,20 +1117,22 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i else { out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n); + ASSERT(false); // Unknown value for matrix_id } } else { out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n); + // If matrix_index is Off (0), matrix_id should be Indirect (0) + ASSERT(tevind.matrix_id == IndMtxId::Indirect); } // --------- // Wrapping // --------- - // TODO: Should the last element be 1 or (1<<7)? - static constexpr std::array tev_ind_wrap_start{ - "0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1", + static constexpr std::array tev_ind_wrap_start{ + "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", }; // wrap S @@ -1133,14 +1140,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i { out.Write("\twrappedcoord.x = fixpoint_uv{}.x;\n", texcoord); } - else if (tevind.sw == IndTexWrap::ITW_0) + else if (tevind.sw >= IndTexWrap::ITW_0) // 7 (Invalid) appears to behave the same as 6 (ITW_0) { out.Write("\twrappedcoord.x = 0;\n"); } else { out.Write("\twrappedcoord.x = fixpoint_uv{}.x & ({} - 1);\n", texcoord, - tev_ind_wrap_start[u32(tevind.sw.Value())]); + tev_ind_wrap_start[u32(tevind.sw.Value()) - u32(IndTexWrap::ITW_256)]); } // wrap T @@ -1148,14 +1155,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i { out.Write("\twrappedcoord.y = fixpoint_uv{}.y;\n", texcoord); } - else if (tevind.tw == IndTexWrap::ITW_0) + else if (tevind.tw >= IndTexWrap::ITW_0) // 7 (Invalid) appears to behave the same as 6 (ITW_0) { out.Write("\twrappedcoord.y = 0;\n"); } else { out.Write("\twrappedcoord.y = fixpoint_uv{}.y & ({} - 1);\n", texcoord, - tev_ind_wrap_start[u32(tevind.tw.Value())]); + tev_ind_wrap_start[u32(tevind.tw.Value()) - u32(IndTexWrap::ITW_256)]); } if (tevind.fb_addprev) // add previous tevcoord @@ -1191,7 +1198,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i out.Write("\trastemp = {}.{};\n", tev_ras_table[u32(stage.tevorders_colorchan)], rasswap); } - if (stage.tevorders_enable) + if (stage.tevorders_enable && uid_data->genMode_numtexgens > 0) { // Generate swizzle string to represent the texture color channel swapping const char texswap[5] = { @@ -1202,17 +1209,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i '\0', }; - if (!stage.hasindstage) - { - // calc tevcord - if (has_tex_coord) - out.Write("\ttevcoord.xy = fixpoint_uv{};\n", texcoord); - else - out.Write("\ttevcoord.xy = int2(0, 0);\n"); - } out.Write("\ttextemp = "); SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, stereo, api_type); } + else if (uid_data->genMode_numtexgens == 0) + { + // It seems like the result is always black when no tex coords are enabled, but further testing + // is needed. + out.Write("\ttextemp = int4(0, 0, 0, 0);\n"); + } else { out.Write("\ttextemp = int4(255, 255, 255, 255);\n"); diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index a9ffa25498..d75fac8eb7 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -66,9 +66,9 @@ struct pixel_shader_uid_data u32 tevindref_bi1 : 3; u32 tevindref_bc1 : 3; u32 tevindref_bi2 : 3; + u32 tevindref_bc2 : 3; + u32 tevindref_bi3 : 3; u32 tevindref_bc3 : 3; - u32 tevindref_bi4 : 3; - u32 tevindref_bc4 : 3; void SetTevindrefValues(int index, u32 texcoord, u32 texmap) { @@ -84,55 +84,39 @@ struct pixel_shader_uid_data } else if (index == 2) { - tevindref_bc3 = texcoord; + tevindref_bc2 = texcoord; tevindref_bi2 = texmap; } else if (index == 3) { - tevindref_bc4 = texcoord; - tevindref_bi4 = texmap; + tevindref_bc3 = texcoord; + tevindref_bi3 = texmap; } } u32 GetTevindirefCoord(int index) const { if (index == 0) - { return tevindref_bc0; - } else if (index == 1) - { return tevindref_bc1; - } else if (index == 2) - { - return tevindref_bc3; - } + return tevindref_bc2; else if (index == 3) - { - return tevindref_bc4; - } + return tevindref_bc3; return 0; } u32 GetTevindirefMap(int index) const { if (index == 0) - { return tevindref_bi0; - } else if (index == 1) - { return tevindref_bi1; - } else if (index == 2) - { return tevindref_bi2; - } else if (index == 3) - { - return tevindref_bi4; - } + return tevindref_bi3; return 0; } @@ -149,6 +133,7 @@ struct pixel_shader_uid_data u32 pad1 : 6; // TODO: Clean up the swapXY mess + // TODO: remove hasindstage, as it no longer does anything useful u32 hasindstage : 1; u32 tevind : 21; u32 tevksel_swap1a : 2; @@ -174,7 +159,7 @@ using PixelShaderUid = ShaderUid; ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& host_config, const pixel_shader_uid_data* uid_data); -void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_texgens, +void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, const ShaderHostConfig& host_config, bool bounding_box); void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& host_config, PixelShaderUid* uid); diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index bae48623c7..0ae1ced1e4 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -148,29 +148,18 @@ void PixelShaderManager::SetConstants() for (u32 i = 0; i < (bpmem.genMode.numtevstages + 1); ++i) { - u32 stage = bpmem.tevind[i].bt; - if (stage < bpmem.genMode.numindstages) - { - // We set some extra bits so the ubershader can quickly check if these - // features are in use. - if (bpmem.tevind[i].IsActive()) - constants.pack1[stage][3] = - bpmem.tevindref.getTexCoord(stage) | bpmem.tevindref.getTexMap(stage) << 8 | 1 << 16; - // Note: a tevind of zero just happens to be a passthrough, so no need - // to set an extra bit. - constants.pack1[i][2] = bpmem.tevind[i].hex; // TODO: This match shadergen, but videosw - // will always wrap. + // Note: a tevind of zero just happens to be a passthrough, so no need + // to set an extra bit. Furthermore, wrap and add to previous apply even if there is no + // indirect stage. + constants.pack1[i][2] = bpmem.tevind[i].hex; - // The ubershader uses tevind != 0 as a condition whether to calculate texcoords, - // even when texture is disabled, instead of the stage < bpmem.genMode.numindstages. - // We set an unused bit here to indicate that the stage is active, even if it - // is just a pass-through. - constants.pack1[i][2] |= 0x80000000; - } - else - { - constants.pack1[i][2] = 0; - } + u32 stage = bpmem.tevind[i].bt; + + // We use an extra bit (1 << 16) to provide a fast way of testing if this feature is in use. + // Note also that this is indexed by indirect stage, not by TEV stage. + if (bpmem.tevind[i].IsActive() && stage < bpmem.genMode.numindstages) + constants.pack1[stage][3] = + bpmem.tevindref.getTexCoord(stage) | bpmem.tevindref.getTexMap(stage) << 8 | 1 << 16; } dirty = true; @@ -336,9 +325,7 @@ void PixelShaderManager::SetIndTexScaleChanged(bool high) void PixelShaderManager::SetIndMatrixChanged(int matrixidx) { - int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) | - ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) | - ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4); + const u8 scale = bpmem.indmtx[matrixidx].GetScale(); // xyz - static matrix // w - dynamic matrix scale / 128 diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index c35ce2821d..642896a3dc 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp @@ -64,7 +64,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write("// Pixel UberShader for {} texgens{}{}\n", numTexgen, early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : ""); - WritePixelShaderCommonHeader(out, ApiType, numTexgen, host_config, bounding_box); + WritePixelShaderCommonHeader(out, ApiType, host_config, bounding_box); WriteUberShaderCommonHeader(out, ApiType, host_config); if (per_pixel_lighting) WriteLightingFunction(out); @@ -148,19 +148,16 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, } // Uniform index -> texture coordinates + // Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does + // not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console). + // This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set + // the number of tex gens to 2 (bug 11462). if (numTexgen > 0) { - if (ApiType != APIType::D3D) - { - out.Write("float3 selectTexCoord(uint index) {{\n"); - } - else - { - out.Write("float3 selectTexCoord(uint index"); - for (u32 i = 0; i < numTexgen; i++) - out.Write(", float3 tex{}", i); - out.Write(") {{\n"); - } + out.Write("int2 selectTexCoord(uint index"); + for (u32 i = 0; i < numTexgen; i++) + out.Write(", int2 fixpoint_uv{}", i); + out.Write(") {{\n"); if (ApiType == APIType::D3D) { @@ -168,48 +165,51 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, for (u32 i = 0; i < numTexgen; i++) { out.Write(" case {}u:\n" - " return tex{};\n", + " return fixpoint_uv{};\n", i, i); } out.Write(" default:\n" - " return float3(0.0, 0.0, 0.0);\n" + " return fixpoint_uv0;\n" " }}\n"); } else { + out.Write(" if (index >= {}u) {{\n", numTexgen); + out.Write(" return fixpoint_uv0;\n" + " }}\n"); if (numTexgen > 4) out.Write(" if (index < 4u) {{\n"); if (numTexgen > 2) out.Write(" if (index < 2u) {{\n"); if (numTexgen > 1) - out.Write(" return (index == 0u) ? tex0 : tex1;\n"); + out.Write(" return (index == 0u) ? fixpoint_uv0 : fixpoint_uv1;\n"); else - out.Write(" return (index == 0u) ? tex0 : float3(0.0, 0.0, 0.0);\n"); + out.Write(" return fixpoint_uv0;\n"); if (numTexgen > 2) { - out.Write(" }} else {{\n"); // >= 2 + out.Write(" }} else {{\n"); // >= 2 < min(4, numTexgen) if (numTexgen > 3) - out.Write(" return (index == 2u) ? tex2 : tex3;\n"); + out.Write(" return (index == 2u) ? fixpoint_uv2 : fixpoint_uv3;\n"); else - out.Write(" return (index == 2u) ? tex2 : float3(0.0, 0.0, 0.0);\n"); + out.Write(" return fixpoint_uv2;\n"); out.Write(" }}\n"); } if (numTexgen > 4) { - out.Write(" }} else {{\n"); // >= 4 <= 8 + out.Write(" }} else {{\n"); // >= 4 < min(8, numTexgen) if (numTexgen > 6) out.Write(" if (index < 6u) {{\n"); if (numTexgen > 5) - out.Write(" return (index == 4u) ? tex4 : tex5;\n"); + out.Write(" return (index == 4u) ? fixpoint_uv4 : fixpoint_uv5;\n"); else - out.Write(" return (index == 4u) ? tex4 : float3(0.0, 0.0, 0.0);\n"); + out.Write(" return fixpoint_uv4;\n"); if (numTexgen > 6) { - out.Write(" }} else {{\n"); // >= 6 <= 8 + out.Write(" }} else {{\n"); // >= 6 < min(8, numTexgen) if (numTexgen > 7) - out.Write(" return (index == 6u) ? tex6 : tex7;\n"); + out.Write(" return (index == 6u) ? fixpoint_uv6 : fixpoint_uv7;\n"); else - out.Write(" return (index == 6u) ? tex6 : float3(0.0, 0.0, 0.0);\n"); + out.Write(" return fixpoint_uv6;\n"); out.Write(" }}\n"); } out.Write(" }}\n"); @@ -287,15 +287,15 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, // ====================== const auto LookupIndirectTexture = [&out, stereo](std::string_view out_var_name, std::string_view in_index_name) { + // in_index_name is the indirect stage, not the tev stage + // bpmem_iref is packed differently from RAS1_IREF out.Write("{{\n" " uint iref = bpmem_iref({});\n" " if ( iref != 0u)\n" " {{\n" " uint texcoord = bitfieldExtract(iref, 0, 3);\n" " uint texmap = bitfieldExtract(iref, 8, 3);\n" - " float3 uv = getTexCoord(texcoord);\n" - " int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS - "[texcoord].zw);\n" + " int2 fixedPoint_uv = getTexCoord(texcoord);\n" "\n" " if (({} & 1u) == 0u)\n" " fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].xy;\n" @@ -306,6 +306,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, "[texmap].xy, {})).abg;\n", in_index_name, in_index_name, in_index_name, in_index_name, out_var_name, stereo ? "float(layer)" : "0.0"); + // There is always a bit set in bpmem_iref if the data is valid (matrix is not off, and the + // indirect texture stage is enabled). If the matrix is off, the result doesn't matter; if the + // indirect texture stage is disabled, the result is undefined (and produces a glitchy pattern + // on hardware, different from this). out.Write(" }}\n" " else\n" " {{\n" @@ -666,21 +670,14 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, "\n"); } - // Since the texture coodinate variables aren't global, we need to pass - // them to the select function in D3D. + // Since the fixed-point texture coodinate variables aren't global, we need to pass + // them to the select function. This applies to all backends. if (numTexgen > 0) { - if (ApiType != APIType::D3D) - { - out.Write("#define getTexCoord(index) selectTexCoord((index))\n\n"); - } - else - { - out.Write("#define getTexCoord(index) selectTexCoord((index)"); - for (u32 i = 0; i < numTexgen; i++) - out.Write(", tex{}", i); - out.Write(")\n\n"); - } + out.Write("#define getTexCoord(index) selectTexCoord((index)"); + for (u32 i = 0; i < numTexgen; i++) + out.Write(", fixpoint_uv{}", i); + out.Write(")\n\n"); } if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) @@ -788,11 +785,18 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, // Disable texturing when there are no texgens (for now) if (numTexgen != 0) { - out.Write(" uint tex_coord = {};\n", + for (u32 i = 0; i < numTexgen; i++) + { + out.Write(" int2 fixpoint_uv{} = int2(", i); + out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i); + out.Write(" * " I_TEXDIMS "[{}].zw);\n", i); + // TODO: S24 overflows here? + } + + out.Write("\n" + " uint tex_coord = {};\n", BitfieldExtract<&TwoTevStageOrders::texcoord0>("ss.order")); - out.Write(" float3 uv = getTexCoord(tex_coord);\n" - " int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS - "[tex_coord].zw);\n" + out.Write(" int2 fixedPoint_uv = getTexCoord(tex_coord);\n" "\n" " bool texture_enabled = (ss.order & {}u) != 0u;\n", 1 << TwoTevStageOrders().enable0.StartBit()); @@ -806,7 +810,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write(" uint fmt = {};\n", BitfieldExtract<&TevStageIndirect::fmt>("tevind")); out.Write(" uint bias = {};\n", BitfieldExtract<&TevStageIndirect::bias>("tevind")); out.Write(" uint bt = {};\n", BitfieldExtract<&TevStageIndirect::bt>("tevind")); - out.Write(" uint mid = {};\n", BitfieldExtract<&TevStageIndirect::mid>("tevind")); + out.Write(" uint matrix_index = {};\n", + BitfieldExtract<&TevStageIndirect::matrix_index>("tevind")); + out.Write(" uint matrix_id = {};\n", + BitfieldExtract<&TevStageIndirect::matrix_id>("tevind")); out.Write("\n"); out.Write(" int3 indcoord;\n"); LookupIndirectTexture("indcoord", "bt"); @@ -846,12 +853,12 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, "\n" " // Matrix multiply\n" " int2 indtevtrans = int2(0, 0);\n" - " if ((mid & 3u) != 0u)\n" + " if (matrix_index != 0u)\n" " {{\n" - " uint mtxidx = 2u * ((mid & 3u) - 1u);\n" + " uint mtxidx = 2u * (matrix_index - 1u);\n" " int shift = " I_INDTEXMTX "[mtxidx].w;\n" "\n" - " switch (mid >> 2)\n" + " switch (matrix_id)\n" " {{\n" " case 0u: // 3x2 S0.10 matrix\n" " indtevtrans = int2(idot(" I_INDTEXMTX