Fix shader 8-bit and 16-bit STS/STG (#2741)

* Fix 8 and 16-bit STG

* Fix 8 and 16-bit STS

* Shader cache version bump
This commit is contained in:
gdkchan 2021-10-18 20:24:15 -03:00 committed by GitHub
parent 052deebf26
commit 63f1663fa9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 266 additions and 38 deletions

View File

@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <summary>
/// Version of the codegen (to be changed when codegen or guest format change).
/// </summary>
private const ulong ShaderCodeGenVersion = 2750;
private const ulong ShaderCodeGenVersion = 2741;
// Progress reporting helpers
private volatile int _shaderCount;

View File

@ -270,6 +270,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreSharedSmallInt) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreStorageSmallInt) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");

View File

@ -5,7 +5,7 @@ int Helper_AtomicMaxS32(int offset, int value)
{
oldValue = $SHARED_MEM$[offset];
newValue = uint(max(int(oldValue), value));
} while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue);
} while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}
@ -16,6 +16,6 @@ int Helper_AtomicMinS32(int offset, int value)
{
oldValue = $SHARED_MEM$[offset];
newValue = uint(min(int(oldValue), value));
} while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue);
} while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}

View File

@ -5,7 +5,7 @@ int Helper_AtomicMaxS32(int index, int offset, int value)
{
oldValue = $STORAGE_MEM$[index].data[offset];
newValue = uint(max(int(oldValue), value));
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue);
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}
@ -16,6 +16,6 @@ int Helper_AtomicMinS32(int index, int offset, int value)
{
oldValue = $STORAGE_MEM$[index].data[offset];
newValue = uint(min(int(oldValue), value));
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue);
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
return int(oldValue);
}

View File

@ -13,5 +13,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static string ShuffleUp = "Helper_ShuffleUp";
public static string ShuffleXor = "Helper_ShuffleXor";
public static string SwizzleAdd = "Helper_SwizzleAdd";
public static string StoreShared16 = "Helper_StoreShared16";
public static string StoreShared8 = "Helper_StoreShared8";
public static string StoreStorage16 = "Helper_StoreStorage16";
public static string StoreStorage8 = "Helper_StoreStorage8";
}
}

View File

@ -0,0 +1,23 @@
void Helper_StoreShared16(int offset, uint value)
{
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[wordOffset];
newValue = bitfieldInsert(oldValue, value, bitOffset, 16);
} while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue);
}
void Helper_StoreShared8(int offset, uint value)
{
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[wordOffset];
newValue = bitfieldInsert(oldValue, value, bitOffset, 8);
} while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue);
}

View File

@ -0,0 +1,23 @@
void Helper_StoreStorage16(int index, int offset, uint value)
{
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[wordOffset];
newValue = bitfieldInsert(oldValue, value, bitOffset, 16);
} while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue);
}
void Helper_StoreStorage8(int index, int offset, uint value)
{
int wordOffset = offset >> 2;
int bitOffset = (offset & 3) * 8;
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[wordOffset];
newValue = bitfieldInsert(oldValue, value, bitOffset, 8);
} while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue);
}

View File

@ -185,9 +185,21 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
case Instruction.StoreShared:
return StoreShared(context, operation);
case Instruction.StoreShared16:
return StoreShared16(context, operation);
case Instruction.StoreShared8:
return StoreShared8(context, operation);
case Instruction.StoreStorage:
return StoreStorage(context, operation);
case Instruction.StoreStorage16:
return StoreStorage16(context, operation);
case Instruction.StoreStorage8:
return StoreStorage8(context, operation);
case Instruction.TextureSample:
return TextureSample(context, operation);

View File

@ -113,7 +113,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.StoreAttribute, InstType.Special);
Add(Instruction.StoreLocal, InstType.Special);
Add(Instruction.StoreShared, InstType.Special);
Add(Instruction.StoreShared16, InstType.Special);
Add(Instruction.StoreShared8, InstType.Special);
Add(Instruction.StoreStorage, InstType.Special);
Add(Instruction.StoreStorage16, InstType.Special);
Add(Instruction.StoreStorage8, InstType.Special);
Add(Instruction.Subtract, InstType.OpBinary, "-", 2);
Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd);
Add(Instruction.TextureSample, InstType.Special);

View File

@ -365,6 +365,34 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
return $"{arrayName}[{offsetExpr}] = {src}";
}
public static string StoreShared16(CodeGenContext context, AstOperation operation)
{
IAstNode src1 = operation.GetSource(0);
IAstNode src2 = operation.GetSource(1);
string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
VariableType srcType = OperandManager.GetNodeDestType(context, src2);
string src = TypeConversion.ReinterpretCast(context, src2, srcType, VariableType.U32);
return $"{HelperFunctionNames.StoreShared16}({offsetExpr}, {src})";
}
public static string StoreShared8(CodeGenContext context, AstOperation operation)
{
IAstNode src1 = operation.GetSource(0);
IAstNode src2 = operation.GetSource(1);
string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
VariableType srcType = OperandManager.GetNodeDestType(context, src2);
string src = TypeConversion.ReinterpretCast(context, src2, srcType, VariableType.U32);
return $"{HelperFunctionNames.StoreShared8}({offsetExpr}, {src})";
}
public static string StoreStorage(CodeGenContext context, AstOperation operation)
{
IAstNode src1 = operation.GetSource(0);
@ -383,6 +411,42 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
return $"{sb} = {src}";
}
public static string StoreStorage16(CodeGenContext context, AstOperation operation)
{
IAstNode src1 = operation.GetSource(0);
IAstNode src2 = operation.GetSource(1);
IAstNode src3 = operation.GetSource(2);
string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
VariableType srcType = OperandManager.GetNodeDestType(context, src3);
string src = TypeConversion.ReinterpretCast(context, src3, srcType, VariableType.U32);
string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
return $"{HelperFunctionNames.StoreStorage16}({indexExpr}, {offsetExpr}, {src})";
}
public static string StoreStorage8(CodeGenContext context, AstOperation operation)
{
IAstNode src1 = operation.GetSource(0);
IAstNode src2 = operation.GetSource(1);
IAstNode src3 = operation.GetSource(2);
string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
VariableType srcType = OperandManager.GetNodeDestType(context, src3);
string src = TypeConversion.ReinterpretCast(context, src3, srcType, VariableType.U32);
string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
return $"{HelperFunctionNames.StoreStorage8}({indexExpr}, {offsetExpr}, {src})";
}
public static string TextureSample(CodeGenContext context, AstOperation operation)
{
AstTextureOperation texOp = (AstTextureOperation)operation;

View File

@ -366,23 +366,33 @@ namespace Ryujinx.Graphics.Shader.Instructions
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
Operand elemOffset = context.IAdd(wordOffset, Const(index));
if (isSmallInt)
if (isSmallInt && region == MemoryRegion.Local)
{
Operand word = null;
switch (region)
{
case MemoryRegion.Local: word = context.LoadLocal(elemOffset); break;
case MemoryRegion.Shared: word = context.LoadShared(elemOffset); break;
}
Operand word = context.LoadLocal(elemOffset);
value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
}
switch (region)
if (region == MemoryRegion.Local)
{
case MemoryRegion.Local: context.StoreLocal(elemOffset, value); break;
case MemoryRegion.Shared: context.StoreShared(elemOffset, value); break;
context.StoreLocal(elemOffset, value);
}
else if (region == MemoryRegion.Shared)
{
switch (size)
{
case LsSize2.U8:
case LsSize2.S8:
context.StoreShared8(baseOffset, value);
break;
case LsSize2.U16:
case LsSize2.S16:
context.StoreShared16(baseOffset, value);
break;
default:
context.StoreShared(elemOffset, value);
break;
}
}
}
}
@ -401,8 +411,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
return;
}
bool isSmallInt = size < LsSize2.B32;
int count = GetVectorCount((LsSize)size);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
@ -415,14 +423,20 @@ namespace Ryujinx.Graphics.Shader.Instructions
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
if (isSmallInt)
Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
if (size == LsSize2.U8 || size == LsSize2.S8)
{
Operand word = context.LoadGlobal(addrLow, addrHigh);
value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
context.StoreGlobal8(addrLowOffset, addrHigh, value);
}
else if (size == LsSize2.U16 || size == LsSize2.S16)
{
context.StoreGlobal16(addrLowOffset, addrHigh, value);
}
else
{
context.StoreGlobal(addrLowOffset, addrHigh, value);
}
context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);
}
}

View File

@ -111,9 +111,15 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
SquareRoot,
StoreAttribute,
StoreGlobal,
StoreGlobal16,
StoreGlobal8,
StoreLocal,
StoreShared,
StoreShared16,
StoreShared8,
StoreStorage,
StoreStorage16,
StoreStorage8,
Subtract,
SwizzleAdd,
TextureSample,

View File

@ -17,6 +17,8 @@
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreSharedSmallInt.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreStorageSmallInt.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_fp.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_cp.glsl" />

View File

@ -13,6 +13,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
ShuffleDown = 1 << 5,
ShuffleUp = 1 << 6,
ShuffleXor = 1 << 7,
SwizzleAdd = 1 << 8
StoreSharedSmallInt = 1 << 8,
StoreStorageSmallInt = 1 << 9,
SwizzleAdd = 1 << 10
}
}

View File

@ -120,7 +120,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.StoreGlobal, VariableType.None, VariableType.S32, VariableType.S32, VariableType.U32);
Add(Instruction.StoreLocal, VariableType.None, VariableType.S32, VariableType.U32);
Add(Instruction.StoreShared, VariableType.None, VariableType.S32, VariableType.U32);
Add(Instruction.StoreShared16, VariableType.None, VariableType.S32, VariableType.U32);
Add(Instruction.StoreShared8, VariableType.None, VariableType.S32, VariableType.U32);
Add(Instruction.StoreStorage, VariableType.None, VariableType.S32, VariableType.S32, VariableType.U32);
Add(Instruction.StoreStorage16, VariableType.None, VariableType.S32, VariableType.S32, VariableType.U32);
Add(Instruction.StoreStorage8, VariableType.None, VariableType.S32, VariableType.S32, VariableType.U32);
Add(Instruction.Subtract, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.SwizzleAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.S32);
Add(Instruction.TextureSample, VariableType.F32);

View File

@ -203,6 +203,14 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
case Instruction.ShuffleXor:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor;
break;
case Instruction.StoreShared16:
case Instruction.StoreShared8:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreSharedSmallInt;
break;
case Instruction.StoreStorage16:
case Instruction.StoreStorage8:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreStorageSmallInt;
break;
case Instruction.SwizzleAdd:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
break;

View File

@ -627,6 +627,16 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.StoreGlobal, null, a, b, c);
}
public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.StoreGlobal16, null, a, b, c);
}
public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.StoreGlobal8, null, a, b, c);
}
public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.StoreLocal, null, a, b);
@ -637,6 +647,16 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.StoreShared, null, a, b);
}
public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.StoreShared16, null, a, b);
}
public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.StoreShared8, null, a, b);
}
public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
{
return UnpackDouble2x32(context, a, 1);

View File

@ -20,7 +20,9 @@ namespace Ryujinx.Graphics.Shader.Translation
{
return (inst.IsAtomic() && IsGlobalMr(inst)) ||
inst == Instruction.LoadGlobal ||
inst == Instruction.StoreGlobal;
inst == Instruction.StoreGlobal ||
inst == Instruction.StoreGlobal16 ||
inst == Instruction.StoreGlobal8;
}
private static bool IsGlobalMr(Instruction inst)

View File

@ -59,7 +59,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
Operation operation = (Operation)node.Value;
bool isAtomic = operation.Inst.IsAtomic();
bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal;
bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
config.SetUsedStorageBuffer(storageIndex, isWrite);
@ -78,12 +79,18 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
node.List.AddBefore(node, andOp);
Operand byteOffset = Local();
Operand wordOffset = Local();
Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
node.List.AddBefore(node, subOp);
if (isStg16Or8)
{
return byteOffset;
}
Operand wordOffset = Local();
Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
node.List.AddBefore(node, shrOp);
return wordOffset;
@ -113,7 +120,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
else
{
storageOp = new Operation(Instruction.StoreStorage, null, sources);
Instruction storeInst = operation.Inst switch
{
Instruction.StoreGlobal16 => Instruction.StoreStorage16,
Instruction.StoreGlobal8 => Instruction.StoreStorage8,
_ => Instruction.StoreStorage
};
storageOp = new Operation(storeInst, null, sources);
}
for (int index = 0; index < operation.SourcesCount; index++)

View File

@ -49,7 +49,8 @@ namespace Ryujinx.Graphics.Shader.Translation
Operation operation = (Operation)node.Value;
bool isAtomic = operation.Inst.IsAtomic();
bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal;
bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
Operation storageOp;
@ -97,12 +98,19 @@ namespace Ryujinx.Graphics.Shader.Translation
Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
Operand wordOffset = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
Operand[] sources = new Operand[operation.SourcesCount];
sources[0] = sbSlot;
sources[1] = wordOffset;
if (isStg16Or8)
{
sources[1] = byteOffset;
}
else
{
sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
}
for (int index = 2; index < operation.SourcesCount; index++)
{
@ -121,7 +129,14 @@ namespace Ryujinx.Graphics.Shader.Translation
}
else
{
storageOp = new Operation(Instruction.StoreStorage, null, sources);
Instruction storeInst = operation.Inst switch
{
Instruction.StoreGlobal16 => Instruction.StoreStorage16,
Instruction.StoreGlobal8 => Instruction.StoreStorage8,
_ => Instruction.StoreStorage
};
storageOp = new Operation(storeInst, null, sources);
}
for (int index = 0; index < operation.SourcesCount; index++)