Merge pull request #11523 from degasus/OGL_KHR_subgroup

VideoBackend/OGL: Prefer KHR_shader_subgroup over NV_shader_thread.
This commit is contained in:
JMC47
2023-02-10 04:47:20 -05:00
committed by GitHub
8 changed files with 66 additions and 33 deletions

View File

@ -489,7 +489,14 @@ bool PopulateConfig(GLContext* m_main_gl_context)
else if (GLExtensions::Version() >= 430)
{
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
g_ogl_config.eSupportedGLSLVersion = Glsl430;
if (GLExtensions::Version() >= 450)
{
g_ogl_config.eSupportedGLSLVersion = Glsl450;
}
else
{
g_ogl_config.eSupportedGLSLVersion = Glsl430;
}
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsSSAA = true;
@ -531,8 +538,23 @@ bool PopulateConfig(GLContext* m_main_gl_context)
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
g_ogl_config.max_samples = 1;
g_ogl_config.bSupportsShaderThreadShuffleNV =
GLExtensions::Supports("GL_NV_shader_thread_shuffle");
const bool bSupportsIsHelperInvocation = g_ogl_config.bIsES ?
g_ogl_config.eSupportedGLSLVersion >= GlslEs320 :
g_ogl_config.eSupportedGLSLVersion >= Glsl450;
g_ogl_config.bSupportsKHRShaderSubgroup =
GLExtensions::Supports("GL_KHR_shader_subgroup") && bSupportsIsHelperInvocation;
if (g_ogl_config.bSupportsKHRShaderSubgroup)
{
// Require all three subgroup feature sets (basic, arithmetic and ballot); if any bit is missing, disable subgroup support.
GLint supported_features = 0;
glGetIntegerv(GL_SUBGROUP_SUPPORTED_FEATURES_KHR, &supported_features);
if (~supported_features &
(GL_SUBGROUP_FEATURE_BASIC_BIT_KHR | GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR |
GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR))
{
g_ogl_config.bSupportsKHRShaderSubgroup = false;
}
}
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be

View File

@ -15,8 +15,9 @@ enum GlslVersion
Glsl140,
Glsl150,
Glsl330,
Glsl400, // and above
Glsl430,
Glsl400, // and above
Glsl430, // 430 - 440
Glsl450, // 450 - xxx
GlslEs300, // GLES 3.0
GlslEs310, // GLES 3.1
GlslEs320, // GLES 3.2
@ -61,7 +62,7 @@ struct VideoConfig
bool bSupportsBitfield;
bool bSupportsTextureSubImage;
EsFbFetchType SupportedFramebufferFetch;
bool bSupportsShaderThreadShuffleNV;
bool bSupportsKHRShaderSubgroup; // basic + arithmetic + ballot
const char* gl_vendor;
const char* gl_renderer;

View File

@ -78,6 +78,8 @@ static std::string GetGLSLVersionString()
return "#version 400";
case Glsl430:
return "#version 430";
case Glsl450:
return "#version 450";
default:
// Shouldn't ever hit this
return "#version ERROR";
@ -720,25 +722,18 @@ void ProgramShaderCache::CreateHeader()
}
std::string shader_shuffle_string;
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
if (g_ogl_config.bSupportsKHRShaderSubgroup)
{
shader_shuffle_string = R"(
#extension GL_NV_shader_thread_group : enable
#extension GL_NV_shader_thread_shuffle : enable
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
#define IS_HELPER_INVOCATION gl_HelperThreadNV
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
value = func(value, shuffleXorNV(value, 8, 32)); \
value = func(value, shuffleXorNV(value, 4, 32)); \
value = func(value, shuffleXorNV(value, 2, 32)); \
value = func(value, shuffleXorNV(value, 1, 32));
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";
}