Merge pull request #11523 from degasus/OGL_KHR_subgroup

VideoBackend/OGL: Prefer KHR_shader_subgroup over NV_shader_thread.
This commit is contained in:
JMC47 2023-02-10 04:47:20 -05:00 committed by GitHub
commit 258151fe5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 66 additions and 33 deletions

View File

@ -37,6 +37,7 @@
#include "Common/GL/GLExtensions/EXT_texture_filter_anisotropic.h"
#include "Common/GL/GLExtensions/HP_occlusion_test.h"
#include "Common/GL/GLExtensions/KHR_debug.h"
#include "Common/GL/GLExtensions/KHR_shader_subgroup.h"
#include "Common/GL/GLExtensions/NV_depth_buffer_float.h"
#include "Common/GL/GLExtensions/NV_occlusion_query_samples.h"
#include "Common/GL/GLExtensions/NV_primitive_restart.h"

View File

@ -0,0 +1,19 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
** SPDX-License-Identifier: MIT
*/
// Enum and bitfield tokens for the GL_KHR_shader_subgroup OpenGL extension.
// gl_common.h is included first, as in the other GLExtensions headers.
#include "Common/GL/GLExtensions/gl_common.h"
// Query tokens. GL_SUBGROUP_SUPPORTED_FEATURES_KHR is read with glGetIntegerv and
// returns a bitmask made of the GL_SUBGROUP_FEATURE_*_BIT_KHR flags defined below.
// NOTE(review): the other three tokens are presumably state queries as well; confirm
// the expected Get* entry point and result type for each against the extension spec.
#define GL_SUBGROUP_SIZE_KHR 0x9532
#define GL_SUBGROUP_SUPPORTED_STAGES_KHR 0x9533
#define GL_SUBGROUP_SUPPORTED_FEATURES_KHR 0x9534
#define GL_SUBGROUP_QUAD_ALL_STAGES_KHR 0x9535
// Feature flags, one bit each, to be tested against the mask returned for
// GL_SUBGROUP_SUPPORTED_FEATURES_KHR. A feature group is usable only if its bit is set;
// to require several groups at once, OR the bits together and check that none is missing.
#define GL_SUBGROUP_FEATURE_BASIC_BIT_KHR 0x00000001
#define GL_SUBGROUP_FEATURE_VOTE_BIT_KHR 0x00000002
#define GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR 0x00000004
#define GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR 0x00000008
#define GL_SUBGROUP_FEATURE_SHUFFLE_BIT_KHR 0x00000010
#define GL_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT_KHR 0x00000020
#define GL_SUBGROUP_FEATURE_CLUSTERED_BIT_KHR 0x00000040
#define GL_SUBGROUP_FEATURE_QUAD_BIT_KHR 0x00000080

View File

@ -386,7 +386,6 @@ static const std::string_view SUBGROUP_HELPER_HEADER = R"(
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define CAN_USE_SUBGROUP_REDUCTION true
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)

View File

@ -489,7 +489,14 @@ bool PopulateConfig(GLContext* m_main_gl_context)
else if (GLExtensions::Version() >= 430)
{
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
g_ogl_config.eSupportedGLSLVersion = Glsl430;
if (GLExtensions::Version() >= 450)
{
g_ogl_config.eSupportedGLSLVersion = Glsl450;
}
else
{
g_ogl_config.eSupportedGLSLVersion = Glsl430;
}
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsSSAA = true;
@ -531,8 +538,23 @@ bool PopulateConfig(GLContext* m_main_gl_context)
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
g_ogl_config.max_samples = 1;
g_ogl_config.bSupportsShaderThreadShuffleNV =
GLExtensions::Supports("GL_NV_shader_thread_shuffle");
const bool bSupportsIsHelperInvocation = g_ogl_config.bIsES ?
g_ogl_config.eSupportedGLSLVersion >= GlslEs320 :
g_ogl_config.eSupportedGLSLVersion >= Glsl450;
g_ogl_config.bSupportsKHRShaderSubgroup =
GLExtensions::Supports("GL_KHR_shader_subgroup") && bSupportsIsHelperInvocation;
if (g_ogl_config.bSupportsKHRShaderSubgroup)
{
// Check for the features: basic + arithmetic + ballot
GLint supported_features = 0;
glGetIntegerv(GL_SUBGROUP_SUPPORTED_FEATURES_KHR, &supported_features);
if (~supported_features &
(GL_SUBGROUP_FEATURE_BASIC_BIT_KHR | GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR |
GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR))
{
g_ogl_config.bSupportsKHRShaderSubgroup = false;
}
}
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be

View File

@ -15,8 +15,9 @@ enum GlslVersion
Glsl140,
Glsl150,
Glsl330,
Glsl400, // and above
Glsl430,
Glsl400, // and above
Glsl430, // 430 - 440
Glsl450, // 450 - xxx
GlslEs300, // GLES 3.0
GlslEs310, // GLES 3.1
GlslEs320, // GLES 3.2
@ -61,7 +62,7 @@ struct VideoConfig
bool bSupportsBitfield;
bool bSupportsTextureSubImage;
EsFbFetchType SupportedFramebufferFetch;
bool bSupportsShaderThreadShuffleNV;
bool bSupportsKHRShaderSubgroup; // basic + arithmetic + ballot
const char* gl_vendor;
const char* gl_renderer;

View File

@ -78,6 +78,8 @@ static std::string GetGLSLVersionString()
return "#version 400";
case Glsl430:
return "#version 430";
case Glsl450:
return "#version 450";
default:
// Shouldn't ever hit this
return "#version ERROR";
@ -720,25 +722,18 @@ void ProgramShaderCache::CreateHeader()
}
std::string shader_shuffle_string;
if (g_ogl_config.bSupportsShaderThreadShuffleNV)
if (g_ogl_config.bSupportsKHRShaderSubgroup)
{
shader_shuffle_string = R"(
#extension GL_NV_shader_thread_group : enable
#extension GL_NV_shader_thread_shuffle : enable
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
// The xor shuffle below produces incorrect results if all threads in a warp are not active.
#define CAN_USE_SUBGROUP_REDUCTION (ballotThreadNV(true) == 0xFFFFFFFFu)
#define IS_HELPER_INVOCATION gl_HelperThreadNV
#define IS_FIRST_ACTIVE_INVOCATION (gl_ThreadInWarpNV == findLSB(ballotThreadNV(!gl_HelperThreadNV)))
#define SUBGROUP_REDUCTION(func, value) value = func(value, shuffleXorNV(value, 16, 32)); \
value = func(value, shuffleXorNV(value, 8, 32)); \
value = func(value, shuffleXorNV(value, 4, 32)); \
value = func(value, shuffleXorNV(value, 2, 32)); \
value = func(value, shuffleXorNV(value, 1, 32));
#define SUBGROUP_MIN(value) SUBGROUP_REDUCTION(min, value)
#define SUBGROUP_MAX(value) SUBGROUP_REDUCTION(max, value)
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";
}

View File

@ -81,9 +81,8 @@ static const char SUBGROUP_HELPER_HEADER[] = R"(
#extension GL_KHR_shader_subgroup_ballot : enable
#define SUPPORTS_SUBGROUP_REDUCTION 1
#define CAN_USE_SUBGROUP_REDUCTION true
#define IS_HELPER_INVOCATION gl_HelperInvocation
#define IS_FIRST_ACTIVE_INVOCATION (gl_SubgroupInvocationID == subgroupBallotFindLSB(subgroupBallot(!gl_HelperInvocation)))
#define IS_FIRST_ACTIVE_INVOCATION (subgroupElect())
#define SUBGROUP_MIN(value) value = subgroupMin(value)
#define SUBGROUP_MAX(value) value = subgroupMax(value)
)";

View File

@ -457,15 +457,12 @@ void UpdateBoundingBox(float2 rawpos) {{
int2 pos_br = pos | 1; // round up to odd
#ifdef SUPPORTS_SUBGROUP_REDUCTION
if (CAN_USE_SUBGROUP_REDUCTION) {{
int2 min_pos = IS_HELPER_INVOCATION ? int2(2147483647, 2147483647) : pos_tl;
int2 max_pos = IS_HELPER_INVOCATION ? int2(-2147483648, -2147483648) : pos_br;
SUBGROUP_MIN(min_pos);
SUBGROUP_MAX(max_pos);
if (!IS_HELPER_INVOCATION)
{{
SUBGROUP_MIN(pos_tl);
SUBGROUP_MAX(pos_br);
if (IS_FIRST_ACTIVE_INVOCATION)
UpdateBoundingBoxBuffer(min_pos, max_pos);
}} else {{
UpdateBoundingBoxBuffer(pos_tl, pos_br);
UpdateBoundingBoxBuffer(pos_tl, pos_br);
}}
#else
UpdateBoundingBoxBuffer(pos_tl, pos_br);