From 454e1dd9a2434bf68121d8a2765a0f2413ba4dcf Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 13:03:35 +0100 Subject: [PATCH] OpenGL: attributeless rendering for efb2ram This wasn't as easy as we now have to cache also the uniform locations. --- .../OGL/Src/TextureConverter.cpp | 74 +++++++------------ 1 file changed, 26 insertions(+), 48 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index c5f96dba14..161468d2b0 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -34,24 +34,23 @@ const int renderBufferWidth = 1024; const int renderBufferHeight = 1024; static SHADER s_rgbToYuyvProgram; +static int s_rgbToYuyvUniform_loc; + static SHADER s_yuyvToRgbProgram; // Not all slots are taken - but who cares. const u32 NUM_ENCODING_PROGRAMS = 64; static SHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS]; - -static GLuint s_encode_VBO = 0; -static GLuint s_encode_VAO = 0; -static TargetRectangle s_cached_sourceRc; +static int s_encodingUniform_loc[NUM_ENCODING_PROGRAMS]; static const char *VProgram = - "ATTRIN vec2 rawpos;\n" - "ATTRIN vec2 tex0;\n" "VARYOUT vec2 uv0;\n" + "uniform vec4 copy_position;\n" // left, top, right, bottom "void main()\n" "{\n" - " uv0 = tex0;\n" - " gl_Position = vec4(rawpos, 0.0, 1.0);\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos);\n" "}\n"; void CreatePrograms() @@ -73,14 +72,24 @@ void CreatePrograms() * inbetween the two Pixels, and only blurs over these two pixels. */ // Output is BGRA because that is slightly faster than RGBA. + const char *VProgramRgbToYuyv = + "VARYOUT vec2 uv0;\n" + "uniform vec4 copy_position;\n" // left, top, right, bottom + "uniform sampler2D samp9;\n" + "void main()\n" + "{\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / textureSize(samp9, 0);\n" + "}\n"; const char *FProgramRgbToYuyv = "uniform sampler2D samp9;\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" - " vec3 c0 = texture(samp9, (uv0 - dFdx(uv0) * 0.25) / textureSize(samp9, 0)).rgb;\n" - " vec3 c1 = texture(samp9, (uv0 + dFdx(uv0) * 0.25) / textureSize(samp9, 0)).rgb;\n" + " vec3 c0 = texture(samp9, (uv0 - dFdx(uv0) * 0.25)).rgb;\n" + " vec3 c1 = texture(samp9, (uv0 + dFdx(uv0) * 0.25)).rgb;\n" " vec3 c01 = (c0 + c1) * 0.5;\n" " vec3 y_const = vec3(0.257,0.504,0.098);\n" " vec3 u_const = vec3(-0.148,-0.291,0.439);\n" @@ -88,6 +97,8 @@ void CreatePrograms() " vec4 const3 = vec4(0.0625,0.5,0.0625,0.5);\n" " ocol0 = vec4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n" "}\n"; + ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgramRgbToYuyv, FProgramRgbToYuyv); + s_rgbToYuyvUniform_loc = glGetUniformLocation(s_rgbToYuyvProgram.glprogid, "copy_position"); /* TODO: Accuracy Improvements * @@ -121,8 +132,6 @@ void CreatePrograms() " yComp + (2.018 * uComp),\n" " 1.0);\n" "}\n"; - - ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgram, FProgramRgbToYuyv); ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgramYuyvToRgb, FProgramYuyvToRgb); } @@ -150,6 +159,7 @@ SHADER &GetOrCreateEncodingShader(u32 format) #endif ProgramShaderCache::CompileShader(s_encodingPrograms[format], VProgram, shader); + s_encodingUniform_loc[format] = glGetUniformLocation(s_encodingPrograms[format].glprogid, "copy_position"); } return s_encodingPrograms[format]; } @@ -158,19 +168,6 @@ void Init() { glGenFramebuffers(1, &s_texConvFrameBuffer); - glGenBuffers(1, &s_encode_VBO ); - glGenVertexArrays(1, &s_encode_VAO ); - glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO ); - glBindVertexArray( s_encode_VAO ); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL); - glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB); - glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2); - s_cached_sourceRc.top = -1; - s_cached_sourceRc.bottom = -1; - s_cached_sourceRc.left = -1; - s_cached_sourceRc.right = -1; - glActiveTexture(GL_TEXTURE0 + 9); glGenTextures(1, &s_srcTexture); glBindTexture(GL_TEXTURE_2D, s_srcTexture); @@ -189,8 +186,6 @@ void Shutdown() glDeleteTextures(1, &s_srcTexture); glDeleteTextures(1, &s_dstTexture); glDeleteFramebuffers(1, &s_texConvFrameBuffer); - glDeleteBuffers(1, &s_encode_VBO ); - glDeleteVertexArrays(1, &s_encode_VAO ); s_rgbToYuyvProgram.Destroy(); s_yuyvToRgbProgram.Destroy(); @@ -205,7 +200,7 @@ void Shutdown() void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, u8* destAddr, int dstWidth, int dstHeight, int readStride, - bool toTexture, bool linearFilter) + bool toTexture, bool linearFilter, int uniform_loc) { @@ -235,25 +230,8 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight); - GL_REPORT_ERRORD(); - if(!(s_cached_sourceRc == sourceRc)) { - GLfloat vertices[] = { - -1.f, -1.f, - (float)sourceRc.left, (float)sourceRc.top, - -1.f, 1.f, - (float)sourceRc.left, (float)sourceRc.bottom, - 1.f, -1.f, - (float)sourceRc.right, (float)sourceRc.top, - 1.f, 1.f, - (float)sourceRc.right, (float)sourceRc.bottom - }; - glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO ); - glBufferData(GL_ARRAY_BUFFER, 4*4*sizeof(GLfloat), vertices, GL_STREAM_DRAW); + glUniform4f(uniform_loc, sourceRc.left, sourceRc.top, sourceRc.right, sourceRc.bottom); - s_cached_sourceRc = sourceRc; - } - - glBindVertexArray( s_encode_VAO ); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); GL_REPORT_ERRORD(); @@ -345,7 +323,7 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, TexDecoder_GetBlockWidthInTexels(format); EncodeToRamUsingShader(source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, readStride, - true, bScaleByHalf > 0 && !bFromZBuffer); + true, bScaleByHalf > 0 && !bFromZBuffer, s_encodingUniform_loc[format]); return size_in_bytes; // TODO: D3D11 is calculating this value differently! } @@ -359,7 +337,7 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des // We enable linear filtering, because the gamecube does filtering in the vertical direction when // yscale is enabled. // Otherwise we get jaggies when a game uses yscaling (most PAL games) - EncodeToRamUsingShader(srcTexture, sourceRc, destAddr, dstWidth / 2, dstHeight, 0, false, true); + EncodeToRamUsingShader(srcTexture, sourceRc, destAddr, dstWidth / 2, dstHeight, 0, false, true, s_rgbToYuyvUniform_loc); FramebufferManager::SetFramebuffer(0); TextureCache::DisableStage(0); g_renderer->RestoreAPIState();