From e8f23af10b8205daff8c348058cbb1abc8ee4c5e Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 08:32:41 +0100 Subject: [PATCH 01/21] OpenGL: always use texture2d as efb --- .../OGL/Src/FramebufferManager.cpp | 42 +++++++++---------- .../OGL/Src/FramebufferManager.h | 8 ---- .../VideoBackends/OGL/Src/TextureCache.cpp | 2 +- .../OGL/Src/TextureConverter.cpp | 18 ++++---- 4 files changed, 31 insertions(+), 39 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp index 1ce5e552a2..af130fc7aa 100644 --- a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp @@ -79,24 +79,24 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_efbDepth = glObj[1]; m_resolvedColorTexture = glObj[2]; // needed for pixel format convertion - glBindTexture(getFbType(), m_efbColor); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, m_efbColor); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glBindTexture(getFbType(), m_efbDepth); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); + glBindTexture(GL_TEXTURE_2D, m_efbDepth); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); - glBindTexture(getFbType(), m_resolvedColorTexture); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, m_resolvedColorTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); // Bind target textures to the EFB framebuffer. glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_efbColor, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, getFbType(), m_efbDepth, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_efbColor, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_efbDepth, 0); GL_REPORT_FBO_ERROR(); } @@ -144,20 +144,20 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_resolvedColorTexture = glObj[0]; m_resolvedDepthTexture = glObj[1]; - glBindTexture(getFbType(), m_resolvedColorTexture); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, m_resolvedColorTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glBindTexture(getFbType(), m_resolvedDepthTexture); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); + glBindTexture(GL_TEXTURE_2D, m_resolvedDepthTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); // Bind resolved textures to resolved framebuffer. glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_resolvedColorTexture, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, getFbType(), m_resolvedDepthTexture, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_resolvedColorTexture, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_resolvedDepthTexture, 0); GL_REPORT_FBO_ERROR(); @@ -386,11 +386,11 @@ void FramebufferManager::ReinterpretPixelData(unsigned int convtype) m_resolvedColorTexture = src_texture; // also switch them on fbo - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_efbColor, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_efbColor, 0); } glViewport(0,0, m_targetWidth, m_targetHeight); glActiveTexture(GL_TEXTURE0 + 9); - glBindTexture(getFbType(), src_texture); + glBindTexture(GL_TEXTURE_2D, src_texture); m_pixel_format_shaders[convtype ? 1 : 0].Bind(); glBindVertexArray(m_pixel_format_vao); diff --git a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h index 7b93b239ae..d3e32075de 100644 --- a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h @@ -57,14 +57,6 @@ struct XFBSource : public XFBSourceBase const GLuint texture; }; -inline GLenum getFbType() -{ -#ifndef USE_GLES3 - return GL_TEXTURE_RECTANGLE; -#endif - return GL_TEXTURE_2D; -} - class FramebufferManager : public FramebufferManagerBase { public: diff --git a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp index aa197c293c..070ffe24d0 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp @@ -296,7 +296,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo GL_REPORT_ERRORD(); glActiveTexture(GL_TEXTURE0+9); - glBindTexture(getFbType(), read_texture); + glBindTexture(GL_TEXTURE_2D, read_texture); glViewport(0, 0, virtual_width, virtual_height); diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index f653dfbf01..418e4048cd 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -178,8 +178,8 @@ void Init() glActiveTexture(GL_TEXTURE0 + 9); glGenTextures(1, &s_srcTexture); - glBindTexture(getFbType(), s_srcTexture); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); + glBindTexture(GL_TEXTURE_2D, s_srcTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glGenTextures(1, &s_dstTexture); glBindTexture(GL_TEXTURE_2D, s_dstTexture); @@ -223,17 +223,17 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, // set source texture glActiveTexture(GL_TEXTURE0+9); - glBindTexture(getFbType(), srcTexture); + glBindTexture(GL_TEXTURE_2D, srcTexture); if (linearFilter) { - glTexParameteri(getFbType(), GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(getFbType(), GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); } else { - glTexParameteri(getFbType(), GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(getFbType(), GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); } GL_REPORT_ERRORD(); @@ -394,8 +394,8 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur // activate source texture // set srcAddr as data for source texture glActiveTexture(GL_TEXTURE0+9); - glBindTexture(getFbType(), s_srcTexture); - glTexImage2D(getFbType(), 0, GL_RGBA, srcWidth / 2, srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr); + glBindTexture(GL_TEXTURE_2D, s_srcTexture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, srcWidth / 2, srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr); glViewport(0, 0, srcWidth, srcHeight); s_yuyvToRgbProgram.Bind(); From 1a3e790d9e85ec5aceb4cb54b085c6219ee8d87a Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 08:38:00 +0100 Subject: [PATCH 02/21] OpenGL: fix xfb for texture2d --- .../Core/VideoBackends/OGL/Src/TextureConverter.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index 418e4048cd..c5f96dba14 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -74,13 +74,13 @@ void CreatePrograms() */ // Output is BGRA because that is slightly faster than RGBA. const char *FProgramRgbToYuyv = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" - " vec3 c0 = texture2DRect(samp9, uv0 - dFdx(uv0) * 0.25).rgb;\n" - " vec3 c1 = texture2DRect(samp9, uv0 + dFdx(uv0) * 0.25).rgb;\n" + " vec3 c0 = texture(samp9, (uv0 - dFdx(uv0) * 0.25) / textureSize(samp9, 0)).rgb;\n" + " vec3 c1 = texture(samp9, (uv0 + dFdx(uv0) * 0.25) / textureSize(samp9, 0)).rgb;\n" " vec3 c01 = (c0 + c1) * 0.5;\n" " vec3 y_const = vec3(0.257,0.504,0.098);\n" " vec3 u_const = vec3(-0.148,-0.291,0.439);\n" @@ -103,20 +103,15 @@ void CreatePrograms() " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" "}\n"; const char *FProgramYuyvToRgb = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" " ivec2 uv = ivec2(gl_FragCoord.xy);\n" -#ifdef USE_GLES3 // We switch top/bottom here. TODO: move this to screen blit. " ivec2 ts = textureSize(samp9, 0);\n" " vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1), 0);\n" -#else - " ivec2 ts = textureSize(samp9);\n" - " vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1));\n" -#endif " float y = mix(c0.b, c0.r, (uv.x & 1) == 1);\n" " float yComp = 1.164 * (y - 0.0625);\n" " float uComp = c0.g - 0.5;\n" From b904d56036f9a16e6fa95d832f388aa5d7229995 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 08:43:55 +0100 Subject: [PATCH 03/21] OpenGL: fix efb2tex for texture2d --- Source/Core/VideoBackends/OGL/Src/TextureCache.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp index 070ffe24d0..b08a465302 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp @@ -403,25 +403,25 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo TextureCache::TextureCache() { const char *pColorMatrixProg = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "uniform vec4 colmat[7];\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "\n" "void main(){\n" - " vec4 texcol = texture2DRect(samp9, uv0);\n" + " vec4 texcol = texture(samp9, uv0 / textureSize(samp9, 0));\n" " texcol = round(texcol * colmat[5]) * colmat[6];\n" " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n" "}\n"; const char *pDepthMatrixProg = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "uniform vec4 colmat[5];\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "\n" "void main(){\n" - " vec4 texcol = texture2DRect(samp9, uv0);\n" + " vec4 texcol = texture(samp9, uv0 / textureSize(samp9, 0));\n" " vec4 EncodedDepth = fract((texcol.r * (16777215.0/16777216.0)) * vec4(1.0,256.0,256.0*256.0,1.0));\n" " texcol = round(EncodedDepth * (16777216.0/16777215.0) * vec4(255.0,255.0,255.0,15.0)) / vec4(255.0,255.0,255.0,15.0);\n" " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];" From 146e435009eec015a160e35d13cad4f22d7a8d69 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 08:56:01 +0100 Subject: [PATCH 04/21] OpenGL: fix efb2ram for texture2D This was hacky as hell. Our efb2ram shader generator is just freaked out. --- Source/Core/VideoCommon/Src/TextureConversionShader.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index 99e9f702c7..022d18b427 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -81,7 +81,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) if (ApiType == API_OPENGL) { WRITE(p, "#define samp0 samp9\n"); - WRITE(p, "uniform sampler2DRect samp0;\n"); + WRITE(p, "uniform sampler2D samp0;\n"); WRITE(p, " out vec4 ocol0;\n"); WRITE(p, " VARYIN float2 uv0;\n"); @@ -146,7 +146,7 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) if (ApiType == API_OPENGL) { WRITE(p, "#define samp0 samp9\n"); - WRITE(p, "uniform sampler2DRect samp0;\n"); + WRITE(p, "uniform sampler2D samp0;\n"); WRITE(p, " out float4 ocol0;\n"); WRITE(p, " VARYIN float2 uv0;\n"); @@ -202,7 +202,7 @@ void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYP if (ApiType == API_D3D) texSampleOpName = "tex0.Sample"; else // OGL - texSampleOpName = "texture2DRect"; + texSampleOpName = "texture"; // the increment of sampleUv.x is delayed, so we perform it here. see WriteIncrementSampleX. const char* texSampleIncrementUnit; @@ -211,7 +211,7 @@ void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYP else // OGL texSampleIncrementUnit = I_COLORS"[0].x"; - WRITE(p, " %s = %s(samp0, sampleUv + float2(%d.0 * (%s), 0.0)).%s;\n", + WRITE(p, " %s = %s(samp0, (sampleUv + float2(%d.0 * (%s), 0.0)) / textureSize(samp0, 0)).%s;\n", dest, texSampleOpName, s_incrementSampleXCount, texSampleIncrementUnit, colorComp); } From afcf0e65d1a548ea0215454c58a91708fad2e92a Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 08:59:04 +0100 Subject: [PATCH 05/21] OpenGL: fix emulate format changes for texture2d --- Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp index af130fc7aa..14b02b2f14 100644 --- a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp @@ -199,11 +199,11 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms "}\n"; char ps_rgba6_to_rgb8[] = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" - " ivec4 src6 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 63.f));\n" + " ivec4 src6 = ivec4(round(texelFetch(samp9, ivec2(gl_FragCoord.xy), 0) * 63.f));\n" " ivec4 dst8;\n" " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" @@ -213,11 +213,11 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms "}"; char ps_rgb8_to_rgba6[] = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" - " ivec4 src8 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 255.f));\n" + " ivec4 src8 = ivec4(round(texelFetch(samp9, ivec2(gl_FragCoord.xy), 0) * 255.f));\n" " ivec4 dst6;\n" " dst6.r = src8.r >> 2;\n" " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" From b93756df870021d239e95d17cf76335cd3417e95 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 09:05:50 +0100 Subject: [PATCH 06/21] OpenGL: drop texture_rect hack Everything is moved to texture2d (but often in a hacky way), so we don't need this global hack any more. --- Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp index f998c6ced2..9a9a6f3cf9 100644 --- a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp @@ -589,11 +589,6 @@ void ProgramShaderCache::CreateHeader ( void ) "#define frac fract\n" "#define lerp mix\n" - // texture2d hack - "%s\n" - "%s\n" - "%s\n" - , v==GLSLES3 ? "#version 300 es" : v==GLSL_130 ? "#version 130" : v==GLSL_140 ? "#version 140" : "#version 150" , g_ActiveConfig.backend_info.bSupportsGLSLUBO && v Date: Mon, 25 Nov 2013 12:19:34 +0100 Subject: [PATCH 07/21] OpenGL: cleanup efb2tex Also use attributeless rendering. But we need the src rect, so set it by uniform. If there is a slowdown here (I doubt as the driver likely has a fast path to update uniforms) then we should check if this rect changes and only then update the uniform. --- .../VideoBackends/OGL/Src/TextureCache.cpp | 80 ++++--------------- 1 file changed, 16 insertions(+), 64 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp index b08a465302..af7e121ffe 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp @@ -45,6 +45,8 @@ static SHADER s_ColorMatrixProgram; static SHADER s_DepthMatrixProgram; static GLuint s_ColorMatrixUniform; static GLuint s_DepthMatrixUniform; +static GLuint s_ColorCopyPositionUniform; +static GLuint s_DepthCopyPositionUniform; static u32 s_ColorCbufid; static u32 s_DepthCbufid; @@ -52,13 +54,6 @@ static u32 s_Textures[8]; static u32 s_ActiveTexture; static u32 s_NextStage; -struct VBOCache { - GLuint vbo; - GLuint vao; - TargetRectangle targetSource; -}; -static std::map s_VBO; - bool SaveTexture(const std::string filename, u32 textarget, u32 tex, int virtual_width, int virtual_height, unsigned int level) { #ifndef USE_GLES3 @@ -311,53 +306,12 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo glUniform4fv(s_ColorMatrixUniform, 7, colmat); s_ColorCbufid = cbufid; } + + TargetRectangle R = g_renderer->ConvertEFBRectangle(srcRect); + glUniform4f(srcFormat == PIXELFMT_Z24 ? s_DepthCopyPositionUniform : s_ColorCopyPositionUniform, + R.left, R.top, R.right, R.bottom); GL_REPORT_ERRORD(); - TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect); - GL_REPORT_ERRORD(); - - // should be unique enough, if not, vbo will "only" be uploaded to much - u64 targetSourceHash = u64(targetSource.left)<<48 | u64(targetSource.top)<<32 | u64(targetSource.right)<<16 | u64(targetSource.bottom); - std::map::iterator vbo_it = s_VBO.find(targetSourceHash); - - if(vbo_it == s_VBO.end()) { - VBOCache item; - item.targetSource.bottom = -1; - item.targetSource.top = -1; - item.targetSource.left = -1; - item.targetSource.right = -1; - glGenBuffers(1, &item.vbo); - glGenVertexArrays(1, &item.vao); - - glBindBuffer(GL_ARRAY_BUFFER, item.vbo); - glBindVertexArray(item.vao); - - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL); - glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB); - glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2); - - vbo_it = s_VBO.insert(std::pair(targetSourceHash, item)).first; - } - if(!(vbo_it->second.targetSource == targetSource)) { - GLfloat vertices[] = { - -1.f, 1.f, - (GLfloat)targetSource.left, (GLfloat)targetSource.bottom, - -1.f, -1.f, - (GLfloat)targetSource.left, (GLfloat)targetSource.top, - 1.f, 1.f, - (GLfloat)targetSource.right, (GLfloat)targetSource.bottom, - 1.f, -1.f, - (GLfloat)targetSource.right, (GLfloat)targetSource.top - }; - - glBindBuffer(GL_ARRAY_BUFFER, vbo_it->second.vbo); - glBufferData(GL_ARRAY_BUFFER, 4*4*sizeof(GLfloat), vertices, GL_STREAM_DRAW); - - vbo_it->second.targetSource = targetSource; - } - - glBindVertexArray(vbo_it->second.vao); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); GL_REPORT_ERRORD(); @@ -409,7 +363,7 @@ TextureCache::TextureCache() "out vec4 ocol0;\n" "\n" "void main(){\n" - " vec4 texcol = texture(samp9, uv0 / textureSize(samp9, 0));\n" + " vec4 texcol = texture(samp9, uv0);\n" " texcol = round(texcol * colmat[5]) * colmat[6];\n" " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n" "}\n"; @@ -421,20 +375,21 @@ TextureCache::TextureCache() "out vec4 ocol0;\n" "\n" "void main(){\n" - " vec4 texcol = texture(samp9, uv0 / textureSize(samp9, 0));\n" + " vec4 texcol = texture(samp9, uv0);\n" " vec4 EncodedDepth = fract((texcol.r * (16777215.0/16777216.0)) * vec4(1.0,256.0,256.0*256.0,1.0));\n" " texcol = round(EncodedDepth * (16777216.0/16777215.0) * vec4(255.0,255.0,255.0,15.0)) / vec4(255.0,255.0,255.0,15.0);\n" " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];" "}\n"; const char *VProgram = - "ATTRIN vec2 rawpos;\n" - "ATTRIN vec2 tex0;\n" "VARYOUT vec2 uv0;\n" + "uniform sampler2D samp9;\n" + "uniform vec4 copy_position;\n" // left, top, right, bottom "void main()\n" "{\n" - " uv0 = tex0;\n" - " gl_Position = vec4(rawpos,0,1);\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / textureSize(samp9, 0);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" "}\n"; ProgramShaderCache::CompileShader(s_ColorMatrixProgram, VProgram, pColorMatrixProg); @@ -445,6 +400,9 @@ TextureCache::TextureCache() s_ColorCbufid = -1; s_DepthCbufid = -1; + s_ColorCopyPositionUniform = glGetUniformLocation(s_ColorMatrixProgram.glprogid, "copy_position"); + s_DepthCopyPositionUniform = glGetUniformLocation(s_DepthMatrixProgram.glprogid, "copy_position"); + s_ActiveTexture = -1; s_NextStage = -1; for(auto& gtex : s_Textures) @@ -456,12 +414,6 @@ TextureCache::~TextureCache() { s_ColorMatrixProgram.Destroy(); s_DepthMatrixProgram.Destroy(); - - for(auto& cache : s_VBO) { - glDeleteBuffers(1, &cache.second.vbo); - glDeleteVertexArrays(1, &cache.second.vao); - } - s_VBO.clear(); } void TextureCache::DisableStage(unsigned int stage) From 38fe05b1df7ad575b12834dfaba2788cff12ce9e Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 12:27:54 +0100 Subject: [PATCH 08/21] OpenGL: attributeless rendering in emulate format changes only cleanup --- .../OGL/Src/FramebufferManager.cpp | 24 ++----------------- .../OGL/Src/FramebufferManager.h | 2 -- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp index 14b02b2f14..9abd47a361 100644 --- a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp @@ -33,8 +33,6 @@ GLuint FramebufferManager::m_resolvedDepthTexture; GLuint FramebufferManager::m_xfbFramebuffer; // reinterpret pixel format -GLuint FramebufferManager::m_pixel_format_vao; -GLuint FramebufferManager::m_pixel_format_vbo; SHADER FramebufferManager::m_pixel_format_shaders[2]; @@ -177,25 +175,10 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); // reinterpret pixel format - glGenBuffers(1, &m_pixel_format_vbo); - glGenVertexArrays(1, &m_pixel_format_vao); - glBindVertexArray(m_pixel_format_vao); - glBindBuffer(GL_ARRAY_BUFFER, m_pixel_format_vbo); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*2, NULL); - - float vertices[] = { - -1.0, -1.0, - 1.0, -1.0, - -1.0, 1.0, - 1.0, 1.0, - }; - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); - char vs[] = - "ATTRIN vec2 rawpos;\n" "void main(void) {\n" - " gl_Position = vec4(rawpos,0,1);\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" "}\n"; char ps_rgba6_to_rgb8[] = @@ -261,8 +244,6 @@ FramebufferManager::~FramebufferManager() m_efbDepth = 0; // reinterpret pixel format - glDeleteVertexArrays(1, &m_pixel_format_vao); - glDeleteBuffers(1, &m_pixel_format_vbo); m_pixel_format_shaders[0].Destroy(); m_pixel_format_shaders[1].Destroy(); } @@ -393,7 +374,6 @@ void FramebufferManager::ReinterpretPixelData(unsigned int convtype) glBindTexture(GL_TEXTURE_2D, src_texture); m_pixel_format_shaders[convtype ? 1 : 0].Bind(); - glBindVertexArray(m_pixel_format_vao); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h index d3e32075de..9fbcdbfedd 100644 --- a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h @@ -113,8 +113,6 @@ private: static GLuint m_xfbFramebuffer; // Only used in MSAA mode // For pixel format draw - static GLuint m_pixel_format_vbo; - static GLuint m_pixel_format_vao; static SHADER m_pixel_format_shaders[2]; }; From 6ed3f82affe9f866a580bcddfaa6f760f92a1b3e Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 12:36:17 +0100 Subject: [PATCH 09/21] OpenGL: attributeless rendering for postprocessing --- .../VideoBackends/OGL/Src/PostProcessing.cpp | 34 +++---------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/PostProcessing.cpp b/Source/Core/VideoBackends/OGL/Src/PostProcessing.cpp index 84fb0c4d1c..c0460e489a 100644 --- a/Source/Core/VideoBackends/OGL/Src/PostProcessing.cpp +++ b/Source/Core/VideoBackends/OGL/Src/PostProcessing.cpp @@ -25,18 +25,15 @@ static u32 s_width; static u32 s_height; static GLuint s_fbo; static GLuint s_texture; -static GLuint s_vao; -static GLuint s_vbo; static GLuint s_uniform_resolution; static char s_vertex_shader[] = - "in vec2 rawpos;\n" - "in vec2 tex0;\n" "out vec2 uv0;\n" "void main(void) {\n" - " gl_Position = vec4(rawpos,0,1);\n" - " uv0 = tex0;\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + " uv0 = rawpos;\n" "}\n"; void Init() @@ -56,34 +53,14 @@ void Init() glBindFramebuffer(GL_FRAMEBUFFER, s_fbo); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_texture, 0); FramebufferManager::SetFramebuffer(0); - - glGenBuffers(1, &s_vbo); - glBindBuffer(GL_ARRAY_BUFFER, s_vbo); - GLfloat vertices[] = { - -1.f, -1.f, 0.f, 0.f, - -1.f, 1.f, 0.f, 1.f, - 1.f, -1.f, 1.f, 0.f, - 1.f, 1.f, 1.f, 1.f - }; - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); - - glGenVertexArrays(1, &s_vao); - glBindVertexArray( s_vao ); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, NULL); - glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB); - glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2); } void Shutdown() { s_shader.Destroy(); - glDeleteFramebuffers(1, &s_vbo); + glDeleteFramebuffers(1, &s_fbo); glDeleteTextures(1, &s_texture); - - glDeleteBuffers(1, &s_vbo); - glDeleteVertexArrays(1, &s_vao); } void ReloadShader() @@ -103,7 +80,6 @@ void BlitToScreen() glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); glViewport(0, 0, s_width, s_height); - glBindVertexArray(s_vao); s_shader.Bind(); glUniform4f(s_uniform_resolution, (float)s_width, (float)s_height, 1.0f/(float)s_width, 1.0f/(float)s_height); @@ -111,7 +87,6 @@ void BlitToScreen() glActiveTexture(GL_TEXTURE0+9); glBindTexture(GL_TEXTURE_2D, s_texture); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - glBindTexture(GL_TEXTURE_2D, 0); /* glBindFramebuffer(GL_READ_FRAMEBUFFER, s_fbo); @@ -132,7 +107,6 @@ void Update ( u32 width, u32 height ) glActiveTexture(GL_TEXTURE0+9); glBindTexture(GL_TEXTURE_2D, s_texture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glBindTexture(GL_TEXTURE_2D, 0); } } From 454e1dd9a2434bf68121d8a2765a0f2413ba4dcf Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 13:03:35 +0100 Subject: [PATCH 10/21] OpenGL: attributeless rendering for efb2ram This wasn't as easy as we now have to cache also the uniform locations. --- .../OGL/Src/TextureConverter.cpp | 74 +++++++------------ 1 file changed, 26 insertions(+), 48 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index c5f96dba14..161468d2b0 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -34,24 +34,23 @@ const int renderBufferWidth = 1024; const int renderBufferHeight = 1024; static SHADER s_rgbToYuyvProgram; +static int s_rgbToYuyvUniform_loc; + static SHADER s_yuyvToRgbProgram; // Not all slots are taken - but who cares. const u32 NUM_ENCODING_PROGRAMS = 64; static SHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS]; - -static GLuint s_encode_VBO = 0; -static GLuint s_encode_VAO = 0; -static TargetRectangle s_cached_sourceRc; +static int s_encodingUniform_loc[NUM_ENCODING_PROGRAMS]; static const char *VProgram = - "ATTRIN vec2 rawpos;\n" - "ATTRIN vec2 tex0;\n" "VARYOUT vec2 uv0;\n" + "uniform vec4 copy_position;\n" // left, top, right, bottom "void main()\n" "{\n" - " uv0 = tex0;\n" - " gl_Position = vec4(rawpos, 0.0, 1.0);\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos);\n" "}\n"; void CreatePrograms() @@ -73,14 +72,24 @@ void CreatePrograms() * inbetween the two Pixels, and only blurs over these two pixels. */ // Output is BGRA because that is slightly faster than RGBA. + const char *VProgramRgbToYuyv = + "VARYOUT vec2 uv0;\n" + "uniform vec4 copy_position;\n" // left, top, right, bottom + "uniform sampler2D samp9;\n" + "void main()\n" + "{\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / textureSize(samp9, 0);\n" + "}\n"; const char *FProgramRgbToYuyv = "uniform sampler2D samp9;\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" - " vec3 c0 = texture(samp9, (uv0 - dFdx(uv0) * 0.25) / textureSize(samp9, 0)).rgb;\n" - " vec3 c1 = texture(samp9, (uv0 + dFdx(uv0) * 0.25) / textureSize(samp9, 0)).rgb;\n" + " vec3 c0 = texture(samp9, (uv0 - dFdx(uv0) * 0.25)).rgb;\n" + " vec3 c1 = texture(samp9, (uv0 + dFdx(uv0) * 0.25)).rgb;\n" " vec3 c01 = (c0 + c1) * 0.5;\n" " vec3 y_const = vec3(0.257,0.504,0.098);\n" " vec3 u_const = vec3(-0.148,-0.291,0.439);\n" @@ -88,6 +97,8 @@ void CreatePrograms() " vec4 const3 = vec4(0.0625,0.5,0.0625,0.5);\n" " ocol0 = vec4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n" "}\n"; + ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgramRgbToYuyv, FProgramRgbToYuyv); + s_rgbToYuyvUniform_loc = glGetUniformLocation(s_rgbToYuyvProgram.glprogid, "copy_position"); /* TODO: Accuracy Improvements * @@ -121,8 +132,6 @@ void CreatePrograms() " yComp + (2.018 * uComp),\n" " 1.0);\n" "}\n"; - - ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgram, FProgramRgbToYuyv); ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgramYuyvToRgb, FProgramYuyvToRgb); } @@ -150,6 +159,7 @@ SHADER &GetOrCreateEncodingShader(u32 format) #endif ProgramShaderCache::CompileShader(s_encodingPrograms[format], VProgram, shader); + s_encodingUniform_loc[format] = glGetUniformLocation(s_encodingPrograms[format].glprogid, "copy_position"); } return s_encodingPrograms[format]; } @@ -158,19 +168,6 @@ void Init() { glGenFramebuffers(1, &s_texConvFrameBuffer); - glGenBuffers(1, &s_encode_VBO ); - glGenVertexArrays(1, &s_encode_VAO ); - glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO ); - glBindVertexArray( s_encode_VAO ); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL); - glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB); - glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2); - s_cached_sourceRc.top = -1; - s_cached_sourceRc.bottom = -1; - s_cached_sourceRc.left = -1; - s_cached_sourceRc.right = -1; - glActiveTexture(GL_TEXTURE0 + 9); glGenTextures(1, &s_srcTexture); glBindTexture(GL_TEXTURE_2D, s_srcTexture); @@ -189,8 +186,6 @@ void Shutdown() glDeleteTextures(1, &s_srcTexture); glDeleteTextures(1, &s_dstTexture); glDeleteFramebuffers(1, &s_texConvFrameBuffer); - glDeleteBuffers(1, &s_encode_VBO ); - glDeleteVertexArrays(1, &s_encode_VAO ); s_rgbToYuyvProgram.Destroy(); s_yuyvToRgbProgram.Destroy(); @@ -205,7 +200,7 @@ void Shutdown() void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, u8* destAddr, int dstWidth, int dstHeight, int readStride, - bool toTexture, bool linearFilter) + bool toTexture, bool linearFilter, int uniform_loc) { @@ -235,25 +230,8 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight); - GL_REPORT_ERRORD(); - if(!(s_cached_sourceRc == sourceRc)) { - GLfloat vertices[] = { - -1.f, -1.f, - (float)sourceRc.left, (float)sourceRc.top, - -1.f, 1.f, - (float)sourceRc.left, (float)sourceRc.bottom, - 1.f, -1.f, - (float)sourceRc.right, (float)sourceRc.top, - 1.f, 1.f, - (float)sourceRc.right, (float)sourceRc.bottom - }; - glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO ); - glBufferData(GL_ARRAY_BUFFER, 4*4*sizeof(GLfloat), vertices, GL_STREAM_DRAW); + glUniform4f(uniform_loc, sourceRc.left, sourceRc.top, sourceRc.right, sourceRc.bottom); - s_cached_sourceRc = sourceRc; - } - - glBindVertexArray( s_encode_VAO ); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); GL_REPORT_ERRORD(); @@ -345,7 +323,7 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, TexDecoder_GetBlockWidthInTexels(format); EncodeToRamUsingShader(source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, readStride, - true, bScaleByHalf > 0 && !bFromZBuffer); + true, bScaleByHalf > 0 && !bFromZBuffer, s_encodingUniform_loc[format]); return size_in_bytes; // TODO: D3D11 is calculating this value differently! } @@ -359,7 +337,7 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des // We enable linear filtering, because the gamecube does filtering in the vertical direction when // yscale is enabled. // Otherwise we get jaggies when a game uses yscaling (most PAL games) - EncodeToRamUsingShader(srcTexture, sourceRc, destAddr, dstWidth / 2, dstHeight, 0, false, true); + EncodeToRamUsingShader(srcTexture, sourceRc, destAddr, dstWidth / 2, dstHeight, 0, false, true, s_rgbToYuyvUniform_loc); FramebufferManager::SetFramebuffer(0); TextureCache::DisableStage(0); g_renderer->RestoreAPIState(); From a289e0604f3fd3663a7d32e8aafbe9d8286b35ad Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 14:53:44 +0100 Subject: [PATCH 11/21] TextureConverter: remove D3D9 foo This file is in VideoCommon, but as D3D11 doesn't use it and D3D9 is dropped, it's time to clean up. --- .../Src/TextureConversionShader.cpp | 47 +++---------------- 1 file changed, 7 insertions(+), 40 deletions(-) diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index 022d18b427..c78b854535 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -57,15 +57,6 @@ u16 GetEncodedSampleCount(u32 format) } } -const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num) -{ - if (ApiType == API_OPENGL) - return ""; // Once we switch to GLSL 1.3 we can do something here - static char result[64]; - sprintf(result, " : register(%s%d)", prefix, num); - return result; -} - // block dimensions : widthStride, heightStride // texture dims : width, height, x offset, y offset void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) @@ -73,7 +64,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) // [0] left, top, right, bottom of source rectangle within source texture // [1] width and height of destination texture in pixels // Two were merged for GLSL - WRITE(p, "uniform float4 " I_COLORS"[2] %s;\n", WriteRegister(ApiType, "c", C_COLORS)); + WRITE(p, "uniform float4 " I_COLORS"[2];\n"); int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkH = TexDecoder_GetBlockHeightInTexels(format); @@ -118,16 +109,9 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " sampleUv = sampleUv * " I_COLORS"[0].xy;\n"); - if (ApiType == API_OPENGL) - WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n"); + WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n"); WRITE(p, " sampleUv = sampleUv + " I_COLORS"[1].zw;\n"); - - if (ApiType != API_OPENGL) - { - WRITE(p, " sampleUv = sampleUv + float2(0.0,1.0);\n"); // still need to determine the reason for this - WRITE(p, " sampleUv = sampleUv / " I_COLORS"[0].zw;\n"); - } } // block dimensions : widthStride, heightStride @@ -137,7 +121,7 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) // [0] left, top, right, bottom of source rectangle within source texture // [1] width and height of destination texture in pixels // Two were merged for GLSL - WRITE(p, "uniform float4 " I_COLORS"[2] %s;\n", WriteRegister(ApiType, "c", C_COLORS)); + WRITE(p, "uniform float4 " I_COLORS"[2];\n"); int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkH = TexDecoder_GetBlockHeightInTexels(format); @@ -184,35 +168,18 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " sampleUv.y = yb + xoff;\n"); WRITE(p, " sampleUv = sampleUv * " I_COLORS"[0].xy;\n"); - if (ApiType == API_OPENGL) - WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n"); + WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n"); WRITE(p, " sampleUv = sampleUv + " I_COLORS"[1].zw;\n"); - - if (ApiType != API_OPENGL) - { - WRITE(p, " sampleUv = sampleUv + float2(0.0,1.0);\n");// still to determine the reason for this - WRITE(p, " sampleUv = sampleUv / " I_COLORS"[0].zw;\n"); - } } void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYPE ApiType) { - const char* texSampleOpName; - if (ApiType == API_D3D) - texSampleOpName = "tex0.Sample"; - else // OGL - texSampleOpName = "texture"; - // the increment of sampleUv.x is delayed, so we perform it here. see WriteIncrementSampleX. - const char* texSampleIncrementUnit; - if (ApiType == API_D3D) - texSampleIncrementUnit = I_COLORS"[0].x / " I_COLORS"[0].z"; - else // OGL - texSampleIncrementUnit = I_COLORS"[0].x"; + const char* texSampleIncrementUnit = I_COLORS"[0].x"; - WRITE(p, " %s = %s(samp0, (sampleUv + float2(%d.0 * (%s), 0.0)) / textureSize(samp0, 0)).%s;\n", - dest, texSampleOpName, s_incrementSampleXCount, texSampleIncrementUnit, colorComp); + WRITE(p, " %s = texture(samp0, (sampleUv + float2(%d.0 * (%s), 0.0)) / textureSize(samp0, 0)).%s;\n", + dest, s_incrementSampleXCount, texSampleIncrementUnit, colorComp); } void WriteColorToIntensity(char*& p, const char* src, const char* dest) From bcb31b09d38463f8a7a515858a2143477bfc8935 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 15:01:18 +0100 Subject: [PATCH 12/21] TextureConverter: Use gl_FragCoord instead of uv0 --- .../OGL/Src/TextureConverter.cpp | 29 ++++++++----------- .../Src/TextureConversionShader.cpp | 12 +++----- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index 161468d2b0..08932f562e 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -41,17 +41,6 @@ static SHADER s_yuyvToRgbProgram; // Not all slots are taken - but who cares. const u32 NUM_ENCODING_PROGRAMS = 64; static SHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS]; -static int s_encodingUniform_loc[NUM_ENCODING_PROGRAMS]; - -static const char *VProgram = - "VARYOUT vec2 uv0;\n" - "uniform vec4 copy_position;\n" // left, top, right, bottom - "void main()\n" - "{\n" - " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" - " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" - " uv0 = mix(copy_position.xy, copy_position.zw, rawpos);\n" - "}\n"; void CreatePrograms() { @@ -158,8 +147,14 @@ SHADER &GetOrCreateEncodingShader(u32 format) } #endif + const char *VProgram = + "void main()\n" + "{\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + "}\n"; + ProgramShaderCache::CompileShader(s_encodingPrograms[format], VProgram, shader); - s_encodingUniform_loc[format] = glGetUniformLocation(s_encodingPrograms[format].glprogid, "copy_position"); } return s_encodingPrograms[format]; } @@ -200,7 +195,7 @@ void Shutdown() void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, u8* destAddr, int dstWidth, int dstHeight, int readStride, - bool toTexture, bool linearFilter, int uniform_loc) + bool toTexture, bool linearFilter) { @@ -230,8 +225,6 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight); - glUniform4f(uniform_loc, sourceRc.left, sourceRc.top, sourceRc.right, sourceRc.bottom); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); GL_REPORT_ERRORD(); @@ -323,7 +316,7 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, TexDecoder_GetBlockWidthInTexels(format); EncodeToRamUsingShader(source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, readStride, - true, bScaleByHalf > 0 && !bFromZBuffer, s_encodingUniform_loc[format]); + true, bScaleByHalf > 0 && !bFromZBuffer); return size_in_bytes; // TODO: D3D11 is calculating this value differently! } @@ -334,10 +327,12 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des s_rgbToYuyvProgram.Bind(); + glUniform4f(s_rgbToYuyvUniform_loc, sourceRc.left, sourceRc.top, sourceRc.right, sourceRc.bottom); + // We enable linear filtering, because the gamecube does filtering in the vertical direction when // yscale is enabled. // Otherwise we get jaggies when a game uses yscaling (most PAL games) - EncodeToRamUsingShader(srcTexture, sourceRc, destAddr, dstWidth / 2, dstHeight, 0, false, true, s_rgbToYuyvUniform_loc); + EncodeToRamUsingShader(srcTexture, sourceRc, destAddr, dstWidth / 2, dstHeight, 0, false, true); FramebufferManager::SetFramebuffer(0); TextureCache::DisableStage(0); g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index c78b854535..a6f63ad6ee 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -75,7 +75,6 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, "uniform sampler2D samp0;\n"); WRITE(p, " out vec4 ocol0;\n"); - WRITE(p, " VARYIN float2 uv0;\n"); WRITE(p, "void main()\n"); } else // D3D @@ -84,13 +83,12 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, "Texture2D Tex0 : register(t0);\n"); WRITE(p,"void main(\n"); - WRITE(p," out float4 ocol0 : SV_Target,\n"); - WRITE(p," in float2 uv0 : TEXCOORD0)\n"); + WRITE(p," out float4 ocol0 : SV_Target)\n"); } WRITE(p, "{\n" " float2 sampleUv;\n" - " float2 uv1 = floor(uv0);\n"); + " float2 uv1 = floor(gl_FragCoord.xy);\n"); WRITE(p, " uv1.x = uv1.x * %d.0;\n", samples); @@ -133,7 +131,6 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, "uniform sampler2D samp0;\n"); WRITE(p, " out float4 ocol0;\n"); - WRITE(p, " VARYIN float2 uv0;\n"); WRITE(p, "void main()\n"); } else @@ -142,14 +139,13 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, "Texture2D Tex0 : register(t0);\n"); WRITE(p,"void main(\n"); - WRITE(p," out float4 ocol0 : SV_Target,\n"); - WRITE(p," in float2 uv0 : TEXCOORD0)\n"); + WRITE(p," out float4 ocol0 : SV_Target)\n"); } WRITE(p, "{\n" " float2 sampleUv;\n" - " float2 uv1 = floor(uv0);\n"); + " float2 uv1 = floor(gl_FragCoord.xy);\n"); WRITE(p, " float yl = floor(uv1.y / %d.0);\n", blkH); WRITE(p, " float yb = yl * %d.0;\n", blkH); From 6750a81972822099980a59f334b7928e5ac92be7 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 15:49:13 +0100 Subject: [PATCH 13/21] TextureConverter: Use integer math for swizzling also move int(efb_coord) -> float(ogl_fb_coord) into WriteSampleColor --- .../OGL/Src/ProgramShaderCache.cpp | 3 + .../OGL/Src/TextureConverter.cpp | 13 +-- .../Src/TextureConversionShader.cpp | 86 +++++++++---------- 3 files changed, 47 insertions(+), 55 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp index 9a9a6f3cf9..cc7e29dce5 100644 --- a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp @@ -584,6 +584,9 @@ void ProgramShaderCache::CreateHeader ( void ) "#define float2 vec2\n" "#define float3 vec3\n" "#define float4 vec4\n" + "#define int2 ivec2\n" + "#define int3 ivec3\n" + "#define int4 ivec4\n" // hlsl to glsl function translation "#define frac fract\n" diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index 08932f562e..eab62d7f7f 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -291,17 +291,10 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, s32 expandedWidth = (width + blkW) & (~blkW); s32 expandedHeight = (height + blkH) & (~blkH); - float sampleStride = bScaleByHalf ? 2.f : 1.f; - - float params[] = { - Renderer::EFBToScaledXf(sampleStride), Renderer::EFBToScaledYf(sampleStride), - 0.0f, 0.0f, - (float)expandedWidth, (float)Renderer::EFBToScaledY(expandedHeight)-1, - (float)Renderer::EFBToScaledX(source.left), (float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) - }; - texconv_shader.Bind(); - glUniform4fv(texconv_shader.UniformLocations[0], 2, params); + glUniform4f(texconv_shader.UniformLocations[0], + float(source.left), float(source.top), + (float)expandedWidth, bScaleByHalf ? 2.f : 1.f); TargetRectangle scaledSource; scaledSource.top = 0; diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index a6f63ad6ee..573b35383a 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -64,7 +64,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) // [0] left, top, right, bottom of source rectangle within source texture // [1] width and height of destination texture in pixels // Two were merged for GLSL - WRITE(p, "uniform float4 " I_COLORS"[2];\n"); + WRITE(p, "uniform float4 " I_COLORS";\n"); int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkH = TexDecoder_GetBlockHeightInTexels(format); @@ -87,29 +87,23 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) } WRITE(p, "{\n" - " float2 sampleUv;\n" - " float2 uv1 = floor(gl_FragCoord.xy);\n"); + " int2 sampleUv;\n" + " int2 uv1 = int2(gl_FragCoord.xy);\n"); - WRITE(p, " uv1.x = uv1.x * %d.0;\n", samples); + WRITE(p, " uv1.x = uv1.x * %d;\n", samples); - WRITE(p, " float xl = floor(uv1.x / %d.0);\n", blkW); - WRITE(p, " float xib = uv1.x - (xl * %d.0);\n", blkW); - WRITE(p, " float yl = floor(uv1.y / %d.0);\n", blkH); - WRITE(p, " float yb = yl * %d.0;\n", blkH); - WRITE(p, " float yoff = uv1.y - yb;\n"); - WRITE(p, " float xp = uv1.x + (yoff * " I_COLORS"[1].x);\n"); - WRITE(p, " float xel = floor(xp / %d.0);\n", blkW); - WRITE(p, " float xb = floor(xel / %d.0);\n", blkH); - WRITE(p, " float xoff = xel - (xb * %d.0);\n", blkH); + WRITE(p, " int xl = uv1.x / %d;\n", blkW); + WRITE(p, " int xib = uv1.x - xl * %d;\n", blkW); + WRITE(p, " int yl = uv1.y / %d;\n", blkH); + WRITE(p, " int yb = yl * %d;\n", blkH); + WRITE(p, " int yoff = uv1.y - yb;\n"); + WRITE(p, " int xp = uv1.x + yoff * int(" I_COLORS".z);\n"); + WRITE(p, " int xel = xp / %d;\n", blkW); + WRITE(p, " int xb = xel / %d;\n", blkH); + WRITE(p, " int xoff = xel - xb * %d;\n", blkH); - WRITE(p, " sampleUv.x = xib + (xb * %d.0);\n", blkW); + WRITE(p, " sampleUv.x = xib + xb * %d;\n", blkW); WRITE(p, " sampleUv.y = yb + xoff;\n"); - - WRITE(p, " sampleUv = sampleUv * " I_COLORS"[0].xy;\n"); - - WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n"); - - WRITE(p, " sampleUv = sampleUv + " I_COLORS"[1].zw;\n"); } // block dimensions : widthStride, heightStride @@ -119,7 +113,7 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) // [0] left, top, right, bottom of source rectangle within source texture // [1] width and height of destination texture in pixels // Two were merged for GLSL - WRITE(p, "uniform float4 " I_COLORS"[2];\n"); + WRITE(p, "uniform float4 " I_COLORS";\n"); int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkH = TexDecoder_GetBlockHeightInTexels(format); @@ -144,38 +138,40 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, "{\n" - " float2 sampleUv;\n" - " float2 uv1 = floor(gl_FragCoord.xy);\n"); + " int2 sampleUv;\n" + " int2 uv1 = int2(gl_FragCoord.xy);\n"); - WRITE(p, " float yl = floor(uv1.y / %d.0);\n", blkH); - WRITE(p, " float yb = yl * %d.0;\n", blkH); - WRITE(p, " float yoff = uv1.y - yb;\n"); - WRITE(p, " float xp = uv1.x + (yoff * " I_COLORS"[1].x);\n"); - WRITE(p, " float xel = floor(xp / 2.0);\n"); - WRITE(p, " float xb = floor(xel / %d.0);\n", blkH); - WRITE(p, " float xoff = xel - (xb * %d.0);\n", blkH); + WRITE(p, " int yl = uv1.y / %d;\n", blkH); + WRITE(p, " int yb = yl * %d;\n", blkH); + WRITE(p, " int yoff = uv1.y - yb;\n"); + WRITE(p, " int xp = uv1.x + yoff * int(" I_COLORS".z);\n"); + WRITE(p, " int xel = xp / 2;\n"); + WRITE(p, " int xb = xel / %d;\n", blkH); + WRITE(p, " int xoff = xel - xb * %d;\n", blkH); - WRITE(p, " float x2 = uv1.x * 2.0;\n"); - WRITE(p, " float xl = floor(x2 / %d.0);\n", blkW); - WRITE(p, " float xib = x2 - (xl * %d.0);\n", blkW); - WRITE(p, " float halfxb = floor(xb / 2.0);\n"); + WRITE(p, " int x2 = uv1.x * 2;\n"); + WRITE(p, " int xl = x2 / %d;\n", blkW); + WRITE(p, " int xib = x2 - xl * %d;\n", blkW); + WRITE(p, " int halfxb = xb / 2;\n"); - WRITE(p, " sampleUv.x = xib + (halfxb * %d.0);\n", blkW); + WRITE(p, " sampleUv.x = xib + halfxb * %d;\n", blkW); WRITE(p, " sampleUv.y = yb + xoff;\n"); - WRITE(p, " sampleUv = sampleUv * " I_COLORS"[0].xy;\n"); - - WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n"); - - WRITE(p, " sampleUv = sampleUv + " I_COLORS"[1].zw;\n"); } void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYPE ApiType) { - // the increment of sampleUv.x is delayed, so we perform it here. see WriteIncrementSampleX. - const char* texSampleIncrementUnit = I_COLORS"[0].x"; - - WRITE(p, " %s = texture(samp0, (sampleUv + float2(%d.0 * (%s), 0.0)) / textureSize(samp0, 0)).%s;\n", - dest, s_incrementSampleXCount, texSampleIncrementUnit, colorComp); + WRITE(p, + "{\n" + "float2 uv = sampleUv + int2(%d,0);\n" // pixel offset + "uv *= " I_COLORS".w;\n" // scale by two (if wanted) + "uv += " I_COLORS".xy;\n" // move to copyed rect + "uv += float2(0.5, 0.5);\n" // move center of pixel + "uv /= float2(%d, %d);\n" // normlize to [0:1] + "uv.y = 1-uv.y;\n" // ogl foo (disable this line for d3d) + "%s = texture(samp0, uv).%s;\n" + "}\n", + s_incrementSampleXCount, EFB_WIDTH, EFB_HEIGHT, dest, colorComp + ); } void WriteColorToIntensity(char*& p, const char* src, const char* dest) From 9dbb262aab891bad6b299199ff8cd0a71a177c31 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 25 Nov 2013 15:11:06 +0000 Subject: [PATCH 14/21] Fix for OpenGL ES 3. --- Source/Core/VideoBackends/OGL/Src/TextureCache.cpp | 2 +- Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp index af7e121ffe..0666679ae9 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureCache.cpp @@ -388,7 +388,7 @@ TextureCache::TextureCache() "void main()\n" "{\n" " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" - " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / textureSize(samp9, 0);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0));\n" " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" "}\n"; diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index eab62d7f7f..7f8ae23cc9 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -69,7 +69,7 @@ void CreatePrograms() "{\n" " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" - " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / textureSize(samp9, 0);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0));\n" "}\n"; const char *FProgramRgbToYuyv = "uniform sampler2D samp9;\n" From 2a2f2fd4ebd29fec3de1498946d3374598d8fb6a Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 16:19:08 +0100 Subject: [PATCH 15/21] TextureConvertion: merge Write*Swizzler --- .../Src/TextureConversionShader.cpp | 84 ++++--------------- 1 file changed, 17 insertions(+), 67 deletions(-) diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index 573b35383a..bf625223dd 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -61,14 +61,15 @@ u16 GetEncodedSampleCount(u32 format) // texture dims : width, height, x offset, y offset void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) { - // [0] left, top, right, bottom of source rectangle within source texture - // [1] width and height of destination texture in pixels - // Two were merged for GLSL + // left, top, of source rectangle within source texture + // width of the destination rectangle, scale_factor (1 or 2) WRITE(p, "uniform float4 " I_COLORS";\n"); int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkH = TexDecoder_GetBlockHeightInTexels(format); int samples = GetEncodedSampleCount(format); + // 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments + int factor = samples == 1 ? 2 : 1; if (ApiType == API_OPENGL) { WRITE(p, "#define samp0 samp9\n"); @@ -92,67 +93,16 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " uv1.x = uv1.x * %d;\n", samples); - WRITE(p, " int xl = uv1.x / %d;\n", blkW); - WRITE(p, " int xib = uv1.x - xl * %d;\n", blkW); WRITE(p, " int yl = uv1.y / %d;\n", blkH); WRITE(p, " int yb = yl * %d;\n", blkH); WRITE(p, " int yoff = uv1.y - yb;\n"); WRITE(p, " int xp = uv1.x + yoff * int(" I_COLORS".z);\n"); - WRITE(p, " int xel = xp / %d;\n", blkW); + WRITE(p, " int xel = xp / %d;\n", samples == 1 ? factor : blkW); WRITE(p, " int xb = xel / %d;\n", blkH); WRITE(p, " int xoff = xel - xb * %d;\n", blkH); - - WRITE(p, " sampleUv.x = xib + xb * %d;\n", blkW); - WRITE(p, " sampleUv.y = yb + xoff;\n"); -} - -// block dimensions : widthStride, heightStride -// texture dims : width, height, x offset, y offset -void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) -{ - // [0] left, top, right, bottom of source rectangle within source texture - // [1] width and height of destination texture in pixels - // Two were merged for GLSL - WRITE(p, "uniform float4 " I_COLORS";\n"); - - int blkW = TexDecoder_GetBlockWidthInTexels(format); - int blkH = TexDecoder_GetBlockHeightInTexels(format); - - // 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments - if (ApiType == API_OPENGL) - { - WRITE(p, "#define samp0 samp9\n"); - WRITE(p, "uniform sampler2D samp0;\n"); - - WRITE(p, " out float4 ocol0;\n"); - WRITE(p, "void main()\n"); - } - else - { - WRITE(p,"sampler samp0 : register(s0);\n"); - WRITE(p, "Texture2D Tex0 : register(t0);\n"); - - WRITE(p,"void main(\n"); - WRITE(p," out float4 ocol0 : SV_Target)\n"); - } - - - WRITE(p, "{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(gl_FragCoord.xy);\n"); - - WRITE(p, " int yl = uv1.y / %d;\n", blkH); - WRITE(p, " int yb = yl * %d;\n", blkH); - WRITE(p, " int yoff = uv1.y - yb;\n"); - WRITE(p, " int xp = uv1.x + yoff * int(" I_COLORS".z);\n"); - WRITE(p, " int xel = xp / 2;\n"); - WRITE(p, " int xb = xel / %d;\n", blkH); - WRITE(p, " int xoff = xel - xb * %d;\n", blkH); - - WRITE(p, " int x2 = uv1.x * 2;\n"); - WRITE(p, " int xl = x2 / %d;\n", blkW); - WRITE(p, " int xib = x2 - xl * %d;\n", blkW); - WRITE(p, " int halfxb = xb / 2;\n"); + WRITE(p, " int xl = uv1.x * %d / %d;\n", factor, blkW); + WRITE(p, " int xib = uv1.x * %d - xl * %d;\n", factor, blkW); + WRITE(p, " int halfxb = xb / %d;\n", factor); WRITE(p, " sampleUv.x = xib + halfxb * %d;\n", blkW); WRITE(p, " sampleUv.y = yb + xoff;\n"); @@ -161,13 +111,13 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYPE ApiType) { WRITE(p, - "{\n" - "float2 uv = sampleUv + int2(%d,0);\n" // pixel offset - "uv *= " I_COLORS".w;\n" // scale by two (if wanted) - "uv += " I_COLORS".xy;\n" // move to copyed rect - "uv += float2(0.5, 0.5);\n" // move center of pixel - "uv /= float2(%d, %d);\n" // normlize to [0:1] - "uv.y = 1-uv.y;\n" // ogl foo (disable this line for d3d) + "{\n" // sampleUv is the sample position in (int)gx_coords + "float2 uv = float2(sampleUv) + int2(%d,0);\n" // pixel offset (if more than one pixel is samped) + "uv *= " I_COLORS".w;\n" // scale by two (if wanted) + "uv += " I_COLORS".xy;\n" // move to copyed rect + "uv += float2(0.5, 0.5);\n" // move to center of pixel + "uv /= float2(%d, %d);\n" // normlize to [0:1] + "uv.y = 1.0-uv.y;\n" // ogl foo (disable this line for d3d) "%s = texture(samp0, uv).%s;\n" "}\n", s_incrementSampleXCount, EFB_WIDTH, EFB_HEIGHT, dest, colorComp @@ -462,7 +412,7 @@ void WriteRGBA4443Encoder(char* p,API_TYPE ApiType) void WriteRGBA8Encoder(char* p,API_TYPE ApiType) { - Write32BitSwizzler(p, GX_TF_RGBA8, ApiType); + WriteSwizzler(p, GX_TF_RGBA8, ApiType); WRITE(p, " float cl1 = xb - (halfxb * 2.0);\n"); WRITE(p, " float cl0 = 1.0 - cl1;\n"); @@ -687,7 +637,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType) void WriteZ24Encoder(char* p, API_TYPE ApiType) { - Write32BitSwizzler(p, GX_TF_Z24X8, ApiType); + WriteSwizzler(p, GX_TF_Z24X8, ApiType); WRITE(p, " float cl = xb - (halfxb * 2.0);\n"); From 64a1969e36960ab066c2130a276d005aa272bba9 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 16:34:08 +0100 Subject: [PATCH 16/21] TextureConverter: fix scoping --- .../Src/TextureConversionShader.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index bf625223dd..92c38ca77d 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -89,7 +89,9 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, "{\n" " int2 sampleUv;\n" - " int2 uv1 = int2(gl_FragCoord.xy);\n"); + " int2 uv1 = int2(gl_FragCoord.xy);\n" + " float2 uv0 = float2(0.0, 0.0);\n" + ); WRITE(p, " uv1.x = uv1.x * %d;\n", samples); @@ -110,16 +112,14 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYPE ApiType) { - WRITE(p, - "{\n" // sampleUv is the sample position in (int)gx_coords - "float2 uv = float2(sampleUv) + int2(%d,0);\n" // pixel offset (if more than one pixel is samped) - "uv *= " I_COLORS".w;\n" // scale by two (if wanted) - "uv += " I_COLORS".xy;\n" // move to copyed rect - "uv += float2(0.5, 0.5);\n" // move to center of pixel - "uv /= float2(%d, %d);\n" // normlize to [0:1] - "uv.y = 1.0-uv.y;\n" // ogl foo (disable this line for d3d) - "%s = texture(samp0, uv).%s;\n" - "}\n", + WRITE(p, // sampleUv is the sample position in (int)gx_coords + "uv0 = float2(sampleUv) + int2(%d,0);\n" // pixel offset (if more than one pixel is samped) + "uv0 *= " I_COLORS".w;\n" // scale by two (if wanted) + "uv0 += " I_COLORS".xy;\n" // move to copyed rect + "uv0 += float2(0.5, 0.5);\n" // move to center of pixel + "uv0 /= float2(%d, %d);\n" // normlize to [0:1] + "uv0.y = 1.0-uv0.y;\n" // ogl foo (disable this line for d3d) + "%s = texture(samp0, uv0).%s;\n", s_incrementSampleXCount, EFB_WIDTH, EFB_HEIGHT, dest, colorComp ); } From 421fd0e16e7de9e599c21fadc2f2b545d95bd1c1 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 25 Nov 2013 15:36:24 +0000 Subject: [PATCH 17/21] Fix OpenGL ES 3. --- Source/Core/VideoCommon/Src/TextureConversionShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index 92c38ca77d..b67a899438 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -113,7 +113,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYPE ApiType) { WRITE(p, // sampleUv is the sample position in (int)gx_coords - "uv0 = float2(sampleUv) + int2(%d,0);\n" // pixel offset (if more than one pixel is samped) + "uv0 = float2(sampleUv + int2(%d, 0));\n" // pixel offset (if more than one pixel is samped) "uv0 *= " I_COLORS".w;\n" // scale by two (if wanted) "uv0 += " I_COLORS".xy;\n" // move to copyed rect "uv0 += float2(0.5, 0.5);\n" // move to center of pixel From 11973d31c18c342ab5636296a7a34e7d9f8460e4 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 17:01:35 +0100 Subject: [PATCH 18/21] TextureConverter: remove WriteIncrementSampleX --- .../Src/TextureConversionShader.cpp | 186 ++++++------------ 1 file changed, 56 insertions(+), 130 deletions(-) diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index b67a899438..e7db8677eb 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -21,7 +21,6 @@ static char text[16384]; static bool IntensityConstantAdded = false; -static int s_incrementSampleXCount = 0; namespace TextureConversionShader { @@ -110,7 +109,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " sampleUv.y = yb + xoff;\n"); } -void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYPE ApiType) +void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType) { WRITE(p, // sampleUv is the sample position in (int)gx_coords "uv0 = float2(sampleUv + int2(%d, 0));\n" // pixel offset (if more than one pixel is samped) @@ -120,7 +119,7 @@ void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYP "uv0 /= float2(%d, %d);\n" // normlize to [0:1] "uv0.y = 1.0-uv0.y;\n" // ogl foo (disable this line for d3d) "%s = texture(samp0, uv0).%s;\n", - s_incrementSampleXCount, EFB_WIDTH, EFB_HEIGHT, dest, colorComp + xoffset, EFB_WIDTH, EFB_HEIGHT, dest, colorComp ); } @@ -135,25 +134,6 @@ void WriteColorToIntensity(char*& p, const char* src, const char* dest) // don't add IntensityConst.a yet, because doing it later is faster and uses less instructions, due to vectorization } -void WriteIncrementSampleX(char*& p,API_TYPE ApiType) -{ - // the shader compiler apparently isn't smart or aggressive enough to recognize that: - // foo1 = lookup(x) - // x = x + increment; - // foo2 = lookup(x) - // x = x + increment; - // foo3 = lookup(x) - // can be replaced with this: - // foo1 = lookup(x + 0.0 * increment) - // foo2 = lookup(x + 1.0 * increment) - // foo3 = lookup(x + 2.0 * increment) - // which looks like the same operations but uses considerably fewer ALU instruction slots. - // thus, instead of using the former method, we only increment a counter internally here, - // and we wait until WriteSampleColor to write out the constant multiplier - // to achieve the increment as in the latter case. - s_incrementSampleXCount++; -} - void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest) { WRITE(p, " %s = floor(%s * 255.0 / exp2(8.0 - %d.0));\n", dest, src, depth); @@ -163,7 +143,6 @@ void WriteEncoderEnd(char* p, API_TYPE ApiType) { WRITE(p, "}\n"); IntensityConstantAdded = false; - s_incrementSampleXCount = 0; } void WriteI8Encoder(char* p, API_TYPE ApiType) @@ -171,19 +150,16 @@ void WriteI8Encoder(char* p, API_TYPE ApiType) WriteSwizzler(p, GX_TF_I8, ApiType); WRITE(p, " float3 texSample;\n"); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 0, ApiType); WriteColorToIntensity(p, "texSample", "ocol0.b"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 1, ApiType); WriteColorToIntensity(p, "texSample", "ocol0.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 2, ApiType); WriteColorToIntensity(p, "texSample", "ocol0.r"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 3, ApiType); WriteColorToIntensity(p, "texSample", "ocol0.a"); WRITE(p, " ocol0.rgba += IntensityConst.aaaa;\n"); // see WriteColorToIntensity @@ -198,35 +174,28 @@ void WriteI4Encoder(char* p, API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 0, ApiType); WriteColorToIntensity(p, "texSample", "color0.b"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 1, ApiType); WriteColorToIntensity(p, "texSample", "color1.b"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 2, ApiType); WriteColorToIntensity(p, "texSample", "color0.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 3, ApiType); WriteColorToIntensity(p, "texSample", "color1.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 4, ApiType); WriteColorToIntensity(p, "texSample", "color0.r"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 5, ApiType); WriteColorToIntensity(p, "texSample", "color1.r"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 6, ApiType); WriteColorToIntensity(p, "texSample", "color0.a"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 7, ApiType); WriteColorToIntensity(p, "texSample", "color1.a"); WRITE(p, " color0.rgba += IntensityConst.aaaa;\n"); @@ -244,12 +213,11 @@ void WriteIA8Encoder(char* p,API_TYPE ApiType) WriteSwizzler(p, GX_TF_IA8, ApiType); WRITE(p, " float4 texSample;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); WRITE(p, " ocol0.b = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "ocol0.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WRITE(p, " ocol0.r = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "ocol0.a"); @@ -265,22 +233,19 @@ void WriteIA4Encoder(char* p,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); WRITE(p, " color0.b = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "color1.b"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WRITE(p, " color0.g = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "color1.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 2, ApiType); WRITE(p, " color0.r = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "color1.r"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 3, ApiType); WRITE(p, " color0.a = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "color1.a"); @@ -297,9 +262,8 @@ void WriteRGB565Encoder(char* p,API_TYPE ApiType) { WriteSwizzler(p, GX_TF_RGB565, ApiType); - WriteSampleColor(p, "rgb", "float3 texSample0", ApiType); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "float3 texSample1", ApiType); + WriteSampleColor(p, "rgb", "float3 texSample0", 0, ApiType); + WriteSampleColor(p, "rgb", "float3 texSample1", 1, ApiType); WRITE(p, " float2 texRs = float2(texSample0.r, texSample1.r);\n"); WRITE(p, " float2 texGs = float2(texSample0.g, texSample1.g);\n"); WRITE(p, " float2 texBs = float2(texSample0.b, texSample1.b);\n"); @@ -326,7 +290,7 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType) WRITE(p, " float gUpper;\n"); WRITE(p, " float gLower;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits WRITE(p, "if(texSample.a > 0.878f) {\n"); @@ -353,9 +317,8 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType) WRITE(p, "}\n"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WRITE(p, "if(texSample.a > 0.878f) {\n"); @@ -392,15 +355,13 @@ void WriteRGBA4443Encoder(char* p,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); WriteToBitDepth(p, 3, "texSample.a", "color0.b"); WriteToBitDepth(p, 4, "texSample.r", "color1.b"); WriteToBitDepth(p, 4, "texSample.g", "color0.g"); WriteToBitDepth(p, 4, "texSample.b", "color1.g"); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WriteToBitDepth(p, 3, "texSample.a", "color0.r"); WriteToBitDepth(p, 4, "texSample.r", "color1.r"); WriteToBitDepth(p, 4, "texSample.g", "color0.a"); @@ -421,15 +382,13 @@ void WriteRGBA8Encoder(char* p,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); WRITE(p, " color0.b = texSample.a;\n"); WRITE(p, " color0.g = texSample.r;\n"); WRITE(p, " color1.b = texSample.g;\n"); WRITE(p, " color1.g = texSample.b;\n"); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WRITE(p, " color0.r = texSample.a;\n"); WRITE(p, " color0.a = texSample.r;\n"); WRITE(p, " color1.r = texSample.g;\n"); @@ -446,28 +405,14 @@ void WriteC4Encoder(char* p, const char* comp,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, comp, "color0.b", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color1.b", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color0.g", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color1.g", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color0.r", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color1.r", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color0.a", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color1.a", ApiType); + WriteSampleColor(p, comp, "color0.b", 0, ApiType); + WriteSampleColor(p, comp, "color1.b", 1, ApiType); + WriteSampleColor(p, comp, "color0.g", 2, ApiType); + WriteSampleColor(p, comp, "color1.g", 3, ApiType); + WriteSampleColor(p, comp, "color0.r", 4, ApiType); + WriteSampleColor(p, comp, "color1.r", 5, ApiType); + WriteSampleColor(p, comp, "color0.a", 6, ApiType); + WriteSampleColor(p, comp, "color1.a", 7, ApiType); WriteToBitDepth(p, 4, "color0", "color0"); WriteToBitDepth(p, 4, "color1", "color1"); @@ -480,16 +425,10 @@ void WriteC8Encoder(char* p, const char* comp,API_TYPE ApiType) { WriteSwizzler(p, GX_CTF_R8, ApiType); - WriteSampleColor(p, comp, "ocol0.b", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "ocol0.g", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "ocol0.r", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "ocol0.a", ApiType); + WriteSampleColor(p, comp, "ocol0.b", 0, ApiType); + WriteSampleColor(p, comp, "ocol0.g", 1, ApiType); + WriteSampleColor(p, comp, "ocol0.r", 2, ApiType); + WriteSampleColor(p, comp, "ocol0.a", 3, ApiType); WriteEncoderEnd(p, ApiType); } @@ -501,22 +440,19 @@ void WriteCC4Encoder(char* p, const char* comp,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, comp, "texSample", ApiType); + WriteSampleColor(p, comp, "texSample", 0, ApiType); WRITE(p, " color0.b = texSample.x;\n"); WRITE(p, " color1.b = texSample.y;\n"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, comp, "texSample", ApiType); + WriteSampleColor(p, comp, "texSample", 1, ApiType); WRITE(p, " color0.g = texSample.x;\n"); WRITE(p, " color1.g = texSample.y;\n"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, comp, "texSample", ApiType); + WriteSampleColor(p, comp, "texSample", 2, ApiType); WRITE(p, " color0.r = texSample.x;\n"); WRITE(p, " color1.r = texSample.y;\n"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, comp, "texSample", ApiType); + WriteSampleColor(p, comp, "texSample", 3, ApiType); WRITE(p, " color0.a = texSample.x;\n"); WRITE(p, " color1.a = texSample.y;\n"); @@ -531,10 +467,8 @@ void WriteCC8Encoder(char* p, const char* comp, API_TYPE ApiType) { WriteSwizzler(p, GX_CTF_RA8, ApiType); - WriteSampleColor(p, comp, "ocol0.bg", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "ocol0.ra", ApiType); + WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType); + WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType); WriteEncoderEnd(p, ApiType); } @@ -545,19 +479,16 @@ void WriteZ8Encoder(char* p, const char* multiplier,API_TYPE ApiType) WRITE(p, " float depth;\n"); - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 0, ApiType); WRITE(p, "ocol0.b = frac(depth * %s);\n", multiplier); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 1, ApiType); WRITE(p, "ocol0.g = frac(depth * %s);\n", multiplier); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 2, ApiType); WRITE(p, "ocol0.r = frac(depth * %s);\n", multiplier); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 3, ApiType); WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier); WriteEncoderEnd(p, ApiType); @@ -572,7 +503,7 @@ void WriteZ16Encoder(char* p,API_TYPE ApiType) // byte order is reversed - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 0, ApiType); WRITE(p, " depth *= 16777215.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); @@ -582,9 +513,7 @@ void WriteZ16Encoder(char* p,API_TYPE ApiType) WRITE(p, " ocol0.b = expanded.g / 255.0;\n"); WRITE(p, " ocol0.g = expanded.r / 255.0;\n"); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 1, ApiType); WRITE(p, " depth *= 16777215.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); @@ -606,7 +535,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType) // byte order is reversed - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 0, ApiType); WRITE(p, " depth *= 16777215.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); @@ -618,9 +547,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType) WRITE(p, " ocol0.b = expanded.b / 255.0;\n"); WRITE(p, " ocol0.g = expanded.g / 255.0;\n"); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 1, ApiType); WRITE(p, " depth *= 16777215.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); @@ -646,9 +573,8 @@ void WriteZ24Encoder(char* p, API_TYPE ApiType) WRITE(p, " float3 expanded0;\n"); WRITE(p, " float3 expanded1;\n"); - WriteSampleColor(p, "b", "depth0", ApiType); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "b", "depth1", ApiType); + WriteSampleColor(p, "b", "depth0", 0, ApiType); + WriteSampleColor(p, "b", "depth1", 1, ApiType); for (int i = 0; i < 2; i++) { From 0b4cb2e15fc7c444db19fa55c3167ea0d232db22 Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 25 Nov 2013 22:27:11 +0100 Subject: [PATCH 19/21] OpenGL: split real xfb + efb2ram framebuffers --- .../OGL/Src/TextureConverter.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index 7f8ae23cc9..d2cb5b0414 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -26,7 +26,7 @@ namespace TextureConverter using OGL::TextureCache; -static GLuint s_texConvFrameBuffer = 0; +static GLuint s_texConvFrameBuffer[2] = {0,0}; static GLuint s_srcTexture = 0; // for decoding from RAM static GLuint s_dstTexture = 0; // for encoding to RAM @@ -161,7 +161,7 @@ SHADER &GetOrCreateEncodingShader(u32 format) void Init() { - glGenFramebuffers(1, &s_texConvFrameBuffer); + glGenFramebuffers(2, s_texConvFrameBuffer); glActiveTexture(GL_TEXTURE0 + 9); glGenTextures(1, &s_srcTexture); @@ -172,6 +172,10 @@ void Init() glBindTexture(GL_TEXTURE_2D, s_dstTexture); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, renderBufferWidth, renderBufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + + FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0); CreatePrograms(); } @@ -180,7 +184,7 @@ void Shutdown() { glDeleteTextures(1, &s_srcTexture); glDeleteTextures(1, &s_dstTexture); - glDeleteFramebuffers(1, &s_texConvFrameBuffer); + glDeleteFramebuffers(2, s_texConvFrameBuffer); s_rgbToYuyvProgram.Destroy(); s_yuyvToRgbProgram.Destroy(); @@ -190,7 +194,8 @@ void Shutdown() s_srcTexture = 0; s_dstTexture = 0; - s_texConvFrameBuffer = 0; + s_texConvFrameBuffer[0] = 0; + s_texConvFrameBuffer[1] = 0; } void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, @@ -201,9 +206,7 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, // switch to texture converter frame buffer // attach render buffer as color destination - FramebufferManager::SetFramebuffer(s_texConvFrameBuffer); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0); + FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]); GL_REPORT_ERRORD(); // set source texture @@ -347,7 +350,7 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur // switch to texture converter frame buffer // attach destTexture as color destination - FramebufferManager::SetFramebuffer(s_texConvFrameBuffer); + FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[1]); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, destTexture, 0); GL_REPORT_FBO_ERROR(); From 1138c2e155f0db6736dfc00b439fb510b2d6d05f Mon Sep 17 00:00:00 2001 From: degasus Date: Tue, 26 Nov 2013 04:07:59 +0100 Subject: [PATCH 20/21] OpenGL: reset EFB after efb2ram FB initialization --- Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index d2cb5b0414..d2e295c4f3 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -176,6 +176,7 @@ void Init() FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0); + FramebufferManager::SetFramebuffer(0); CreatePrograms(); } From 687097d4bc1ee2f8ee60011280823e01907a986a Mon Sep 17 00:00:00 2001 From: degasus Date: Mon, 9 Dec 2013 12:33:50 +0100 Subject: [PATCH 21/21] OGL: use integer uniforms for efb2ram texture converter --- .../VideoBackends/OGL/Src/TextureConverter.cpp | 6 +++--- .../Src/TextureConversionShader.cpp | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index d2e295c4f3..2ec8ef4949 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -296,9 +296,9 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, s32 expandedHeight = (height + blkH) & (~blkH); texconv_shader.Bind(); - glUniform4f(texconv_shader.UniformLocations[0], - float(source.left), float(source.top), - (float)expandedWidth, bScaleByHalf ? 2.f : 1.f); + glUniform4i(texconv_shader.UniformLocations[0], + source.left, source.top, + expandedWidth, bScaleByHalf ? 2 : 1); TargetRectangle scaledSource; scaledSource.top = 0; diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index e7db8677eb..f271111851 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -62,7 +62,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) { // left, top, of source rectangle within source texture // width of the destination rectangle, scale_factor (1 or 2) - WRITE(p, "uniform float4 " I_COLORS";\n"); + WRITE(p, "uniform int4 " I_COLORS";\n"); int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkH = TexDecoder_GetBlockHeightInTexels(format); @@ -97,7 +97,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, " int yl = uv1.y / %d;\n", blkH); WRITE(p, " int yb = yl * %d;\n", blkH); WRITE(p, " int yoff = uv1.y - yb;\n"); - WRITE(p, " int xp = uv1.x + yoff * int(" I_COLORS".z);\n"); + WRITE(p, " int xp = uv1.x + yoff * " I_COLORS".z;\n"); WRITE(p, " int xel = xp / %d;\n", samples == 1 ? factor : blkW); WRITE(p, " int xb = xel / %d;\n", blkH); WRITE(p, " int xoff = xel - xb * %d;\n", blkH); @@ -111,13 +111,13 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType) { - WRITE(p, // sampleUv is the sample position in (int)gx_coords - "uv0 = float2(sampleUv + int2(%d, 0));\n" // pixel offset (if more than one pixel is samped) - "uv0 *= " I_COLORS".w;\n" // scale by two (if wanted) - "uv0 += " I_COLORS".xy;\n" // move to copyed rect - "uv0 += float2(0.5, 0.5);\n" // move to center of pixel - "uv0 /= float2(%d, %d);\n" // normlize to [0:1] - "uv0.y = 1.0-uv0.y;\n" // ogl foo (disable this line for d3d) + WRITE(p, // sampleUv is the sample position in (int)gx_coords + "uv0 = float2(sampleUv + int2(%d, 0)" // pixel offset (if more than one pixel is samped) + " + " I_COLORS".xy);\n" // move to copyed rect + "uv0 += float2(0.5, 0.5);\n" // move to center of pixel + "uv0 *= float(" I_COLORS".w);\n" // scale by two if needed (this will move to pixels border to filter linear) + "uv0 /= float2(%d, %d);\n" // normlize to [0:1] + "uv0.y = 1.0-uv0.y;\n" // ogl foo (disable this line for d3d) "%s = texture(samp0, uv0).%s;\n", xoffset, EFB_WIDTH, EFB_HEIGHT, dest, colorComp );