diff --git a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp index 1ce5e552a2..9abd47a361 100644 --- a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp +++ b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.cpp @@ -33,8 +33,6 @@ GLuint FramebufferManager::m_resolvedDepthTexture; GLuint FramebufferManager::m_xfbFramebuffer; // reinterpret pixel format -GLuint FramebufferManager::m_pixel_format_vao; -GLuint FramebufferManager::m_pixel_format_vbo; SHADER FramebufferManager::m_pixel_format_shaders[2]; @@ -79,24 +77,24 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_efbDepth = glObj[1]; m_resolvedColorTexture = glObj[2]; // needed for pixel format convertion - glBindTexture(getFbType(), m_efbColor); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, m_efbColor); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glBindTexture(getFbType(), m_efbDepth); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); + glBindTexture(GL_TEXTURE_2D, m_efbDepth); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); - glBindTexture(getFbType(), m_resolvedColorTexture); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, m_resolvedColorTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); // Bind target textures to the EFB framebuffer. glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_efbColor, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, getFbType(), m_efbDepth, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_efbColor, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_efbDepth, 0); GL_REPORT_FBO_ERROR(); } @@ -144,20 +142,20 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms m_resolvedColorTexture = glObj[0]; m_resolvedDepthTexture = glObj[1]; - glBindTexture(getFbType(), m_resolvedColorTexture); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, m_resolvedColorTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glBindTexture(getFbType(), m_resolvedDepthTexture); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); - glTexImage2D(getFbType(), 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); + glBindTexture(GL_TEXTURE_2D, m_resolvedDepthTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL); // Bind resolved textures to resolved framebuffer. glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_resolvedColorTexture, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, getFbType(), m_resolvedDepthTexture, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_resolvedColorTexture, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_resolvedDepthTexture, 0); GL_REPORT_FBO_ERROR(); @@ -177,33 +175,18 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); // reinterpret pixel format - glGenBuffers(1, &m_pixel_format_vbo); - glGenVertexArrays(1, &m_pixel_format_vao); - glBindVertexArray(m_pixel_format_vao); - glBindBuffer(GL_ARRAY_BUFFER, m_pixel_format_vbo); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*2, NULL); - - float vertices[] = { - -1.0, -1.0, - 1.0, -1.0, - -1.0, 1.0, - 1.0, 1.0, - }; - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); - char vs[] = - "ATTRIN vec2 rawpos;\n" "void main(void) {\n" - " gl_Position = vec4(rawpos,0,1);\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" "}\n"; char ps_rgba6_to_rgb8[] = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" - " ivec4 src6 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 63.f));\n" + " ivec4 src6 = ivec4(round(texelFetch(samp9, ivec2(gl_FragCoord.xy), 0) * 63.f));\n" " ivec4 dst8;\n" " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" @@ -213,11 +196,11 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms "}"; char ps_rgb8_to_rgba6[] = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" - " ivec4 src8 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 255.f));\n" + " ivec4 src8 = ivec4(round(texelFetch(samp9, ivec2(gl_FragCoord.xy), 0) * 255.f));\n" " ivec4 dst6;\n" " dst6.r = src8.r >> 2;\n" " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" @@ -261,8 +244,6 @@ FramebufferManager::~FramebufferManager() m_efbDepth = 0; // reinterpret pixel format - glDeleteVertexArrays(1, &m_pixel_format_vao); - glDeleteBuffers(1, &m_pixel_format_vbo); m_pixel_format_shaders[0].Destroy(); m_pixel_format_shaders[1].Destroy(); } @@ -386,14 +367,13 @@ void FramebufferManager::ReinterpretPixelData(unsigned int convtype) m_resolvedColorTexture = src_texture; // also switch them on fbo - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_efbColor, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_efbColor, 0); } glViewport(0,0, m_targetWidth, m_targetHeight); glActiveTexture(GL_TEXTURE0 + 9); - glBindTexture(getFbType(), src_texture); + glBindTexture(GL_TEXTURE_2D, src_texture); m_pixel_format_shaders[convtype ? 1 : 0].Bind(); - glBindVertexArray(m_pixel_format_vao); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h index 7b93b239ae..9fbcdbfedd 100644 --- a/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h +++ b/Source/Core/VideoBackends/OGL/Src/FramebufferManager.h @@ -57,14 +57,6 @@ struct XFBSource : public XFBSourceBase const GLuint texture; }; -inline GLenum getFbType() -{ -#ifndef USE_GLES3 - return GL_TEXTURE_RECTANGLE; -#endif - return GL_TEXTURE_2D; -} - class FramebufferManager : public FramebufferManagerBase { public: @@ -121,8 +113,6 @@ private: static GLuint m_xfbFramebuffer; // Only used in MSAA mode // For pixel format draw - static GLuint m_pixel_format_vbo; - static GLuint m_pixel_format_vao; static SHADER m_pixel_format_shaders[2]; }; diff --git a/Source/Core/VideoBackends/OGL/Src/PostProcessing.cpp b/Source/Core/VideoBackends/OGL/Src/PostProcessing.cpp index 84fb0c4d1c..c0460e489a 100644 --- a/Source/Core/VideoBackends/OGL/Src/PostProcessing.cpp +++ b/Source/Core/VideoBackends/OGL/Src/PostProcessing.cpp @@ -25,18 +25,15 @@ static u32 s_width; static u32 s_height; static GLuint s_fbo; static GLuint s_texture; -static GLuint s_vao; -static GLuint s_vbo; static GLuint s_uniform_resolution; static char s_vertex_shader[] = - "in vec2 rawpos;\n" - "in vec2 tex0;\n" "out vec2 uv0;\n" "void main(void) {\n" - " gl_Position = vec4(rawpos,0,1);\n" - " uv0 = tex0;\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + " uv0 = rawpos;\n" "}\n"; void Init() @@ -56,34 +53,14 @@ void Init() glBindFramebuffer(GL_FRAMEBUFFER, s_fbo); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_texture, 0); FramebufferManager::SetFramebuffer(0); - - glGenBuffers(1, &s_vbo); - glBindBuffer(GL_ARRAY_BUFFER, s_vbo); - GLfloat vertices[] = { - -1.f, -1.f, 0.f, 0.f, - -1.f, 1.f, 0.f, 1.f, - 1.f, -1.f, 1.f, 0.f, - 1.f, 1.f, 1.f, 1.f - }; - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW); - - glGenVertexArrays(1, &s_vao); - glBindVertexArray( s_vao ); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, NULL); - glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB); - glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2); } void Shutdown() { s_shader.Destroy(); - glDeleteFramebuffers(1, &s_vbo); + glDeleteFramebuffers(1, &s_fbo); glDeleteTextures(1, &s_texture); - - glDeleteBuffers(1, &s_vbo); - glDeleteVertexArrays(1, &s_vao); } void ReloadShader() @@ -103,7 +80,6 @@ void BlitToScreen() glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); glViewport(0, 0, s_width, s_height); - glBindVertexArray(s_vao); s_shader.Bind(); glUniform4f(s_uniform_resolution, (float)s_width, (float)s_height, 1.0f/(float)s_width, 1.0f/(float)s_height); @@ -111,7 +87,6 @@ void BlitToScreen() glActiveTexture(GL_TEXTURE0+9); glBindTexture(GL_TEXTURE_2D, s_texture); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - glBindTexture(GL_TEXTURE_2D, 0); /* glBindFramebuffer(GL_READ_FRAMEBUFFER, s_fbo); @@ -132,7 +107,6 @@ void Update ( u32 width, u32 height ) glActiveTexture(GL_TEXTURE0+9); glBindTexture(GL_TEXTURE_2D, s_texture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glBindTexture(GL_TEXTURE_2D, 0); } } diff --git a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp index f998c6ced2..cc7e29dce5 100644 --- a/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/Src/ProgramShaderCache.cpp @@ -584,16 +584,14 @@ void ProgramShaderCache::CreateHeader ( void ) "#define float2 vec2\n" "#define float3 vec3\n" "#define float4 vec4\n" + "#define int2 ivec2\n" + "#define int3 ivec3\n" + "#define int4 ivec4\n" // hlsl to glsl function translation "#define frac fract\n" "#define lerp mix\n" - // texture2d hack - "%s\n" - "%s\n" - "%s\n" - , v==GLSLES3 ? "#version 300 es" : v==GLSL_130 ? "#version 130" : v==GLSL_140 ? "#version 140" : "#version 150" , g_ActiveConfig.backend_info.bSupportsGLSLUBO && v s_VBO; - bool SaveTexture(const std::string filename, u32 textarget, u32 tex, int virtual_width, int virtual_height, unsigned int level) { #ifndef USE_GLES3 @@ -296,7 +291,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo GL_REPORT_ERRORD(); glActiveTexture(GL_TEXTURE0+9); - glBindTexture(getFbType(), read_texture); + glBindTexture(GL_TEXTURE_2D, read_texture); glViewport(0, 0, virtual_width, virtual_height); @@ -311,53 +306,12 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo glUniform4fv(s_ColorMatrixUniform, 7, colmat); s_ColorCbufid = cbufid; } + + TargetRectangle R = g_renderer->ConvertEFBRectangle(srcRect); + glUniform4f(srcFormat == PIXELFMT_Z24 ? s_DepthCopyPositionUniform : s_ColorCopyPositionUniform, + R.left, R.top, R.right, R.bottom); GL_REPORT_ERRORD(); - TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect); - GL_REPORT_ERRORD(); - - // should be unique enough, if not, vbo will "only" be uploaded to much - u64 targetSourceHash = u64(targetSource.left)<<48 | u64(targetSource.top)<<32 | u64(targetSource.right)<<16 | u64(targetSource.bottom); - std::map::iterator vbo_it = s_VBO.find(targetSourceHash); - - if(vbo_it == s_VBO.end()) { - VBOCache item; - item.targetSource.bottom = -1; - item.targetSource.top = -1; - item.targetSource.left = -1; - item.targetSource.right = -1; - glGenBuffers(1, &item.vbo); - glGenVertexArrays(1, &item.vao); - - glBindBuffer(GL_ARRAY_BUFFER, item.vbo); - glBindVertexArray(item.vao); - - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL); - glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB); - glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2); - - vbo_it = s_VBO.insert(std::pair(targetSourceHash, item)).first; - } - if(!(vbo_it->second.targetSource == targetSource)) { - GLfloat vertices[] = { - -1.f, 1.f, - (GLfloat)targetSource.left, (GLfloat)targetSource.bottom, - -1.f, -1.f, - (GLfloat)targetSource.left, (GLfloat)targetSource.top, - 1.f, 1.f, - (GLfloat)targetSource.right, (GLfloat)targetSource.bottom, - 1.f, -1.f, - (GLfloat)targetSource.right, (GLfloat)targetSource.top - }; - - glBindBuffer(GL_ARRAY_BUFFER, vbo_it->second.vbo); - glBufferData(GL_ARRAY_BUFFER, 4*4*sizeof(GLfloat), vertices, GL_STREAM_DRAW); - - vbo_it->second.targetSource = targetSource; - } - - glBindVertexArray(vbo_it->second.vao); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); GL_REPORT_ERRORD(); @@ -403,38 +357,39 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo TextureCache::TextureCache() { const char *pColorMatrixProg = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "uniform vec4 colmat[7];\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "\n" "void main(){\n" - " vec4 texcol = texture2DRect(samp9, uv0);\n" + " vec4 texcol = texture(samp9, uv0);\n" " texcol = round(texcol * colmat[5]) * colmat[6];\n" " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n" "}\n"; const char *pDepthMatrixProg = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "uniform vec4 colmat[5];\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "\n" "void main(){\n" - " vec4 texcol = texture2DRect(samp9, uv0);\n" + " vec4 texcol = texture(samp9, uv0);\n" " vec4 EncodedDepth = fract((texcol.r * (16777215.0/16777216.0)) * vec4(1.0,256.0,256.0*256.0,1.0));\n" " texcol = round(EncodedDepth * (16777216.0/16777215.0) * vec4(255.0,255.0,255.0,15.0)) / vec4(255.0,255.0,255.0,15.0);\n" " ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];" "}\n"; const char *VProgram = - "ATTRIN vec2 rawpos;\n" - "ATTRIN vec2 tex0;\n" "VARYOUT vec2 uv0;\n" + "uniform sampler2D samp9;\n" + "uniform vec4 copy_position;\n" // left, top, right, bottom "void main()\n" "{\n" - " uv0 = tex0;\n" - " gl_Position = vec4(rawpos,0,1);\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0));\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" "}\n"; ProgramShaderCache::CompileShader(s_ColorMatrixProgram, VProgram, pColorMatrixProg); @@ -445,6 +400,9 @@ TextureCache::TextureCache() s_ColorCbufid = -1; s_DepthCbufid = -1; + s_ColorCopyPositionUniform = glGetUniformLocation(s_ColorMatrixProgram.glprogid, "copy_position"); + s_DepthCopyPositionUniform = glGetUniformLocation(s_DepthMatrixProgram.glprogid, "copy_position"); + s_ActiveTexture = -1; s_NextStage = -1; for(auto& gtex : s_Textures) @@ -456,12 +414,6 @@ TextureCache::~TextureCache() { s_ColorMatrixProgram.Destroy(); s_DepthMatrixProgram.Destroy(); - - for(auto& cache : s_VBO) { - glDeleteBuffers(1, &cache.second.vbo); - glDeleteVertexArrays(1, &cache.second.vao); - } - s_VBO.clear(); } void TextureCache::DisableStage(unsigned int stage) diff --git a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp index 75bf04b8b9..8124db15e7 100644 --- a/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp @@ -26,7 +26,7 @@ namespace TextureConverter using OGL::TextureCache; -static GLuint s_texConvFrameBuffer = 0; +static GLuint s_texConvFrameBuffer[2] = {0,0}; static GLuint s_srcTexture = 0; // for decoding from RAM static GLuint s_dstTexture = 0; // for encoding to RAM @@ -34,28 +34,16 @@ const int renderBufferWidth = 1024; const int renderBufferHeight = 1024; static SHADER s_rgbToYuyvProgram; +static int s_rgbToYuyvUniform_loc; + static SHADER s_yuyvToRgbProgram; // Not all slots are taken - but who cares. const u32 NUM_ENCODING_PROGRAMS = 64; static SHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS]; -static GLuint s_encode_VBO = 0; -static GLuint s_encode_VAO = 0; -static TargetRectangle s_cached_sourceRc; - static GLuint s_PBO = 0; // for readback with different strides -static const char *VProgram = - "ATTRIN vec2 rawpos;\n" - "ATTRIN vec2 tex0;\n" - "VARYOUT vec2 uv0;\n" - "void main()\n" - "{\n" - " uv0 = tex0;\n" - " gl_Position = vec4(rawpos, 0.0, 1.0);\n" - "}\n"; - void CreatePrograms() { /* TODO: Accuracy Improvements @@ -75,14 +63,24 @@ void CreatePrograms() * inbetween the two Pixels, and only blurs over these two pixels. */ // Output is BGRA because that is slightly faster than RGBA. + const char *VProgramRgbToYuyv = + "VARYOUT vec2 uv0;\n" + "uniform vec4 copy_position;\n" // left, top, right, bottom + "uniform sampler2D samp9;\n" + "void main()\n" + "{\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + " uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0));\n" + "}\n"; const char *FProgramRgbToYuyv = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" - " vec3 c0 = texture2DRect(samp9, uv0 - dFdx(uv0) * 0.25).rgb;\n" - " vec3 c1 = texture2DRect(samp9, uv0 + dFdx(uv0) * 0.25).rgb;\n" + " vec3 c0 = texture(samp9, (uv0 - dFdx(uv0) * 0.25)).rgb;\n" + " vec3 c1 = texture(samp9, (uv0 + dFdx(uv0) * 0.25)).rgb;\n" " vec3 c01 = (c0 + c1) * 0.5;\n" " vec3 y_const = vec3(0.257,0.504,0.098);\n" " vec3 u_const = vec3(-0.148,-0.291,0.439);\n" @@ -90,6 +88,8 @@ void CreatePrograms() " vec4 const3 = vec4(0.0625,0.5,0.0625,0.5);\n" " ocol0 = vec4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n" "}\n"; + ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgramRgbToYuyv, FProgramRgbToYuyv); + s_rgbToYuyvUniform_loc = glGetUniformLocation(s_rgbToYuyvProgram.glprogid, "copy_position"); /* TODO: Accuracy Improvements * @@ -105,20 +105,15 @@ void CreatePrograms() " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" "}\n"; const char *FProgramYuyvToRgb = - "uniform sampler2DRect samp9;\n" + "uniform sampler2D samp9;\n" "VARYIN vec2 uv0;\n" "out vec4 ocol0;\n" "void main()\n" "{\n" " ivec2 uv = ivec2(gl_FragCoord.xy);\n" -#ifdef USE_GLES3 // We switch top/bottom here. TODO: move this to screen blit. " ivec2 ts = textureSize(samp9, 0);\n" " vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1), 0);\n" -#else - " ivec2 ts = textureSize(samp9);\n" - " vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1));\n" -#endif " float y = mix(c0.b, c0.r, (uv.x & 1) == 1);\n" " float yComp = 1.164 * (y - 0.0625);\n" " float uComp = c0.g - 0.5;\n" @@ -128,8 +123,6 @@ void CreatePrograms() " yComp + (2.018 * uComp),\n" " 1.0);\n" "}\n"; - - ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgram, FProgramRgbToYuyv); ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgramYuyvToRgb, FProgramYuyvToRgb); } @@ -156,6 +149,13 @@ SHADER &GetOrCreateEncodingShader(u32 format) } #endif + const char *VProgram = + "void main()\n" + "{\n" + " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" + " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" + "}\n"; + ProgramShaderCache::CompileShader(s_encodingPrograms[format], VProgram, shader); } return s_encodingPrograms[format]; @@ -163,30 +163,22 @@ SHADER &GetOrCreateEncodingShader(u32 format) void Init() { - glGenFramebuffers(1, &s_texConvFrameBuffer); - - glGenBuffers(1, &s_encode_VBO ); - glGenVertexArrays(1, &s_encode_VAO ); - glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO ); - glBindVertexArray( s_encode_VAO ); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL); - glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB); - glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2); - s_cached_sourceRc.top = -1; - s_cached_sourceRc.bottom = -1; - s_cached_sourceRc.left = -1; - s_cached_sourceRc.right = -1; + glGenFramebuffers(2, s_texConvFrameBuffer); glActiveTexture(GL_TEXTURE0 + 9); glGenTextures(1, &s_srcTexture); - glBindTexture(getFbType(), s_srcTexture); - glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0); + glBindTexture(GL_TEXTURE_2D, s_srcTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glGenTextures(1, &s_dstTexture); glBindTexture(GL_TEXTURE_2D, s_dstTexture); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, renderBufferWidth, renderBufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + + + FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0); + FramebufferManager::SetFramebuffer(0); glGenBuffers(1, &s_PBO); @@ -197,10 +189,8 @@ void Shutdown() { glDeleteTextures(1, &s_srcTexture); glDeleteTextures(1, &s_dstTexture); - glDeleteFramebuffers(1, &s_texConvFrameBuffer); - glDeleteBuffers(1, &s_encode_VBO ); - glDeleteVertexArrays(1, &s_encode_VAO ); glDeleteBuffers(1, &s_PBO); + glDeleteFramebuffers(2, s_texConvFrameBuffer); s_rgbToYuyvProgram.Destroy(); s_yuyvToRgbProgram.Destroy(); @@ -210,8 +200,9 @@ void Shutdown() s_srcTexture = 0; s_dstTexture = 0; - s_texConvFrameBuffer = 0; s_PBO = 0; + s_texConvFrameBuffer[0] = 0; + s_texConvFrameBuffer[1] = 0; } void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, @@ -222,49 +213,28 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc, // switch to texture converter frame buffer // attach render buffer as color destination - FramebufferManager::SetFramebuffer(s_texConvFrameBuffer); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0); + FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]); GL_REPORT_ERRORD(); // set source texture glActiveTexture(GL_TEXTURE0+9); - glBindTexture(getFbType(), srcTexture); + glBindTexture(GL_TEXTURE_2D, srcTexture); if (linearFilter) { - glTexParameteri(getFbType(), GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(getFbType(), GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); } else { - glTexParameteri(getFbType(), GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(getFbType(), GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); } GL_REPORT_ERRORD(); glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight); - GL_REPORT_ERRORD(); - if(!(s_cached_sourceRc == sourceRc)) { - GLfloat vertices[] = { - -1.f, -1.f, - (float)sourceRc.left, (float)sourceRc.top, - -1.f, 1.f, - (float)sourceRc.left, (float)sourceRc.bottom, - 1.f, -1.f, - (float)sourceRc.right, (float)sourceRc.top, - 1.f, 1.f, - (float)sourceRc.right, (float)sourceRc.bottom - }; - glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO ); - glBufferData(GL_ARRAY_BUFFER, 4*4*sizeof(GLfloat), vertices, GL_STREAM_DRAW); - - s_cached_sourceRc = sourceRc; - } - - glBindVertexArray( s_encode_VAO ); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); GL_REPORT_ERRORD(); @@ -342,17 +312,10 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer, s32 expandedWidth = (width + blkW) & (~blkW); s32 expandedHeight = (height + blkH) & (~blkH); - float sampleStride = bScaleByHalf ? 2.f : 1.f; - - float params[] = { - Renderer::EFBToScaledXf(sampleStride), Renderer::EFBToScaledYf(sampleStride), - 0.0f, 0.0f, - (float)expandedWidth, (float)Renderer::EFBToScaledY(expandedHeight)-1, - (float)Renderer::EFBToScaledX(source.left), (float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) - }; - texconv_shader.Bind(); - glUniform4fv(texconv_shader.UniformLocations[0], 2, params); + glUniform4i(texconv_shader.UniformLocations[0], + source.left, source.top, + expandedWidth, bScaleByHalf ? 2 : 1); TargetRectangle scaledSource; scaledSource.top = 0; @@ -378,6 +341,8 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des s_rgbToYuyvProgram.Bind(); + glUniform4f(s_rgbToYuyvUniform_loc, sourceRc.left, sourceRc.top, sourceRc.right, sourceRc.bottom); + // We enable linear filtering, because the gamecube does filtering in the vertical direction when // yscale is enabled. // Otherwise we get jaggies when a game uses yscaling (most PAL games) @@ -403,7 +368,7 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur // switch to texture converter frame buffer // attach destTexture as color destination - FramebufferManager::SetFramebuffer(s_texConvFrameBuffer); + FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[1]); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, destTexture, 0); GL_REPORT_FBO_ERROR(); @@ -411,8 +376,8 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur // activate source texture // set srcAddr as data for source texture glActiveTexture(GL_TEXTURE0+9); - glBindTexture(getFbType(), s_srcTexture); - glTexImage2D(getFbType(), 0, GL_RGBA, srcWidth / 2, srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr); + glBindTexture(GL_TEXTURE_2D, s_srcTexture); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, srcWidth / 2, srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr); glViewport(0, 0, srcWidth, srcHeight); s_yuyvToRgbProgram.Bind(); diff --git a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp index 99e9f702c7..f271111851 100644 --- a/Source/Core/VideoCommon/Src/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/Src/TextureConversionShader.cpp @@ -21,7 +21,6 @@ static char text[16384]; static bool IntensityConstantAdded = false; -static int s_incrementSampleXCount = 0; namespace TextureConversionShader { @@ -57,34 +56,25 @@ u16 GetEncodedSampleCount(u32 format) } } -const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num) -{ - if (ApiType == API_OPENGL) - return ""; // Once we switch to GLSL 1.3 we can do something here - static char result[64]; - sprintf(result, " : register(%s%d)", prefix, num); - return result; -} - // block dimensions : widthStride, heightStride // texture dims : width, height, x offset, y offset void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) { - // [0] left, top, right, bottom of source rectangle within source texture - // [1] width and height of destination texture in pixels - // Two were merged for GLSL - WRITE(p, "uniform float4 " I_COLORS"[2] %s;\n", WriteRegister(ApiType, "c", C_COLORS)); + // left, top, of source rectangle within source texture + // width of the destination rectangle, scale_factor (1 or 2) + WRITE(p, "uniform int4 " I_COLORS";\n"); int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkH = TexDecoder_GetBlockHeightInTexels(format); int samples = GetEncodedSampleCount(format); + // 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments + int factor = samples == 1 ? 2 : 1; if (ApiType == API_OPENGL) { WRITE(p, "#define samp0 samp9\n"); - WRITE(p, "uniform sampler2DRect samp0;\n"); + WRITE(p, "uniform sampler2D samp0;\n"); WRITE(p, " out vec4 ocol0;\n"); - WRITE(p, " VARYIN float2 uv0;\n"); WRITE(p, "void main()\n"); } else // D3D @@ -93,126 +83,44 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType) WRITE(p, "Texture2D Tex0 : register(t0);\n"); WRITE(p,"void main(\n"); - WRITE(p," out float4 ocol0 : SV_Target,\n"); - WRITE(p," in float2 uv0 : TEXCOORD0)\n"); + WRITE(p," out float4 ocol0 : SV_Target)\n"); } WRITE(p, "{\n" - " float2 sampleUv;\n" - " float2 uv1 = floor(uv0);\n"); + " int2 sampleUv;\n" + " int2 uv1 = int2(gl_FragCoord.xy);\n" + " float2 uv0 = float2(0.0, 0.0);\n" + ); - WRITE(p, " uv1.x = uv1.x * %d.0;\n", samples); + WRITE(p, " uv1.x = uv1.x * %d;\n", samples); - WRITE(p, " float xl = floor(uv1.x / %d.0);\n", blkW); - WRITE(p, " float xib = uv1.x - (xl * %d.0);\n", blkW); - WRITE(p, " float yl = floor(uv1.y / %d.0);\n", blkH); - WRITE(p, " float yb = yl * %d.0;\n", blkH); - WRITE(p, " float yoff = uv1.y - yb;\n"); - WRITE(p, " float xp = uv1.x + (yoff * " I_COLORS"[1].x);\n"); - WRITE(p, " float xel = floor(xp / %d.0);\n", blkW); - WRITE(p, " float xb = floor(xel / %d.0);\n", blkH); - WRITE(p, " float xoff = xel - (xb * %d.0);\n", blkH); + WRITE(p, " int yl = uv1.y / %d;\n", blkH); + WRITE(p, " int yb = yl * %d;\n", blkH); + WRITE(p, " int yoff = uv1.y - yb;\n"); + WRITE(p, " int xp = uv1.x + yoff * " I_COLORS".z;\n"); + WRITE(p, " int xel = xp / %d;\n", samples == 1 ? factor : blkW); + WRITE(p, " int xb = xel / %d;\n", blkH); + WRITE(p, " int xoff = xel - xb * %d;\n", blkH); + WRITE(p, " int xl = uv1.x * %d / %d;\n", factor, blkW); + WRITE(p, " int xib = uv1.x * %d - xl * %d;\n", factor, blkW); + WRITE(p, " int halfxb = xb / %d;\n", factor); - WRITE(p, " sampleUv.x = xib + (xb * %d.0);\n", blkW); + WRITE(p, " sampleUv.x = xib + halfxb * %d;\n", blkW); WRITE(p, " sampleUv.y = yb + xoff;\n"); - - WRITE(p, " sampleUv = sampleUv * " I_COLORS"[0].xy;\n"); - - if (ApiType == API_OPENGL) - WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n"); - - WRITE(p, " sampleUv = sampleUv + " I_COLORS"[1].zw;\n"); - - if (ApiType != API_OPENGL) - { - WRITE(p, " sampleUv = sampleUv + float2(0.0,1.0);\n"); // still need to determine the reason for this - WRITE(p, " sampleUv = sampleUv / " I_COLORS"[0].zw;\n"); - } } -// block dimensions : widthStride, heightStride -// texture dims : width, height, x offset, y offset -void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType) +void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType) { - // [0] left, top, right, bottom of source rectangle within source texture - // [1] width and height of destination texture in pixels - // Two were merged for GLSL - WRITE(p, "uniform float4 " I_COLORS"[2] %s;\n", WriteRegister(ApiType, "c", C_COLORS)); - - int blkW = TexDecoder_GetBlockWidthInTexels(format); - int blkH = TexDecoder_GetBlockHeightInTexels(format); - - // 32 bit textures (RGBA8 and Z24) are store in 2 cache line increments - if (ApiType == API_OPENGL) - { - WRITE(p, "#define samp0 samp9\n"); - WRITE(p, "uniform sampler2DRect samp0;\n"); - - WRITE(p, " out float4 ocol0;\n"); - WRITE(p, " VARYIN float2 uv0;\n"); - WRITE(p, "void main()\n"); - } - else - { - WRITE(p,"sampler samp0 : register(s0);\n"); - WRITE(p, "Texture2D Tex0 : register(t0);\n"); - - WRITE(p,"void main(\n"); - WRITE(p," out float4 ocol0 : SV_Target,\n"); - WRITE(p," in float2 uv0 : TEXCOORD0)\n"); - } - - - WRITE(p, "{\n" - " float2 sampleUv;\n" - " float2 uv1 = floor(uv0);\n"); - - WRITE(p, " float yl = floor(uv1.y / %d.0);\n", blkH); - WRITE(p, " float yb = yl * %d.0;\n", blkH); - WRITE(p, " float yoff = uv1.y - yb;\n"); - WRITE(p, " float xp = uv1.x + (yoff * " I_COLORS"[1].x);\n"); - WRITE(p, " float xel = floor(xp / 2.0);\n"); - WRITE(p, " float xb = floor(xel / %d.0);\n", blkH); - WRITE(p, " float xoff = xel - (xb * %d.0);\n", blkH); - - WRITE(p, " float x2 = uv1.x * 2.0;\n"); - WRITE(p, " float xl = floor(x2 / %d.0);\n", blkW); - WRITE(p, " float xib = x2 - (xl * %d.0);\n", blkW); - WRITE(p, " float halfxb = floor(xb / 2.0);\n"); - - WRITE(p, " sampleUv.x = xib + (halfxb * %d.0);\n", blkW); - WRITE(p, " sampleUv.y = yb + xoff;\n"); - WRITE(p, " sampleUv = sampleUv * " I_COLORS"[0].xy;\n"); - - if (ApiType == API_OPENGL) - WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n"); - - WRITE(p, " sampleUv = sampleUv + " I_COLORS"[1].zw;\n"); - - if (ApiType != API_OPENGL) - { - WRITE(p, " sampleUv = sampleUv + float2(0.0,1.0);\n");// still to determine the reason for this - WRITE(p, " sampleUv = sampleUv / " I_COLORS"[0].zw;\n"); - } -} - -void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYPE ApiType) -{ - const char* texSampleOpName; - if (ApiType == API_D3D) - texSampleOpName = "tex0.Sample"; - else // OGL - texSampleOpName = "texture2DRect"; - - // the increment of sampleUv.x is delayed, so we perform it here. see WriteIncrementSampleX. - const char* texSampleIncrementUnit; - if (ApiType == API_D3D) - texSampleIncrementUnit = I_COLORS"[0].x / " I_COLORS"[0].z"; - else // OGL - texSampleIncrementUnit = I_COLORS"[0].x"; - - WRITE(p, " %s = %s(samp0, sampleUv + float2(%d.0 * (%s), 0.0)).%s;\n", - dest, texSampleOpName, s_incrementSampleXCount, texSampleIncrementUnit, colorComp); + WRITE(p, // sampleUv is the sample position in (int)gx_coords + "uv0 = float2(sampleUv + int2(%d, 0)" // pixel offset (if more than one pixel is samped) + " + " I_COLORS".xy);\n" // move to copyed rect + "uv0 += float2(0.5, 0.5);\n" // move to center of pixel + "uv0 *= float(" I_COLORS".w);\n" // scale by two if needed (this will move to pixels border to filter linear) + "uv0 /= float2(%d, %d);\n" // normlize to [0:1] + "uv0.y = 1.0-uv0.y;\n" // ogl foo (disable this line for d3d) + "%s = texture(samp0, uv0).%s;\n", + xoffset, EFB_WIDTH, EFB_HEIGHT, dest, colorComp + ); } void WriteColorToIntensity(char*& p, const char* src, const char* dest) @@ -226,25 +134,6 @@ void WriteColorToIntensity(char*& p, const char* src, const char* dest) // don't add IntensityConst.a yet, because doing it later is faster and uses less instructions, due to vectorization } -void WriteIncrementSampleX(char*& p,API_TYPE ApiType) -{ - // the shader compiler apparently isn't smart or aggressive enough to recognize that: - // foo1 = lookup(x) - // x = x + increment; - // foo2 = lookup(x) - // x = x + increment; - // foo3 = lookup(x) - // can be replaced with this: - // foo1 = lookup(x + 0.0 * increment) - // foo2 = lookup(x + 1.0 * increment) - // foo3 = lookup(x + 2.0 * increment) - // which looks like the same operations but uses considerably fewer ALU instruction slots. - // thus, instead of using the former method, we only increment a counter internally here, - // and we wait until WriteSampleColor to write out the constant multiplier - // to achieve the increment as in the latter case. - s_incrementSampleXCount++; -} - void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest) { WRITE(p, " %s = floor(%s * 255.0 / exp2(8.0 - %d.0));\n", dest, src, depth); @@ -254,7 +143,6 @@ void WriteEncoderEnd(char* p, API_TYPE ApiType) { WRITE(p, "}\n"); IntensityConstantAdded = false; - s_incrementSampleXCount = 0; } void WriteI8Encoder(char* p, API_TYPE ApiType) @@ -262,19 +150,16 @@ void WriteI8Encoder(char* p, API_TYPE ApiType) WriteSwizzler(p, GX_TF_I8, ApiType); WRITE(p, " float3 texSample;\n"); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 0, ApiType); WriteColorToIntensity(p, "texSample", "ocol0.b"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 1, ApiType); WriteColorToIntensity(p, "texSample", "ocol0.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 2, ApiType); WriteColorToIntensity(p, "texSample", "ocol0.r"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 3, ApiType); WriteColorToIntensity(p, "texSample", "ocol0.a"); WRITE(p, " ocol0.rgba += IntensityConst.aaaa;\n"); // see WriteColorToIntensity @@ -289,35 +174,28 @@ void WriteI4Encoder(char* p, API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 0, ApiType); WriteColorToIntensity(p, "texSample", "color0.b"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 1, ApiType); WriteColorToIntensity(p, "texSample", "color1.b"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 2, ApiType); WriteColorToIntensity(p, "texSample", "color0.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 3, ApiType); WriteColorToIntensity(p, "texSample", "color1.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 4, ApiType); WriteColorToIntensity(p, "texSample", "color0.r"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 5, ApiType); WriteColorToIntensity(p, "texSample", "color1.r"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 6, ApiType); WriteColorToIntensity(p, "texSample", "color0.a"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "texSample", ApiType); + WriteSampleColor(p, "rgb", "texSample", 7, ApiType); WriteColorToIntensity(p, "texSample", "color1.a"); WRITE(p, " color0.rgba += IntensityConst.aaaa;\n"); @@ -335,12 +213,11 @@ void WriteIA8Encoder(char* p,API_TYPE ApiType) WriteSwizzler(p, GX_TF_IA8, ApiType); WRITE(p, " float4 texSample;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); WRITE(p, " ocol0.b = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "ocol0.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WRITE(p, " ocol0.r = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "ocol0.a"); @@ -356,22 +233,19 @@ void WriteIA4Encoder(char* p,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); WRITE(p, " color0.b = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "color1.b"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WRITE(p, " color0.g = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "color1.g"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 2, ApiType); WRITE(p, " color0.r = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "color1.r"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 3, ApiType); WRITE(p, " color0.a = texSample.a;\n"); WriteColorToIntensity(p, "texSample", "color1.a"); @@ -388,9 +262,8 @@ void WriteRGB565Encoder(char* p,API_TYPE ApiType) { WriteSwizzler(p, GX_TF_RGB565, ApiType); - WriteSampleColor(p, "rgb", "float3 texSample0", ApiType); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgb", "float3 texSample1", ApiType); + WriteSampleColor(p, "rgb", "float3 texSample0", 0, ApiType); + WriteSampleColor(p, "rgb", "float3 texSample1", 1, ApiType); WRITE(p, " float2 texRs = float2(texSample0.r, texSample1.r);\n"); WRITE(p, " float2 texGs = float2(texSample0.g, texSample1.g);\n"); WRITE(p, " float2 texBs = float2(texSample0.b, texSample1.b);\n"); @@ -417,7 +290,7 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType) WRITE(p, " float gUpper;\n"); WRITE(p, " float gLower;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits WRITE(p, "if(texSample.a > 0.878f) {\n"); @@ -444,9 +317,8 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType) WRITE(p, "}\n"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WRITE(p, "if(texSample.a > 0.878f) {\n"); @@ -483,15 +355,13 @@ void WriteRGBA4443Encoder(char* p,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); WriteToBitDepth(p, 3, "texSample.a", "color0.b"); WriteToBitDepth(p, 4, "texSample.r", "color1.b"); WriteToBitDepth(p, 4, "texSample.g", "color0.g"); WriteToBitDepth(p, 4, "texSample.b", "color1.g"); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WriteToBitDepth(p, 3, "texSample.a", "color0.r"); WriteToBitDepth(p, 4, "texSample.r", "color1.r"); WriteToBitDepth(p, 4, "texSample.g", "color0.a"); @@ -503,7 +373,7 @@ void WriteRGBA4443Encoder(char* p,API_TYPE ApiType) void WriteRGBA8Encoder(char* p,API_TYPE ApiType) { - Write32BitSwizzler(p, GX_TF_RGBA8, ApiType); + WriteSwizzler(p, GX_TF_RGBA8, ApiType); WRITE(p, " float cl1 = xb - (halfxb * 2.0);\n"); WRITE(p, " float cl0 = 1.0 - cl1;\n"); @@ -512,15 +382,13 @@ void WriteRGBA8Encoder(char* p,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 0, ApiType); WRITE(p, " color0.b = texSample.a;\n"); WRITE(p, " color0.g = texSample.r;\n"); WRITE(p, " color1.b = texSample.g;\n"); WRITE(p, " color1.g = texSample.b;\n"); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, "rgba", "texSample", ApiType); + WriteSampleColor(p, "rgba", "texSample", 1, ApiType); WRITE(p, " color0.r = texSample.a;\n"); WRITE(p, " color0.a = texSample.r;\n"); WRITE(p, " color1.r = texSample.g;\n"); @@ -537,28 +405,14 @@ void WriteC4Encoder(char* p, const char* comp,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, comp, "color0.b", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color1.b", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color0.g", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color1.g", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color0.r", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color1.r", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color0.a", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "color1.a", ApiType); + WriteSampleColor(p, comp, "color0.b", 0, ApiType); + WriteSampleColor(p, comp, "color1.b", 1, ApiType); + WriteSampleColor(p, comp, "color0.g", 2, ApiType); + WriteSampleColor(p, comp, "color1.g", 3, ApiType); + WriteSampleColor(p, comp, "color0.r", 4, ApiType); + WriteSampleColor(p, comp, "color1.r", 5, ApiType); + WriteSampleColor(p, comp, "color0.a", 6, ApiType); + WriteSampleColor(p, comp, "color1.a", 7, ApiType); WriteToBitDepth(p, 4, "color0", "color0"); WriteToBitDepth(p, 4, "color1", "color1"); @@ -571,16 +425,10 @@ void WriteC8Encoder(char* p, const char* comp,API_TYPE ApiType) { WriteSwizzler(p, GX_CTF_R8, ApiType); - WriteSampleColor(p, comp, "ocol0.b", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "ocol0.g", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "ocol0.r", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "ocol0.a", ApiType); + WriteSampleColor(p, comp, "ocol0.b", 0, ApiType); + WriteSampleColor(p, comp, "ocol0.g", 1, ApiType); + WriteSampleColor(p, comp, "ocol0.r", 2, ApiType); + WriteSampleColor(p, comp, "ocol0.a", 3, ApiType); WriteEncoderEnd(p, ApiType); } @@ -592,22 +440,19 @@ void WriteCC4Encoder(char* p, const char* comp,API_TYPE ApiType) WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); - WriteSampleColor(p, comp, "texSample", ApiType); + WriteSampleColor(p, comp, "texSample", 0, ApiType); WRITE(p, " color0.b = texSample.x;\n"); WRITE(p, " color1.b = texSample.y;\n"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, comp, "texSample", ApiType); + WriteSampleColor(p, comp, "texSample", 1, ApiType); WRITE(p, " color0.g = texSample.x;\n"); WRITE(p, " color1.g = texSample.y;\n"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, comp, "texSample", ApiType); + WriteSampleColor(p, comp, "texSample", 2, ApiType); WRITE(p, " color0.r = texSample.x;\n"); WRITE(p, " color1.r = texSample.y;\n"); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, comp, "texSample", ApiType); + WriteSampleColor(p, comp, "texSample", 3, ApiType); WRITE(p, " color0.a = texSample.x;\n"); WRITE(p, " color1.a = texSample.y;\n"); @@ -622,10 +467,8 @@ void WriteCC8Encoder(char* p, const char* comp, API_TYPE ApiType) { WriteSwizzler(p, GX_CTF_RA8, ApiType); - WriteSampleColor(p, comp, "ocol0.bg", ApiType); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, comp, "ocol0.ra", ApiType); + WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType); + WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType); WriteEncoderEnd(p, ApiType); } @@ -636,19 +479,16 @@ void WriteZ8Encoder(char* p, const char* multiplier,API_TYPE ApiType) WRITE(p, " float depth;\n"); - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 0, ApiType); WRITE(p, "ocol0.b = frac(depth * %s);\n", multiplier); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 1, ApiType); WRITE(p, "ocol0.g = frac(depth * %s);\n", multiplier); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 2, ApiType); WRITE(p, "ocol0.r = frac(depth * %s);\n", multiplier); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 3, ApiType); WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier); WriteEncoderEnd(p, ApiType); @@ -663,7 +503,7 @@ void WriteZ16Encoder(char* p,API_TYPE ApiType) // byte order is reversed - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 0, ApiType); WRITE(p, " depth *= 16777215.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); @@ -673,9 +513,7 @@ void WriteZ16Encoder(char* p,API_TYPE ApiType) WRITE(p, " ocol0.b = expanded.g / 255.0;\n"); WRITE(p, " ocol0.g = expanded.r / 255.0;\n"); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 1, ApiType); WRITE(p, " depth *= 16777215.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); @@ -697,7 +535,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType) // byte order is reversed - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 0, ApiType); WRITE(p, " depth *= 16777215.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); @@ -709,9 +547,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType) WRITE(p, " ocol0.b = expanded.b / 255.0;\n"); WRITE(p, " ocol0.g = expanded.g / 255.0;\n"); - WriteIncrementSampleX(p, ApiType); - - WriteSampleColor(p, "b", "depth", ApiType); + WriteSampleColor(p, "b", "depth", 1, ApiType); WRITE(p, " depth *= 16777215.0;\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); @@ -728,7 +564,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType) void WriteZ24Encoder(char* p, API_TYPE ApiType) { - Write32BitSwizzler(p, GX_TF_Z24X8, ApiType); + WriteSwizzler(p, GX_TF_Z24X8, ApiType); WRITE(p, " float cl = xb - (halfxb * 2.0);\n"); @@ -737,9 +573,8 @@ void WriteZ24Encoder(char* p, API_TYPE ApiType) WRITE(p, " float3 expanded0;\n"); WRITE(p, " float3 expanded1;\n"); - WriteSampleColor(p, "b", "depth0", ApiType); - WriteIncrementSampleX(p, ApiType); - WriteSampleColor(p, "b", "depth1", ApiType); + WriteSampleColor(p, "b", "depth0", 0, ApiType); + WriteSampleColor(p, "b", "depth1", 1, ApiType); for (int i = 0; i < 2; i++) {