diff --git a/Data/User/OpenCL/TextureDecoder.cl b/Data/User/OpenCL/TextureDecoder.cl index daa4ed81e9..bc7be0a92a 100644 --- a/Data/User/OpenCL/TextureDecoder.cl +++ b/Data/User/OpenCL/TextureDecoder.cl @@ -24,9 +24,9 @@ kernel void DecodeI4(global uchar *dst, { uchar4 val = vload4(srcOffset, src); uchar8 res; - res.even = (val >> 4) & 0x0F; - res.odd = val & 0x0F; - res |= res << 4; + res.even = (val >> (uchar4)4) & (uchar4)0x0F; + res.odd = val & (uchar4)0x0F; + res |= res << (uchar8)4; vstore8(res, 0, dst + ((y + iy)*width + x)); srcOffset++; } @@ -41,9 +41,9 @@ kernel void DecodeI4_RGBA(global uint *dst, { uchar4 val = vload4(srcOffset, src); uchar8 res; - res.even = (val >> 4) & 0x0F; - res.odd = val & 0x0F; - res |= res << 4; + res.even = (val >> (uchar4)4) & (uchar4)0x0F; + res.odd = val & (uchar4)0x0F; + res |= res << (uchar8)4; vstore8(upsample(upsample(res,res),upsample(res,res)), 0, dst + ((y + iy)*width + x)); srcOffset++; } @@ -108,8 +108,8 @@ kernel void DecodeIA4(global ushort *dst, for (int iy = 0; iy < 4; iy++) { val = vload8(srcOffset++, src); - res = upsample(val >> 4, val & 0xF); - res |= res << 4; + res = upsample(val >> (uchar8)4, val & (uchar8)0xF); + res |= res << (ushort8)4; vstore8(res, 0, dst + y*width + x); dst+=width; } @@ -125,10 +125,10 @@ kernel void DecodeIA4_RGBA(global uint *dst, for (int iy = 0; iy < 4; iy++) { val = vload8(srcOffset++, src); - uchar8 a = val >> 4; - uchar8 l = val & 0xF; + uchar8 a = val >> (uchar8)4; + uchar8 l = val & (uchar8)0xF; res = upsample(upsample(a, l), upsample(l,l)); - res |= res << 4; + res |= res << (uint8)4; vstore8(res, 0, dst + y*width + x); dst+=width; } @@ -142,7 +142,8 @@ kernel void DecodeRGBA8(global ushort *dst, for (int iy = 0; iy < 4; iy++) { ushort8 val = (ushort8)(vload4(srcOffset, src), vload4(srcOffset + 4, src)); - ushort8 bgra = rotate(val,8).s40516273; + ushort8 temp = rotate(val, (ushort8)4); + ushort8 bgra = rotate(temp, (ushort8)4).s40516273; vstore8(bgra, 0, dst + ((y + iy)*width + x) * 2); srcOffset++; } @@ -175,7 +176,8 @@ kernel void DecodeRGB565(global ushort *dst, dst += width*y + x; for (int iy = 0; iy < 4; iy++) { - vstore4(rotate(vload4(srcOffset++, src),8), 0, dst + iy*width); + ushort4 val = rotate(vload4(srcOffset++, src),(ushort4)4); + vstore4(rotate(val,(ushort4)4), 0, dst + iy*width); } } @@ -189,10 +191,10 @@ kernel void DecodeRGB565_RGBA(global uchar *dst, uchar8 val = vload8(srcOffset++, src); uchar16 res; - res.even.even = bitselect(val.even, val.even >> 5, 7); - res.odd.even = bitselect((val.odd >> 3) | (val.even << 5), val.even >> 1, 3); - res.even.odd = bitselect(val.odd << 3, val.odd >> 2, 7); - res.odd.odd = 0xFF; + res.even.even = bitselect(val.even, val.even >> (uchar4)5, (uchar4)7); + res.odd.even = bitselect((val.odd >> (uchar4)3) | (val.even << (uchar4)5), val.even >> (uchar4)1, (uchar4)3); + res.even.odd = bitselect(val.odd << (uchar4)3, val.odd >> (uchar4)2, (uchar4)7); + res.odd.odd = (uchar4)0xFF; vstore16(res, 0, dst + ((y + iy)*width + x) * 4); } @@ -207,16 +209,16 @@ kernel void DecodeRGB5A3(global uchar *dst, uchar16 resNoAlpha, resAlpha, choice; #define iterateRGB5A3() \ val = vload8(srcOffset++, src); \ - resNoAlpha.s26AE = val.even << 1; \ - resNoAlpha.s159D = val.even << 6 | val.odd >> 2; \ - resNoAlpha.s048C = val.odd << 3; \ - resNoAlpha = bitselect(resNoAlpha, resNoAlpha >> 5, 0x3); \ - resNoAlpha.s37BF = 0xFF; \ - resAlpha.s26AE = bitselect(val.even << 4, val.even, 0xF); \ - resAlpha.s159D = bitselect(val.odd, val.odd >> 4, 0xF); \ - resAlpha.s048C = bitselect(val.odd << 4, val.odd, 0xF); \ - resAlpha.s37BF = bitselect(val.even << 1, val.even >> 2, 0x1C); \ - resAlpha.s37BF = bitselect(resAlpha.s37BF, val.even >> 5, 0x3); \ + resNoAlpha.s26AE = val.even << (uchar4)1; \ + resNoAlpha.s159D = val.even << (uchar4)6 | val.odd >> (uchar4)2; \ + resNoAlpha.s048C = val.odd << (uchar4)3; \ + resNoAlpha = bitselect(resNoAlpha, resNoAlpha >> (uchar16)5, (uchar16)0x3); \ + resNoAlpha.s37BF = (uchar4)(0xFF); \ + resAlpha.s26AE = bitselect(val.even << (uchar4)4, val.even, (uchar4)0xF); \ + resAlpha.s159D = bitselect(val.odd, val.odd >> (uchar4)4, (uchar4)0xF); \ + resAlpha.s048C = bitselect(val.odd << (uchar4)4, val.odd, (uchar4)0xF); \ + resAlpha.s37BF = bitselect(val.even << (uchar4)1, val.even >> (uchar4)2, (uchar4)0x1C); \ + resAlpha.s37BF = bitselect(resAlpha.s37BF, val.even >> (uchar4)5, (uchar4)0x3); \ choice = (uchar16)((uchar4)(val.even.s0), \ (uchar4)(val.even.s1), \ (uchar4)(val.even.s2), \ @@ -237,16 +239,16 @@ kernel void DecodeRGB5A3_RGBA(global uchar *dst, uchar16 resNoAlpha, resAlpha, choice; #define iterateRGB5A3_RGBA() \ val = vload8(srcOffset++, src); \ - resNoAlpha.s048C = val.even << 1; \ - resNoAlpha.s159D = val.even << 6 | val.odd >> 2; \ - resNoAlpha.s26AE = val.odd << 3; \ - resNoAlpha = bitselect(resNoAlpha, resNoAlpha >> 5, 0x3); \ - resNoAlpha.s37BF = 0xFF; \ - resAlpha.s048C = bitselect(val.even << 4, val.even, 0xF); \ - resAlpha.s159D = bitselect(val.odd, val.odd >> 4, 0xF); \ - resAlpha.s26AE = bitselect(val.odd << 4, val.odd, 0xF); \ - resAlpha.s37BF = bitselect(val.even << 1, val.even >> 2, 0x1C); \ - resAlpha.s37BF = bitselect(resAlpha.s37BF, val.even >> 5, 0x3); \ + resNoAlpha.s048C = val.even << (uchar4)1; \ + resNoAlpha.s159D = val.even << (uchar4)6 | val.odd >> (uchar4)2; \ + resNoAlpha.s26AE = val.odd << (uchar4)3; \ + resNoAlpha = bitselect(resNoAlpha, resNoAlpha >> (uchar16)5, (uchar16)0x3); \ + resNoAlpha.s37BF = (uchar4)(0xFF); \ + resAlpha.s048C = bitselect(val.even << (uchar4)4, val.even, (uchar4)0xF); \ + resAlpha.s159D = bitselect(val.odd, val.odd >> (uchar4)4, (uchar4)0xF); \ + resAlpha.s26AE = bitselect(val.odd << (uchar4)4, val.odd, (uchar4)0xF); \ + resAlpha.s37BF = bitselect(val.even << (uchar4)1, val.even >> (uchar4)2, (uchar4)0x1C); \ + resAlpha.s37BF = bitselect(resAlpha.s37BF, val.even >> (uchar4)5, (uchar4)0x3); \ choice = (uchar16)((uchar4)(val.even.s0), \ (uchar4)(val.even.s1), \ (uchar4)(val.even.s2), \ @@ -274,13 +276,13 @@ kernel void decodeCMPRBlock(global uchar *dst, uchar2 colora565 = (uchar2)(val.s1, val.s3); uchar2 colorb565 = (uchar2)(val.s0, val.s2); - uchar8 color32 = (uchar8)(bitselect(colora565 << 3, colora565 >> 2, 7), - bitselect((colora565 >> 3) | (colorb565 << 5), colorb565 >> 1, 3), - bitselect(colorb565, colorb565 >> 5, 7), + uchar8 color32 = (uchar8)(bitselect(colora565 << (uchar2)3, colora565 >> (uchar2)2, (uchar2)7), + bitselect((colora565 >> (uchar2)3) | (colorb565 << (uchar2)5), colorb565 >> (uchar2)1, (uchar2)3), + bitselect(colorb565, colorb565 >> (uchar2)5, (uchar2)7), (uchar2)0xFF); ushort4 frac2 = convert_ushort4(color32.even) - convert_ushort4(color32.odd); - uchar4 frac = convert_uchar4((frac2 * 3) / 8); + uchar4 frac = convert_uchar4((frac2 * (ushort4)3) / (ushort4)8); ushort4 colorAlpha = upsample((uchar4)(color32.even.s0,color32.even.s1,color32.even.s2,0), rhadd(color32.odd, color32.even)); @@ -320,13 +322,13 @@ kernel void decodeCMPRBlock_RGBA(global uchar *dst, uchar2 colora565 = (uchar2)(val.s1, val.s3); uchar2 colorb565 = (uchar2)(val.s0, val.s2); - uchar8 color32 = (uchar8)(bitselect(colorb565, colorb565 >> 5, 7), - bitselect((colora565 >> 3) | (colorb565 << 5), colorb565 >> 1, 3), - bitselect(colora565 << 3, colora565 >> 2, 7), + uchar8 color32 = (uchar8)(bitselect(colorb565, colorb565 >> (uchar2)5, (uchar2)7), + bitselect((colora565 >> (uchar2)3) | (colorb565 << (uchar2)5), colorb565 >> (uchar2)1, (uchar2)3), + bitselect(colora565 << (uchar2)3, colora565 >> (uchar2)2, (uchar2)7), (uchar2)0xFF); ushort4 frac2 = convert_ushort4(color32.even) - convert_ushort4(color32.odd); - uchar4 frac = convert_uchar4((frac2 * 3) / 8); + uchar4 frac = convert_uchar4((frac2 * (ushort4)3) / (ushort4)8); ushort4 colorAlpha = upsample((uchar4)(color32.even.s0,color32.even.s1,color32.even.s2,0), rhadd(color32.odd, color32.even)); @@ -356,4 +358,4 @@ kernel void DecodeCMPR_RGBA(global uchar *dst, decodeCMPRBlock_RGBA(dst + 16, src, width); src += 8; decodeCMPRBlock_RGBA(dst + 16 * width, src, width); src += 8; decodeCMPRBlock_RGBA(dst + 16 * (width + 1), src, width); -} \ No newline at end of file +} diff --git a/Source/Core/Common/Src/CommonPaths.h b/Source/Core/Common/Src/CommonPaths.h index ca0676ebb8..32f476d62e 100644 --- a/Source/Core/Common/Src/CommonPaths.h +++ b/Source/Core/Common/Src/CommonPaths.h @@ -100,6 +100,7 @@ #define SHADERCACHE_DIR "ShaderCache" #define STATESAVES_DIR "StateSaves" #define SCREENSHOTS_DIR "ScreenShots" +#define OPENCL_DIR "OpenCL" #define LOAD_DIR "Load" #define HIRES_TEXTURES_DIR LOAD_DIR DIR_SEP "Textures" #define DUMP_DIR "Dump" diff --git a/Source/Core/Common/Src/FileUtil.cpp b/Source/Core/Common/Src/FileUtil.cpp index 0a239b6551..3c820ef135 100644 --- a/Source/Core/Common/Src/FileUtil.cpp +++ b/Source/Core/Common/Src/FileUtil.cpp @@ -647,6 +647,7 @@ const char *GetUserPath(int DirIDX) static char ShadersDir[MAX_PATH] = {0}; static char StateSavesDir[MAX_PATH] = {0}; static char ScreenShotsDir[MAX_PATH] = {0}; + static char OpenCLDir[MAX_PATH] = {0}; static char HiresTexturesDir[MAX_PATH] = {0}; static char DumpDir[MAX_PATH] = {0}; static char DumpFramesDir[MAX_PATH] = {0}; @@ -689,6 +690,7 @@ const char *GetUserPath(int DirIDX) snprintf(ShadersDir, sizeof(ShadersDir), "%s" SHADERS_DIR DIR_SEP, UserDir); snprintf(StateSavesDir, sizeof(StateSavesDir), "%s" STATESAVES_DIR DIR_SEP, UserDir); snprintf(ScreenShotsDir, sizeof(ScreenShotsDir), "%s" SCREENSHOTS_DIR DIR_SEP, UserDir); + snprintf(OpenCLDir, sizeof(OpenCLDir), "%s" OPENCL_DIR DIR_SEP, UserDir); snprintf(HiresTexturesDir, sizeof(HiresTexturesDir), "%s" HIRES_TEXTURES_DIR DIR_SEP, UserDir); snprintf(DumpDir, sizeof(DumpDir), "%s" DUMP_DIR DIR_SEP, UserDir); snprintf(DumpFramesDir, sizeof(DumpFramesDir), "%s" DUMP_FRAMES_DIR DIR_SEP, UserDir); @@ -732,6 +734,8 @@ const char *GetUserPath(int DirIDX) return StateSavesDir; case D_SCREENSHOTS_IDX: return ScreenShotsDir; + case D_OPENCL_IDX: + return OpenCLDir; case D_HIRESTEXTURES_IDX: return HiresTexturesDir; case D_DUMP_IDX: diff --git a/Source/Core/Common/Src/FileUtil.h b/Source/Core/Common/Src/FileUtil.h index 69f2b8bf6b..1d7a24ee1e 100644 --- a/Source/Core/Common/Src/FileUtil.h +++ b/Source/Core/Common/Src/FileUtil.h @@ -38,6 +38,7 @@ enum { D_SHADERS_IDX, D_STATESAVES_IDX, D_SCREENSHOTS_IDX, + D_OPENCL_IDX, D_HIRESTEXTURES_IDX, D_DUMP_IDX, D_DUMPFRAMES_IDX, diff --git a/Source/Core/DolphinWX/Src/Main.cpp b/Source/Core/DolphinWX/Src/Main.cpp index d95cc6dabd..0659d3e50a 100644 --- a/Source/Core/DolphinWX/Src/Main.cpp +++ b/Source/Core/DolphinWX/Src/Main.cpp @@ -300,6 +300,7 @@ bool DolphinApp::OnInit() File::CopyDir(SHARED_USER_DIR MAPS_DIR DIR_SEP, File::GetUserPath(D_MAPS_IDX)); File::CopyDir(SHARED_USER_DIR SHADERS_DIR DIR_SEP, File::GetUserPath(D_SHADERS_IDX)); File::CopyDir(SHARED_USER_DIR WII_USER_DIR DIR_SEP, File::GetUserPath(D_WIIUSER_IDX)); + File::CopyDir(SHARED_USER_DIR OPENCL_DIR DIR_SEP, File::GetUserPath(D_OPENCL_IDX)); if (!File::Exists(File::GetUserPath(D_GCUSER_IDX))) File::CreateFullPath(File::GetUserPath(D_GCUSER_IDX)); diff --git a/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp b/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp index 13034b2b50..cb0bb04866 100644 --- a/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/OpenCL/OCLTextureDecoder.cpp @@ -103,7 +103,7 @@ void TexDecoder_OpenCL_Initialize() char **binaries = NULL; char filename[1024]; - sprintf(filename, "%sOpenCL/kernel.bin", File::GetUserPath(D_USER_IDX)); + sprintf(filename, "%skernel.bin", File::GetUserPath(D_OPENCL_IDX)); FILE *input = NULL; @@ -155,7 +155,7 @@ void TexDecoder_OpenCL_Initialize() if (err) { std::string code; - sprintf(filename, "%sOpenCL/TextureDecoder.cl", File::GetUserPath(D_USER_IDX)); + sprintf(filename, "%sTextureDecoder.cl", File::GetUserPath(D_OPENCL_IDX)); if (!File::ReadFileToString(true, filename, code)) { ERROR_LOG(VIDEO, "Failed to load OpenCL code %s - file is missing?", filename); @@ -204,7 +204,7 @@ void TexDecoder_OpenCL_Initialize() if (!err) { - sprintf(filename, "%sOpenCL/kernel.bin", File::GetUserPath(D_USER_IDX)); + sprintf(filename, "%skernel.bin", File::GetUserPath(D_OPENCL_IDX)); const char *current_rev = SVN_REV_STR + '\0'; FILE *output = NULL;