From 9b0357b5e2fd1d25a26718f64bc345b053848261 Mon Sep 17 00:00:00 2001 From: Rodolfo Osvaldo Bogado Date: Sat, 28 Aug 2010 15:09:42 +0000 Subject: [PATCH] Sometimes to advance you have to take a step back. Use plain vertex arrays instead of VBOs to render in the OpenGL plugin, as the nature of the data makes VBOs slower. This should bring, depending on the implementation, a good speedup in OpenGL. On my system OpenGL and D3D9 now differ by 1 to 5 fps, depending on the game. Some cleanup and a little work pointing to future improvements in the way of rendering. Please test and check for any errors. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6139 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/Hash.cpp | 116 ++++ Source/Core/Common/Src/Hash.h | 2 +- Source/Core/Common/Src/LinearDiskCache.cpp | 2 +- .../VideoCommon/Src/NativeVertexWriter.cpp | 1 + .../Core/VideoCommon/Src/NativeVertexWriter.h | 1 + .../Core/VideoCommon/Src/OpcodeDecoding.cpp | 4 +- Source/Core/VideoCommon/Src/OpcodeDecoding.h | 2 +- .../Core/VideoCommon/Src/TextureDecoder.cpp | 117 ---- Source/Core/VideoCommon/Src/TextureDecoder.h | 5 +- .../Core/VideoCommon/Src/VertexShaderGen.cpp | 4 - .../Plugin_VideoDX11/Src/TextureCache.cpp | 4 +- .../Plugin_VideoDX9/Src/TextureCache.cpp | 8 +- .../Plugin_VideoDX9/Src/TextureConverter.cpp | 2 +- .../Plugins/Plugin_VideoOGL/Src/DLCache.cpp | 519 +++++++++--------- .../Src/NativeVertexFormat.cpp | 20 +- .../Plugin_VideoOGL/Src/TextureConverter.cpp | 2 +- .../Plugin_VideoOGL/Src/TextureMngr.cpp | 8 +- .../Plugin_VideoOGL/Src/VertexManager.cpp | 21 +- 18 files changed, 414 insertions(+), 424 deletions(-) diff --git a/Source/Core/Common/Src/Hash.cpp b/Source/Core/Common/Src/Hash.cpp index 3f4602ecdc..b7611e4e4e 100644 --- a/Source/Core/Common/Src/Hash.cpp +++ b/Source/Core/Common/Src/Hash.cpp @@ -119,3 +119,119 @@ u32 HashEctor(const u8* ptr, int length) return(crc); } + +#ifdef _M_X64 +u64 GetHash64(const u8 *src, int len, u32 samples) 
+{ + const u64 m = 0xc6a4a7935bd1e995; + const int r = 47; + + u64 h = len * m; + u32 Step = (len/8); + const u64 * data = (const u64 *)src; + const u64 * end = data + Step; + if(samples == 0) samples = Step; + Step = Step / samples; + if(Step < 1) Step = 1; + while(data < end) + { + u64 k = data[0]; + data+=Step; + k *= m; + k ^= k >> r; + k *= m; + h ^= k; + h *= m; + } + + const u8 * data2 = (const u8*)end; + + switch(len & 7) + { + case 7: h ^= u64(data2[6]) << 48; + case 6: h ^= u64(data2[5]) << 40; + case 5: h ^= u64(data2[4]) << 32; + case 4: h ^= u64(data2[3]) << 24; + case 3: h ^= u64(data2[2]) << 16; + case 2: h ^= u64(data2[1]) << 8; + case 1: h ^= u64(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + +#else +u64 GetHash64(const u8 *src, int len, u32 samples) +{ + const u32 m = 0x5bd1e995; + const int r = 24; + + u32 h1 = len; + u32 h2 = 0; + + u32 Step = (len / 4); + const u32 * data = (const u32 *)src; + const u32 * end = data + Step; + const u8 * uEnd = (const u8 *)end; + if(samples == 0) samples = Step; + Step = Step / samples; + + if(Step < 2) Step = 2; + + while(data < end) + { + u32 k1 = data[0]; + k1 *= m; + k1 ^= k1 >> r; + k1 *= m; + h1 *= m; + h1 ^= k1; + + + u32 k2 = data[1]; + k2 *= m; + k2 ^= k2 >> r; + k2 *= m; + h2 *= m; + h2 ^= k2; + data+=Step; + } + + if((len & 7) > 3) + { + u32 k1 = *(end - 1); + k1 *= m; + k1 ^= k1 >> r; + k1 *= m; + h1 *= m; + h1 ^= k1; + len -= 4; + } + + switch(len & 3) + { + case 3: h2 ^= uEnd[2] << 16; + case 2: h2 ^= uEnd[1] << 8; + case 1: h2 ^= uEnd[0]; + h2 *= m; + }; + + h1 ^= h2 >> 18; h1 *= m; + h2 ^= h1 >> 22; h2 *= m; + h1 ^= h2 >> 17; h1 *= m; + h2 ^= h1 >> 19; h2 *= m; + + u64 h = h1; + + h = (h << 32) | h2; + + return h; +} + + +#endif diff --git a/Source/Core/Common/Src/Hash.h b/Source/Core/Common/Src/Hash.h index 8acdb29ecc..3ada52def9 100644 --- a/Source/Core/Common/Src/Hash.h +++ b/Source/Core/Common/Src/Hash.h @@ -24,5 +24,5 @@ u32 HashFletcher(const u8* 
data_u8, size_t length); // FAST. Length & 1 == 0. u32 HashAdler32(const u8* data, size_t len); // Fairly accurate, slightly slower u32 HashFNV(const u8* ptr, int length); // Another fast and decent hash u32 HashEctor(const u8* ptr, int length); // JUNK. DO NOT USE FOR NEW THINGS - +u64 GetHash64(const u8 *src, int len, u32 samples); #endif // _HASH_H_ diff --git a/Source/Core/Common/Src/LinearDiskCache.cpp b/Source/Core/Common/Src/LinearDiskCache.cpp index 6dec1114bd..fb3649d5e9 100644 --- a/Source/Core/Common/Src/LinearDiskCache.cpp +++ b/Source/Core/Common/Src/LinearDiskCache.cpp @@ -22,7 +22,7 @@ static const char ID[4] = {'D', 'C', 'A', 'C'}; // Update this to the current SVN revision every time you change shader generation code. // We don't automatically get this from SVN_REV because that would mean regenerating the // shader cache for every revision, graphics-related or not, which is simply annoying. -const int version = 6124; +const int version = 6139; LinearDiskCache::LinearDiskCache() : file_(NULL), num_entries_(0) { diff --git a/Source/Core/VideoCommon/Src/NativeVertexWriter.cpp b/Source/Core/VideoCommon/Src/NativeVertexWriter.cpp index 250dad7e9d..bd418b1ed8 100644 --- a/Source/Core/VideoCommon/Src/NativeVertexWriter.cpp +++ b/Source/Core/VideoCommon/Src/NativeVertexWriter.cpp @@ -22,6 +22,7 @@ namespace VertexManager { u8* s_pCurBufferPointer = NULL; +u8* s_pBaseBufferPointer = NULL; } diff --git a/Source/Core/VideoCommon/Src/NativeVertexWriter.h b/Source/Core/VideoCommon/Src/NativeVertexWriter.h index bd9d2d3f14..6764c7abcd 100644 --- a/Source/Core/VideoCommon/Src/NativeVertexWriter.h +++ b/Source/Core/VideoCommon/Src/NativeVertexWriter.h @@ -29,6 +29,7 @@ int GetRemainingVertices(int primitive); // remaining number of vertices that ca // TODO: move, rename. 
extern u8* s_pCurBufferPointer; +extern u8* s_pBaseBufferPointer; } diff --git a/Source/Core/VideoCommon/Src/OpcodeDecoding.cpp b/Source/Core/VideoCommon/Src/OpcodeDecoding.cpp index c284a9643e..4fb9574478 100644 --- a/Source/Core/VideoCommon/Src/OpcodeDecoding.cpp +++ b/Source/Core/VideoCommon/Src/OpcodeDecoding.cpp @@ -233,12 +233,12 @@ static void Decode() { u32 Cmd2 = DataReadU32(); int transfer_size = ((Cmd2 >> 16) & 15) + 1; - u32 address = Cmd2 & 0xFFFF; + u32 xf_address = Cmd2 & 0xFFFF; // TODO - speed this up. pshufb? u32 data_buffer[16]; for (int i = 0; i < transfer_size; i++) data_buffer[i] = DataReadU32(); - LoadXFReg(transfer_size, address, data_buffer); + LoadXFReg(transfer_size, xf_address, data_buffer); INCSTAT(stats.thisFrame.numXFLoads); } break; diff --git a/Source/Core/VideoCommon/Src/OpcodeDecoding.h b/Source/Core/VideoCommon/Src/OpcodeDecoding.h index 7172f98ab0..db83a8a554 100644 --- a/Source/Core/VideoCommon/Src/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/Src/OpcodeDecoding.h @@ -48,5 +48,5 @@ void OpcodeDecoder_Init(); void OpcodeDecoder_Shutdown(); void OpcodeDecoder_Run(bool skipped_frame); - +void ExecuteDisplayList(u32 address, u32 size); #endif // _OPCODE_DECODING_H diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index 9b5baae5ec..fff138c8f1 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -20,7 +20,6 @@ #include "CPUDetect.h" #include "TextureDecoder.h" - #include "OpenCL.h" #if defined(HAVE_OPENCL) && HAVE_OPENCL #include "OpenCL/OCLTextureDecoder.h" @@ -96,122 +95,6 @@ int TexDecoder_GetTextureSizeInBytes(int width, int height, int format) return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2; } -#ifdef _M_X64 -u64 TexDecoder_GetHash64(const u8 *src, int len, u32 samples) -{ - const u64 m = 0xc6a4a7935bd1e995; - const int r = 47; - - u64 h = len * m; - u32 Step = (len/8); - const u64 * 
data = (const u64 *)src; - const u64 * end = data + Step; - if(samples == 0) samples = Step; - Step = Step / samples; - if(Step < 1) Step = 1; - while(data < end) - { - u64 k = data[0]; - data+=Step; - k *= m; - k ^= k >> r; - k *= m; - h ^= k; - h *= m; - } - - const u8 * data2 = (const u8*)end; - - switch(len & 7) - { - case 7: h ^= u64(data2[6]) << 48; - case 6: h ^= u64(data2[5]) << 40; - case 5: h ^= u64(data2[4]) << 32; - case 4: h ^= u64(data2[3]) << 24; - case 3: h ^= u64(data2[2]) << 16; - case 2: h ^= u64(data2[1]) << 8; - case 1: h ^= u64(data2[0]); - h *= m; - }; - - h ^= h >> r; - h *= m; - h ^= h >> r; - - return h; -} - -#else -u64 TexDecoder_GetHash64(const u8 *src, int len, u32 samples) -{ - const u32 m = 0x5bd1e995; - const int r = 24; - - u32 h1 = len; - u32 h2 = 0; - - u32 Step = (len / 4); - const u32 * data = (const u32 *)src; - const u32 * end = data + Step; - const u8 * uEnd = (const u8 *)end; - if(samples == 0) samples = Step; - Step = Step / samples; - - if(Step < 2) Step = 2; - - while(data < end) - { - u32 k1 = data[0]; - k1 *= m; - k1 ^= k1 >> r; - k1 *= m; - h1 *= m; - h1 ^= k1; - - - u32 k2 = data[1]; - k2 *= m; - k2 ^= k2 >> r; - k2 *= m; - h2 *= m; - h2 ^= k2; - data+=Step; - } - - if((len & 7) > 3) - { - u32 k1 = *(end - 1); - k1 *= m; - k1 ^= k1 >> r; - k1 *= m; - h1 *= m; - h1 ^= k1; - len -= 4; - } - - switch(len & 3) - { - case 3: h2 ^= uEnd[2] << 16; - case 2: h2 ^= uEnd[1] << 8; - case 1: h2 ^= uEnd[0]; - h2 *= m; - }; - - h1 ^= h2 >> 18; h1 *= m; - h2 ^= h1 >> 22; h2 *= m; - h1 ^= h2 >> 17; h1 *= m; - h2 ^= h1 >> 19; h2 *= m; - - u64 h = h1; - - h = (h << 32) | h2; - - return h; -} - - -#endif - int TexDecoder_GetBlockWidthInTexels(u32 format) { switch (format) diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.h b/Source/Core/VideoCommon/Src/TextureDecoder.h index b8d5201fc6..828dd3e8bc 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.h +++ b/Source/Core/VideoCommon/Src/TextureDecoder.h @@ -17,7 +17,7 @@ #ifndef 
_TEXTUREDECODER_H #define _TEXTUREDECODER_H - +#include "hash.h" enum { TMEM_SIZE = 1024*1024, @@ -86,9 +86,6 @@ enum PC_TexFormat PC_TexFormat TexDecoder_Decode(u8 *dst, const u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt,bool rgbaOnly = false); PC_TexFormat GetPC_TexFormat(int texformat, int tlutfmt); void TexDecoder_DecodeTexel(u8 *dst, const u8 *src, int s, int t, int imageWidth, int texformat, int tlutaddr, int tlutfmt); - -u64 TexDecoder_GetHash64(const u8 *src, int len, u32 samples = 0); - void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center); #endif diff --git a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp index 2a278696c2..6a40609e11 100644 --- a/Source/Core/VideoCommon/Src/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/Src/VertexShaderGen.cpp @@ -45,10 +45,6 @@ void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components) (u32)xfregs.colChans[i].alpha.matsource) << 15; } - // fog - uid->values[1] |= (((u32)bpmem.fog.c_proj_fsel.fsel & 3) << 30); - uid->values[2] |= (((u32)bpmem.fog.c_proj_fsel.fsel >> 2) << 30); - u32 *pcurvalue = &uid->values[3]; for (int i = 0; i < xfregs.numTexGens; ++i) { TexMtxInfo tinfo = xfregs.texcoords[i].texmtxinfo; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp index 6976bdcfb1..4cfbace2e8 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp @@ -207,7 +207,7 @@ TextureCache::TCacheEntry* TextureCache::Load(unsigned int stage, u32 address, u // hires textures and texture dumping not supported, yet if (g_ActiveConfig.bSafeTextureCache/* || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures*/) { - texHash = TexDecoder_GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + texHash = 
GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) @@ -217,7 +217,7 @@ TextureCache::TCacheEntry* TextureCache::Load(unsigned int stage, u32 address, u // each other stored in a single texture, and uses the palette to make different characters // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, // we must make sure that texture with different tluts get different IDs. - u64 tlutHash = TexDecoder_GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + u64 tlutHash = GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); texHash ^= tlutHash; if (g_ActiveConfig.bSafeTextureCache) { diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index d52e04c85e..91f0b6a442 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -166,7 +166,7 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures) { - texHash = TexDecoder_GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + texHash = GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. 
TODO up) @@ -176,7 +176,7 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, // each other stored in a single texture, and uses the palette to make different characters // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, // we must make sure that texture with different tluts get different IDs. - u64 tlutHash = TexDecoder_GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + u64 tlutHash = GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); texHash ^= tlutHash; if (g_ActiveConfig.bSafeTextureCache) { @@ -200,10 +200,10 @@ TextureCache::TCacheEntry *TextureCache::Load(int stage, u32 address, int width, { if(!g_ActiveConfig.bCopyEFBToTexture) { - hash_value = TexDecoder_GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + hash_value = GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { - hash_value ^= TexDecoder_GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + hash_value ^= GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); } } else diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp index bd59f72789..ba04ae59be 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp @@ -450,7 +450,7 @@ u64 EncodeToRamFromTexture(u32 address,LPDIRECT3DTEXTURE9 source_texture,u32 Sou EncodeToRamUsingShader(texconv_shader, source_texture, 
scaledSource, dest_ptr, expandedWidth / samples, expandedHeight,readStride, true, bScaleByHalf > 0); TextureCache::MakeRangeDynamic(address,size_in_bytes); u64 Hashvalue = 0; - Hashvalue = TexDecoder_GetHash64(dest_ptr,size_in_bytes,g_ActiveConfig.iSafeTextureCache_ColorSamples); + Hashvalue = GetHash64(dest_ptr,size_in_bytes,g_ActiveConfig.iSafeTextureCache_ColorSamples); return Hashvalue; } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/DLCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/DLCache.cpp index 8b6b35dc4f..dbc4eee3a8 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/DLCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/DLCache.cpp @@ -78,7 +78,7 @@ struct CachedDisplayList bool uncachable; // if set, this DL will always be interpreted. This gets set if hash ever changes. int pass; - u32 dl_hash; + u64 dl_hash; int check; int next_check; @@ -128,136 +128,125 @@ bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) int num_index_xf = 0; //int num_draw_call = 0; // unused? - u8 *old_datareader = g_pVideoData; - g_pVideoData = Memory_GetPtr(address); + u8* old_pVideoData = g_pVideoData; + u8* startAddress = Memory_GetPtr(address); - u8 *end = g_pVideoData + size; - while (g_pVideoData < end) + // Avoid the crash if Memory_GetPtr failed .. + if (startAddress != 0) { - // Yet another reimplementation of the DL reading... - int cmd_byte = DataReadU8(); - switch (cmd_byte) + g_pVideoData = startAddress; + + // temporarily swap dl and non-dl (small "hack" for the stats) + Statistics::SwapDL(); + + u8 *end = g_pVideoData + size; + while (g_pVideoData < end) { - case GX_NOP: - break; - - case GX_LOAD_CP_REG: //0x08 + // Yet another reimplementation of the DL reading... 
+ int cmd_byte = DataReadU8(); + switch (cmd_byte) { - // Execute - u8 sub_cmd = DataReadU8(); - u32 value = DataReadU32(); - LoadCPReg(sub_cmd, value); - INCSTAT(stats.thisFrame.numCPLoads); + case GX_NOP: + break; - // Analyze - num_cp_reg++; - } - break; + case GX_LOAD_CP_REG: //0x08 + { + u8 sub_cmd = DataReadU8(); + u32 value = DataReadU32(); + LoadCPReg(sub_cmd, value); + INCSTAT(stats.thisFrame.numCPLoads); + num_cp_reg++; + } + break; - case GX_LOAD_XF_REG: - { - // Execute - u32 Cmd2 = DataReadU32(); - int transfer_size = ((Cmd2 >> 16) & 15) + 1; - u32 xf_address = Cmd2 & 0xFFFF; - // TODO - speed this up. pshufb? - u32 data_buffer[16]; - for (int i = 0; i < transfer_size; i++) - data_buffer[i] = DataReadU32(); - LoadXFReg(transfer_size, xf_address, data_buffer); - INCSTAT(stats.thisFrame.numXFLoads); + case GX_LOAD_XF_REG: + { + u32 Cmd2 = DataReadU32(); + int transfer_size = ((Cmd2 >> 16) & 15) + 1; + u32 xf_address = Cmd2 & 0xFFFF; + // TODO - speed this up. pshufb? + u32 data_buffer[16]; + for (int i = 0; i < transfer_size; i++) + data_buffer[i] = DataReadU32(); + LoadXFReg(transfer_size, xf_address, data_buffer); + INCSTAT(stats.thisFrame.numXFLoads); + num_xf_reg++; + } + break; - // Analyze - num_xf_reg++; - } - break; - - case GX_LOAD_INDX_A: //used for position matrices - { - u32 value = DataReadU32(); - // Execute - LoadIndexedXF(value, 0xC); - // Analyze - num_index_xf++; - } - break; - case GX_LOAD_INDX_B: //used for normal matrices - { - u32 value = DataReadU32(); - // Execute - LoadIndexedXF(value, 0xD); - // Analyze - num_index_xf++; - } - break; - case GX_LOAD_INDX_C: //used for postmatrices - { - u32 value = DataReadU32(); - // Execute - LoadIndexedXF(value, 0xE); - // Analyze - num_index_xf++; - } - break; - case GX_LOAD_INDX_D: //used for lights - { - u32 value = DataReadU32(); - // Execute - LoadIndexedXF(value, 0xF); - // Analyze - num_index_xf++; - } - break; - - case GX_CMD_CALL_DL: - PanicAlert("Seeing DL call inside DL."); - break; 
- - case GX_CMD_UNKNOWN_METRICS: - // zelda 4 swords calls it and checks the metrics registers after that - break; - - case GX_CMD_INVL_VC:// Invalidate (vertex cache?) - DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); - break; - - case GX_LOAD_BP_REG: //0x61 - { - u32 bp_cmd = DataReadU32(); - // Execute - LoadBPReg(bp_cmd); - INCSTAT(stats.thisFrame.numBPLoads); - - // Analyze - } - break; + case GX_LOAD_INDX_A: //used for position matrices + { + LoadIndexedXF(DataReadU32(), 0xC); + num_index_xf++; + } + break; + case GX_LOAD_INDX_B: //used for normal matrices + { + LoadIndexedXF(DataReadU32(), 0xD); + num_index_xf++; + } + break; + case GX_LOAD_INDX_C: //used for postmatrices + { + LoadIndexedXF(DataReadU32(), 0xE); + num_index_xf++; + } + break; + case GX_LOAD_INDX_D: //used for lights + { + LoadIndexedXF(DataReadU32(), 0xF); + num_index_xf++; + } + break; + case GX_CMD_CALL_DL: + { + u32 address = DataReadU32(); + u32 count = DataReadU32(); + ExecuteDisplayList(address, count); + } + break; + case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that + DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte); + break; + case GX_CMD_INVL_VC: // Invalidate Vertex Cache + DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); + break; + case GX_LOAD_BP_REG: //0x61 + { + u32 bp_cmd = DataReadU32(); + LoadBPReg(bp_cmd); + INCSTAT(stats.thisFrame.numBPLoads); + } + break; // draw primitives - default: - if (cmd_byte & 0x80) - { - // load vertices (use computed vertex size from FifoCommandRunnable above) + default: + if (cmd_byte & 0x80) + { + // load vertices (use computed vertex size from FifoCommandRunnable above) + u16 numVertices = DataReadU16(); - // Execute - u16 numVertices = DataReadU16(); - - VertexLoaderManager::RunVertices( - cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, - numVertices); - - // Analyze - } - else - { - ERROR_LOG(VIDEO, "DLCache::CompileAndRun: Illegal 
command %02x", cmd_byte); + VertexLoaderManager::RunVertices( + cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) + (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, + numVertices); + } + else + { + ERROR_LOG(VIDEO, "OpcodeDecoding::Decode: Illegal command %02x", cmd_byte); + break; + } break; } - break; } + INCSTAT(stats.numDListsCalled); + INCSTAT(stats.thisFrame.numDListsCalled); + // un-swap + Statistics::SwapDL(); } - g_pVideoData = old_datareader; + // reset to the old pointer + g_pVideoData = old_pVideoData; return true; } @@ -271,173 +260,175 @@ bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) // have the compiled code so we don't have to interpret anymore, we just run it. bool CompileAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) { - VertexManager::Flush(); + u8* old_pVideoData = g_pVideoData; + u8* startAddress = Memory_GetPtr(address); - u8 *old_datareader = g_pVideoData; - g_pVideoData = Memory_GetPtr(address); - - u8 *end = g_pVideoData + size; - - emitter.AlignCode4(); - dl->compiled_code = emitter.GetCodePtr(); - emitter.ABI_EmitPrologue(4); - - while (g_pVideoData < end) + // Avoid the crash if Memory_GetPtr failed .. + if (startAddress != 0) { - // Yet another reimplementation of the DL reading... - int cmd_byte = DataReadU8(); - switch (cmd_byte) + g_pVideoData = startAddress; + + // temporarily swap dl and non-dl (small "hack" for the stats) + Statistics::SwapDL(); + + u8 *end = g_pVideoData + size; + + emitter.AlignCode4(); + dl->compiled_code = emitter.GetCodePtr(); + emitter.ABI_EmitPrologue(4); + + while (g_pVideoData < end) { - case GX_NOP: - // Execute - // Compile - break; - - case GX_LOAD_CP_REG: //0x08 + // Yet another reimplementation of the DL reading... 
+ int cmd_byte = DataReadU8(); + switch (cmd_byte) { + case GX_NOP: // Execute - u8 sub_cmd = DataReadU8(); - u32 value = DataReadU32(); - LoadCPReg(sub_cmd, value); - INCSTAT(stats.thisFrame.numCPLoads); - // Compile - emitter.ABI_CallFunctionCC((void *)&LoadCPReg, sub_cmd, value); - } - break; + break; - case GX_LOAD_XF_REG: - { - // Execute - u32 Cmd2 = DataReadU32(); - int transfer_size = ((Cmd2 >> 16) & 15) + 1; - u32 xf_address = Cmd2 & 0xFFFF; - // TODO - speed this up. pshufb? - u8 *real_data_buffer = AllocStaticData(4 * transfer_size); - u32 *data_buffer = (u32 *)real_data_buffer; - for (int i = 0; i < transfer_size; i++) - data_buffer[i] = DataReadU32(); - LoadXFReg(transfer_size, xf_address, data_buffer); - INCSTAT(stats.thisFrame.numXFLoads); + case GX_LOAD_CP_REG: //0x08 + { + // Execute + u8 sub_cmd = DataReadU8(); + u32 value = DataReadU32(); + LoadCPReg(sub_cmd, value); + INCSTAT(stats.thisFrame.numCPLoads); - // Compile - emitter.ABI_CallFunctionCCP((void *)&LoadXFReg, transfer_size, address, data_buffer); - } - break; + // Compile + emitter.ABI_CallFunctionCC((void *)&LoadCPReg, sub_cmd, value); + } + break; - case GX_LOAD_INDX_A: //used for position matrices - { - u32 value = DataReadU32(); - // Execute - LoadIndexedXF(value, 0xC); - // Compile - emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xC); - } - break; - case GX_LOAD_INDX_B: //used for normal matrices - { - u32 value = DataReadU32(); - // Execute - LoadIndexedXF(value, 0xD); - // Compile - emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xD); - } - break; - case GX_LOAD_INDX_C: //used for postmatrices - { - u32 value = DataReadU32(); - // Execute - LoadIndexedXF(value, 0xE); - // Compile - emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xE); - } - break; - case GX_LOAD_INDX_D: //used for lights - { - u32 value = DataReadU32(); - // Execute - LoadIndexedXF(value, 0xF); - // Compile - emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xF); - } - 
break; + case GX_LOAD_XF_REG: + { + // Execute + u32 Cmd2 = DataReadU32(); + int transfer_size = ((Cmd2 >> 16) & 15) + 1; + u32 xf_address = Cmd2 & 0xFFFF; + // TODO - speed this up. pshufb? + u8 *real_data_buffer = AllocStaticData(4 * transfer_size); + u32 *data_buffer = (u32 *)real_data_buffer; + for (int i = 0; i < transfer_size; i++) + data_buffer[i] = DataReadU32(); + LoadXFReg(transfer_size, xf_address, data_buffer); + INCSTAT(stats.thisFrame.numXFLoads); - case GX_CMD_CALL_DL: - PanicAlert("Seeing DL call inside DL."); - break; + // Compile + emitter.ABI_CallFunctionCCP((void *)&LoadXFReg, transfer_size, xf_address, data_buffer); + } + break; - case GX_CMD_UNKNOWN_METRICS: - // zelda 4 swords calls it and checks the metrics registers after that - break; + case GX_LOAD_INDX_A: //used for position matrices + { + u32 value = DataReadU32(); + // Execute + LoadIndexedXF(value, 0xC); + // Compile + emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xC); + } + break; + case GX_LOAD_INDX_B: //used for normal matrices + { + u32 value = DataReadU32(); + // Execute + LoadIndexedXF(value, 0xD); + // Compile + emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xD); + } + break; + case GX_LOAD_INDX_C: //used for postmatrices + { + u32 value = DataReadU32(); + // Execute + LoadIndexedXF(value, 0xE); + // Compile + emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xE); + } + break; + case GX_LOAD_INDX_D: //used for lights + { + u32 value = DataReadU32(); + // Execute + LoadIndexedXF(value, 0xF); + // Compile + emitter.ABI_CallFunctionCC((void *)&LoadIndexedXF, value, 0xF); + } + break; - case GX_CMD_INVL_VC:// Invalidate (vertex cache?) 
- DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); - break; + case GX_CMD_CALL_DL: + { + u32 address = DataReadU32(); + u32 count = DataReadU32(); + ExecuteDisplayList(address, count); + emitter.ABI_CallFunctionCC((void *)&ExecuteDisplayList, address, count); + } + break; - case GX_LOAD_BP_REG: //0x61 - { - u32 bp_cmd = DataReadU32(); - // Execute - LoadBPReg(bp_cmd); - INCSTAT(stats.thisFrame.numBPLoads); - // Compile - emitter.ABI_CallFunctionC((void *)&LoadBPReg, bp_cmd); - } - break; + case GX_CMD_UNKNOWN_METRICS: + // zelda 4 swords calls it and checks the metrics registers after that + break; - // draw primitives - default: - if (cmd_byte & 0x80) - { - // load vertices (use computed vertex size from FifoCommandRunnable above) + case GX_CMD_INVL_VC:// Invalidate (vertex cache?) + DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)"); + break; - // Execute - u16 numVertices = DataReadU16(); + case GX_LOAD_BP_REG: //0x61 + { + u32 bp_cmd = DataReadU32(); + // Execute + LoadBPReg(bp_cmd); + INCSTAT(stats.thisFrame.numBPLoads); + // Compile + emitter.ABI_CallFunctionC((void *)&LoadBPReg, bp_cmd); + } + break; - u64 pre_draw_video_data = (u64)g_pVideoData; + // draw primitives + default: + if (cmd_byte & 0x80) + { + // load vertices (use computed vertex size from FifoCommandRunnable above) - VertexLoaderManager::RunVertices( - cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, - numVertices); + // Execute + u16 numVertices = DataReadU16(); - // Compile -#ifdef _M_X64 - emitter.MOV(64, R(RAX), Imm64(pre_draw_video_data)); - emitter.MOV(64, M(&g_pVideoData), R(RAX)); -#else - emitter.MOV(32, R(EAX), Imm32((u32)pre_draw_video_data)); - emitter.MOV(32, M(&g_pVideoData), R(EAX)); -#endif - emitter.ABI_CallFunctionCCC( - (void *)&VertexLoaderManager::RunVertices, - cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) - (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, - numVertices); - } - else - { - 
ERROR_LOG(VIDEO, "DLCache::CompileAndRun: Illegal command %02x", cmd_byte); + u64 pre_draw_video_data = (u64)g_pVideoData; + + VertexLoaderManager::RunVertices( + cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) + (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, + numVertices); + + // Compile + #ifdef _M_X64 + emitter.MOV(64, R(RAX), Imm64(pre_draw_video_data)); + emitter.MOV(64, M(&g_pVideoData), R(RAX)); + #else + emitter.MOV(32, R(EAX), Imm32((u32)pre_draw_video_data)); + emitter.MOV(32, M(&g_pVideoData), R(EAX)); + #endif + emitter.ABI_CallFunctionCCC( + (void *)&VertexLoaderManager::RunVertices, + cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) + (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, + numVertices); + } + else + { + ERROR_LOG(VIDEO, "DLCache::CompileAndRun: Illegal command %02x", cmd_byte); + break; + } break; } - break; } + emitter.ABI_EmitEpilogue(4); } - - emitter.ABI_EmitEpilogue(4); - - g_pVideoData = old_datareader; + g_pVideoData = old_pVideoData; return true; } -// This one's pretty expensive. We should check if we can get away with only -// hashing the entire DL the first 3 frames or something. -u32 ComputeDLHash(u32 address, u32 size) -{ - u8 *ptr = Memory_GetPtr(address); - return HashFletcher(ptr, size & ~1); -} - void Init() { dlcode_cache = (u8 *)AllocateExecutableMemory(DL_CODE_CACHE_SIZE, false); // Don't need low memory. @@ -485,8 +476,12 @@ bool HandleDisplayList(u32 address, u32 size) { // Disable display list caching since the benefit isn't much to write home about // right now... 
+ //Fixed DlistCaching now is fully functional benefits still marginal but when vertex data is stored here the story will be diferent :) + //to test remove the next line; return false; + + if(size == 0) return false; u64 dl_id = DLCache::CreateMapId(address, size); DLCache::DLMap::iterator iter = DLCache::dl_map.find(dl_id); @@ -509,7 +504,7 @@ bool HandleDisplayList(u32 address, u32 size) break; case DLCache::DLPASS_COMPILE: // First, check that the hash is the same as the last time. - if (dl.dl_hash != HashAdler32(Memory_GetPtr(address), size)) + if (dl.dl_hash != GetHash64(Memory_GetPtr(address), size,0)) { // PanicAlert("uncachable %08x", address); dl.uncachable = true; @@ -524,13 +519,13 @@ bool HandleDisplayList(u32 address, u32 size) dl.check--; if (dl.check <= 0) { - if (dl.dl_hash != HashAdler32(Memory_GetPtr(address), size)) + if (dl.dl_hash != GetHash64(Memory_GetPtr(address), size,0)) { dl.uncachable = true; return false; } dl.check = dl.next_check; - dl.next_check *= 2; + //dl.next_check *= 2; if (dl.next_check > 1024) dl.next_check = 1024; } @@ -546,7 +541,7 @@ bool HandleDisplayList(u32 address, u32 size) DLCache::CachedDisplayList dl; if (DLCache::AnalyzeAndRunDisplayList(address, size, &dl)) { - dl.dl_hash = HashAdler32(Memory_GetPtr(address), size); + dl.dl_hash = GetHash64(Memory_GetPtr(address), size,0); dl.pass = DLCache::DLPASS_COMPILE; dl.check = 1; dl.next_check = 1; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp b/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp index 41b15b40b0..7641008a70 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/NativeVertexFormat.cpp @@ -29,13 +29,13 @@ #define COMPILED_CODE_SIZE 4096 u32 s_prevcomponents; // previous state set - +/* #ifdef _WIN32 #ifdef _M_IX86 #define USE_JIT #endif #endif - +*/ // Note the use of CallCdeclFunction3I etc. 
// This is a horrible hack that is necessary because in 64-bit mode, Opengl32.dll is based way, way above the 32-bit // address space that is within reach of a CALL, and just doing &fn gives us these high uncallable addresses. So we @@ -181,21 +181,21 @@ void GLVertexFormat::SetupVertexPointers() const { #ifdef USE_JIT ((void (*)())(void*)m_compiledCode)(); #else - glVertexPointer(3, GL_FLOAT, vtx_decl.stride, 0); + glVertexPointer(3, GL_FLOAT, vtx_decl.stride, VertexManager::s_pBaseBufferPointer); if (vtx_decl.num_normals >= 1) { - glNormalPointer(VarToGL(vtx_decl.normal_gl_type), vtx_decl.stride, (void *)vtx_decl.normal_offset[0]); + glNormalPointer(VarToGL(vtx_decl.normal_gl_type), vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.normal_offset[0])); if (vtx_decl.num_normals == 3) { - glVertexAttribPointer(SHADER_NORM1_ATTRIB, vtx_decl.normal_gl_size, VarToGL(vtx_decl.normal_gl_type), GL_TRUE, vtx_decl.stride, (void *)vtx_decl.normal_offset[1]); - glVertexAttribPointer(SHADER_NORM2_ATTRIB, vtx_decl.normal_gl_size, VarToGL(vtx_decl.normal_gl_type), GL_TRUE, vtx_decl.stride, (void *)vtx_decl.normal_offset[2]); + glVertexAttribPointer(SHADER_NORM1_ATTRIB, vtx_decl.normal_gl_size, VarToGL(vtx_decl.normal_gl_type), GL_TRUE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.normal_offset[1])); + glVertexAttribPointer(SHADER_NORM2_ATTRIB, vtx_decl.normal_gl_size, VarToGL(vtx_decl.normal_gl_type), GL_TRUE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.normal_offset[2])); } } for (int i = 0; i < 2; i++) { if (vtx_decl.color_offset[i] != -1) { if (i == 0) - glColorPointer(4, GL_UNSIGNED_BYTE, vtx_decl.stride, (void *)vtx_decl.color_offset[i]); + glColorPointer(4, GL_UNSIGNED_BYTE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.color_offset[i])); else { - glSecondaryColorPointer(4, GL_UNSIGNED_BYTE, vtx_decl.stride, (void *)vtx_decl.color_offset[i]); + 
glSecondaryColorPointer(4, GL_UNSIGNED_BYTE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.color_offset[i])); } } } @@ -205,12 +205,12 @@ void GLVertexFormat::SetupVertexPointers() const { int id = GL_TEXTURE0 + i; glClientActiveTexture(id); glTexCoordPointer(vtx_decl.texcoord_size[i], VarToGL(vtx_decl.texcoord_gl_type[i]), - vtx_decl.stride, (void *)vtx_decl.texcoord_offset[i]); + vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.texcoord_offset[i])); } } if (vtx_decl.posmtx_offset != -1) { - glVertexAttribPointer(SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, GL_FALSE, vtx_decl.stride, (void *)vtx_decl.posmtx_offset); + glVertexAttribPointer(SHADER_POSMTX_ATTRIB, 4, GL_UNSIGNED_BYTE, GL_FALSE, vtx_decl.stride, (void *)(VertexManager::s_pBaseBufferPointer + vtx_decl.posmtx_offset)); } #endif } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp index d4e57ec3d2..77a92a70b7 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp @@ -381,7 +381,7 @@ u64 EncodeToRamFromTexture(u32 address,GLuint source_texture,float MValueX,float EncodeToRamUsingShader(texconv_shader, source_texture, scaledSource, dest_ptr, expandedWidth / samples, expandedHeight, readStride, true, bScaleByHalf > 0 && !bFromZBuffer); TextureMngr::MakeRangeDynamic(address,size_in_bytes); - return TexDecoder_GetHash64(dest_ptr,size_in_bytes,g_ActiveConfig.iSafeTextureCache_ColorSamples); + return GetHash64(dest_ptr,size_in_bytes,g_ActiveConfig.iSafeTextureCache_ColorSamples); } void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp index 7224adaf0a..0fdeed288f 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp +++ 
b/Source/Plugins/Plugin_VideoOGL/Src/TextureMngr.cpp @@ -290,7 +290,7 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width FullFormat = (tex_format | (tlutfmt << 16)); if (g_ActiveConfig.bSafeTextureCache || g_ActiveConfig.bHiresTextures || g_ActiveConfig.bDumpTextures) { - texHash = TexDecoder_GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + texHash = GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { // WARNING! texID != address now => may break CopyRenderTargetToTexture (cf. TODO up) @@ -300,7 +300,7 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width // each other stored in a single texture, and uses the palette to make different characters // visible or invisible. Thus, unless we want to recreate the textures for every drawn character, // we must make sure that texture with different tluts get different IDs. 
- u64 tlutHash = TexDecoder_GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + u64 tlutHash = GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); texHash ^= tlutHash; if (g_ActiveConfig.bSafeTextureCache) { @@ -324,10 +324,10 @@ TextureMngr::TCacheEntry* TextureMngr::Load(int texstage, u32 address, int width { if(!g_ActiveConfig.bCopyEFBToTexture) { - hash_value = TexDecoder_GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + hash_value = GetHash64(ptr,TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); if ((tex_format == GX_TF_C4) || (tex_format == GX_TF_C8) || (tex_format == GX_TF_C14X2)) { - hash_value ^= TexDecoder_GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); + hash_value ^= GetHash64(&texMem[tlutaddr], TexDecoder_GetPaletteSize(tex_format),g_ActiveConfig.iSafeTextureCache_ColorSamples); } } else diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 22695d7657..5241d7ee48 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -60,8 +60,8 @@ static GLint max_Index_size = 0; #define MAXIBUFFERSIZE 0xFFFF #define MAXVBOBUFFERCOUNT 0x8 -static GLuint s_vboBuffers[MAXVBOBUFFERCOUNT] = {0}; -static int s_nCurVBOIndex = 0; // current free buffer +//static GLuint s_vboBuffers[MAXVBOBUFFERCOUNT] = {0}; +//static int s_nCurVBOIndex = 0; // current free buffer static bool Flushed=false; @@ -79,8 +79,9 @@ bool Init() PIBuffer = new u16[max_Index_size]; IndexGenerator::Start(TIBuffer,LIBuffer,PIBuffer); s_pCurBufferPointer = LocalVBuffer; - s_nCurVBOIndex = 0; - 
glGenBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); + s_pBaseBufferPointer = LocalVBuffer; + //s_nCurVBOIndex = 0; + //glGenBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); glEnableClientState(GL_VERTEX_ARRAY); g_nativeVertexFmt = NULL; Flushed=false; @@ -95,13 +96,13 @@ void Shutdown() delete [] TIBuffer; delete [] LIBuffer; delete [] PIBuffer; - glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); - s_nCurVBOIndex = 0; + //glDeleteBuffers(ARRAYSIZE(s_vboBuffers), s_vboBuffers); + //s_nCurVBOIndex = 0; } void ResetBuffer() { - s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); + //s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); s_pCurBufferPointer = LocalVBuffer; } @@ -237,8 +238,8 @@ void Flush() (void)GL_REPORT_ERROR(); - glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]); - glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer, GL_STREAM_DRAW); + //glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]); + //glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer, GL_STREAM_DRAW); GL_REPORT_ERRORD(); // setup the pointers @@ -339,7 +340,7 @@ void Flush() if (bpmem.blendmode.blendenable || bpmem.blendmode.subtract) glEnable(GL_BLEND); } - s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); + //s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); s_pCurBufferPointer = LocalVBuffer; IndexGenerator::Start(TIBuffer,LIBuffer,PIBuffer);