From 9f62ef6cb34ebc91aa82715ce8b036c0af7affef Mon Sep 17 00:00:00 2001 From: mobserveur Date: Fri, 5 Jul 2024 20:13:59 +0200 Subject: Tuning floater and buffer mapping improvements This commit adds the tuning floater accessible via the fps button, and improvements to the buffer mapping optimisation modes. --- indra/llrender/llvertexbuffer.cpp | 132 ++++++++------------- indra/llrender/llvertexbuffer.h | 4 +- indra/llwindow/llwindow.h | 4 + indra/llwindow/llwindowmacosx.cpp | 17 +++ indra/llwindow/llwindowsdl.cpp | 12 ++ indra/newview/app_settings/settings.xml | 28 +++++ indra/newview/llstatusbar.cpp | 13 +- indra/newview/llstatusbar.h | 4 +- indra/newview/llviewerfloaterreg.cpp | 4 + indra/newview/mpvfloatertuning.cpp | 90 ++++++++++++++ indra/newview/mpvfloatertuning.h | 48 ++++++++ .../default/xui/en/floater_mpv_performance.xml | 104 ++++++++++++++++ .../skins/default/xui/en/panel_status_bar.xml | 20 ++-- 13 files changed, 386 insertions(+), 94 deletions(-) create mode 100644 indra/newview/mpvfloatertuning.cpp create mode 100644 indra/newview/mpvfloatertuning.h create mode 100644 indra/newview/skins/default/xui/en/floater_mpv_performance.xml diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 52fb58187a..30a7ed796a 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -37,6 +37,10 @@ #include "llglslshader.h" #include "llmemory.h" +#include "llcontrol.h" + +extern LLControlGroup gSavedSettings; + //Next Highest Power Of Two //helper function, returns first number > v that is a power of 2, or v if v is already a power of 2 U32 nhpo2(U32 v) @@ -530,6 +534,7 @@ U32 LLVertexBuffer::sGLRenderIndices = 0; U32 LLVertexBuffer::sLastMask = 0; U32 LLVertexBuffer::sVertexCount = 0; +U32 LLVertexBuffer::sMappingMode = 0; /* 0 = auto. Do not read gSavedSettings here: namespace-scope initializers run before main(), in unspecified order across translation units, so gSavedSettings may not be constructed yet, let alone loaded. Copy MPVBufferOptiMode into sMappingMode once settings have been initialized. */ //NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware const U32 
LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_MAX] = @@ -1144,93 +1149,58 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count) // start -- first byte to copy // end -- last byte to copy (NOT last byte + 1) // data -- mMappedData or mMappedIndexData -static void flush_vbo(GLenum target, U32 start, U32 end, void* data) +static void flush_vbo(GLenum target, U32 start, U32 end, void* data, S16 mode) { - if (end != 0) + if (end == 0) return; + + if (mode == 0) + { + if(gGLManager.mIsApple) mode = 2; + else mode = 1; + } + + if (mode == 1) { - //Note (observeur): I maintained the profile "glBufferSubData" names because i'm not sure if it would impact any statistics part somewhere in the code. LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); LL_PROFILE_ZONE_NUM(start); LL_PROFILE_ZONE_NUM(end); LL_PROFILE_ZONE_NUM(end-start); - U32 size = end-start+1; - U32 block_size = 65536; + const U32 block_size = 65536; - //Note (observeur): The following code is executed on non Apple gpus. Using glMapBufferRange() didn't show obvious benefit on the other tested platforms (intel igpu, amd igpu and nVidia dgpus). - if(!gGLManager.mIsApple) - { - for (U32 i = start; i <= end; i += block_size) - { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); - LL_PROFILE_GPU_ZONE("glBufferSubData"); - U32 tend = llmin(i + block_size, end); - U32 size = tend - i + 1; - glBufferSubData(target, i, size, (U8*) data + (i-start)); - } - - return; - } - - //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs resulting to heavy stutters and lower performance in the world and UI rendering. Using glMapBufferRange() benefits Macs with Apple gpus enormously. - - //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way. 
- U32 MapBits = GL_MAP_WRITE_BIT; - - //Note (observeur): Using a block size of 0 will call the following block and map the buffer all in once. It doesn't bother Apple machines, it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed where around 2mb or 3mb while most of buffers are smaller than 50000 bytes. - block_size = 524288; - - //Note (observeur): This is called in case block_size is set to 0 (All in one mapping). - if(block_size == 0) + for (U32 i = start; i <= end; i += block_size) { - U8 * mptr = NULL; LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); LL_PROFILE_GPU_ZONE("glBufferSubData"); - - mptr = (U8*) glMapBufferRange( target, start, size, MapBits); - - if(mptr) - { - std::memcpy(mptr, (U8*) data, size); - glUnmapBuffer(target); - } - else - { - LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; - } - return; + U32 tend = llmin(i + block_size, end); + //U32 size = tend - i + 1; + glBufferSubData(target, i, tend - i +1, (U8*) data + (i-start)); } - //Note (observeur): The following code is executed in case of block_size is superior to 0 - - //Note (observeur): This is for analysis purpose only - //if(size > block_size) - //{ - // LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL; - //} + return; + } - U8 * mptr = NULL; + U32 MapBits = GL_MAP_WRITE_BIT; + if (mode>2) MapBits = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT; - for (U32 i = start; i <= end; i += block_size) - { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); - LL_PROFILE_GPU_ZONE("glBufferSubData"); - U32 tend = llmin(i + block_size, end); - size = tend - i + 1; + U32 buffer_size = end-start+1; - mptr = (U8*) glMapBufferRange( target, i, size, MapBits ); + U8 * mptr = NULL; + mptr = (U8*) glMapBufferRange( target, start, end-start+1, MapBits); - if(mptr) - { - std::memcpy(mptr, (U8*) data + (i-start), size); - glUnmapBuffer(target); - } - else - { - LL_WARNS() << "glMapBufferRange() 
returned NULL" << LL_ENDL; - } + if (mptr) + { + std::memcpy(mptr, (U8*) data, buffer_size); + if(!glUnmapBuffer(target)) + { + LL_WARNS() << "glUnmapBuffer() failed" << LL_ENDL; } } + else + { + LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; + } + } void LLVertexBuffer::unmapBuffer() @@ -1266,13 +1236,13 @@ void LLVertexBuffer::unmapBuffer() } else { - flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start); + flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, sMappingMode); start = region.mStart; end = region.mEnd; } } - flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start); + flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, sMappingMode); mMappedVertexRegions.clear(); } @@ -1300,13 +1270,14 @@ void LLVertexBuffer::unmapBuffer() } else { - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start); + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, sMappingMode); + start = region.mStart; end = region.mEnd; } } - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start); + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, sMappingMode); mMappedIndexRegions.clear(); } @@ -1562,43 +1533,43 @@ void LLVertexBuffer::setupVertexBuffer() void LLVertexBuffer::setPositionData(const LLVector4a* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data, sMappingMode); } void LLVertexBuffer::setTexCoordData(const LLVector2* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*)data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*) data, sMappingMode); } 
void LLVertexBuffer::setColorData(const LLColor4U* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setNormalData(const LLVector4a* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setTangentData(const LLVector4a* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setWeight4Data(const LLVector4a* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setIndexData(const U16* data) { llassert(sGLRenderIndices == mGLIndices); - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data); + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setIndexData(const U32* data) @@ -1610,6 +1581,7 @@ void LLVertexBuffer::setIndexData(const U32* data) mIndicesStride = 4; mNumIndices /= 2; } - 
flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*)data); + + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*) data, sMappingMode); } diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index b634609929..184b0a4ac9 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -161,7 +161,7 @@ public: // set for rendering // assumes (and will assert on) the following: - // - this buffer has no pending unampBuffer call + // - this buffer has no pending unmapBuffer call // - a shader is currently bound // - This buffer has sufficient attributes within it to satisfy the needs of the currently bound shader void setBuffer(); @@ -267,6 +267,8 @@ public: static U32 sGLRenderIndices; static U32 sLastMask; static U32 sVertexCount; + + static U32 sMappingMode; }; #ifdef LL_PROFILER_ENABLE_RENDER_DOC diff --git a/indra/llwindow/llwindow.h b/indra/llwindow/llwindow.h index aff9334cb6..5bb538f892 100644 --- a/indra/llwindow/llwindow.h +++ b/indra/llwindow/llwindow.h @@ -34,6 +34,8 @@ #include "llinstancetracker.h" #include "llsd.h" +#include "../llrender/llglheaders.h" + class LLSplashScreen; class LLPreeditor; class LLWindowCallbacks; @@ -243,6 +245,8 @@ protected: S32 mMinWindowHeight; S32 mRefreshRate; + GLsync swapFense = nullptr; /* fence issued after the previous buffer swap; null until the first glFenceSync() so swapBuffers() never hands an indeterminate handle to GL */ + // Handle a UTF-16 encoding unit received from keyboard. 
// Converting the series of UTF-16 encoding units to UTF-32 data, // this method passes the resulting UTF-32 data to mCallback's diff --git a/indra/llwindow/llwindowmacosx.cpp b/indra/llwindow/llwindowmacosx.cpp index 453905b19b..1b8ab27f23 100644 --- a/indra/llwindow/llwindowmacosx.cpp +++ b/indra/llwindow/llwindowmacosx.cpp @@ -38,6 +38,8 @@ #include "lldir.h" #include "indra_constants.h" +#include "../newview/llviewercontrol.h" + #include #include #include @@ -50,6 +52,8 @@ #include #include + + extern BOOL gDebugWindowProc; BOOL gHiDPISupport = TRUE; @@ -1009,6 +1013,19 @@ BOOL LLWindowMacOSX::setSizeImpl(const LLCoordWindow size) void LLWindowMacOSX::swapBuffers() { CGLFlushDrawable(mContext); + + U32 mode = gSavedSettings.getU32("MPVBufferOptiMode"); + if (mode == 0) + { + if(gGLManager.mIsApple) mode = 2; + else mode = 1; + } + if (mode > 2) + { + if (swapFense) glClientWaitSync(swapFense, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); /* skip the wait while no fence exists; glDeleteSync() ignores a zero handle */ + glDeleteSync(swapFense); + swapFense = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } } void LLWindowMacOSX::restoreGLContext() diff --git a/indra/llwindow/llwindowsdl.cpp b/indra/llwindow/llwindowsdl.cpp index 98484419f9..110a28ca48 100644 --- a/indra/llwindow/llwindowsdl.cpp +++ b/indra/llwindow/llwindowsdl.cpp @@ -1098,6 +1098,18 @@ void LLWindowSDL::swapBuffers() if (mWindow) { SDL_GL_SwapWindow(mWindow); + U32 mode = gSavedSettings.getU32("MPVBufferOptiMode"); /* must match the key declared in settings.xml and read by the macOS path */ + if (mode == 0) + { + if(gGLManager.mIsApple) mode = 2; + else mode = 1; + } + if (mode > 2) + { + if (swapFense) glClientWaitSync(swapFense, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); /* skip the wait while no fence exists; glDeleteSync() ignores a zero handle */ + glDeleteSync(swapFense); + swapFense = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } } } diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml index 00c7eae9bc..81b35359be 100644 --- a/indra/newview/app_settings/settings.xml +++ b/indra/newview/app_settings/settings.xml @@ -13485,6 +13485,17 @@ Value 0 + MaxFPS + + Comment + FPS Limiter. 
+ Persist + 1 + Type + U32 + Value + 0 + ZoomDirect Comment @@ -13744,6 +13755,23 @@ 12 + + + MPVBufferOptiMode + + Comment + + OpenGL buffer mapping mode: + 0:auto, 1:normal, 2:optimised, 3:unsynchronized. + Persist + 1 + Type + U32 + Value + 0 + + +