From 6369047dcb74323b248de59bc8187db0d315548a Mon Sep 17 00:00:00 2001 From: Erik Kundiman Date: Fri, 5 Jul 2024 20:24:10 +0800 Subject: Temporary Cocoa U16 vs. SDL2 U32 key types Even when maint-b is merged to main, webrtc-voice will very likely still have to wait until it's merged with main to get maint-b's changes. I'll worry about the conflicts later, just so any branch would build on SDL2 or not without any patches. --- indra/llwindow/llkeyboard.cpp | 12 ++++++++++++ indra/llwindow/llkeyboard.h | 15 +++++++++++++++ indra/llwindow/llkeyboardheadless.cpp | 8 ++++++++ indra/llwindow/llkeyboardheadless.h | 5 +++++ 4 files changed, 40 insertions(+) (limited to 'indra/llwindow') diff --git a/indra/llwindow/llkeyboard.cpp b/indra/llwindow/llkeyboard.cpp index b3dcac6222..e93ad28fdf 100644 --- a/indra/llwindow/llkeyboard.cpp +++ b/indra/llwindow/llkeyboard.cpp @@ -195,9 +195,15 @@ void LLKeyboard::resetKeys() } +#if LL_SDL +BOOL LLKeyboard::translateKey(const U32 os_key, KEY *out_key) +{ + std::map::iterator iter; +#else BOOL LLKeyboard::translateKey(const U16 os_key, KEY *out_key) { std::map::iterator iter; +#endif // Only translate keys in the map, ignore all other keys for now iter = mTranslateKeyMap.find(os_key); @@ -215,9 +221,15 @@ BOOL LLKeyboard::translateKey(const U16 os_key, KEY *out_key) } +#if LL_SDL +U32 LLKeyboard::inverseTranslateKey(const KEY translated_key) +{ + std::map::iterator iter; +#else U16 LLKeyboard::inverseTranslateKey(const KEY translated_key) { std::map::iterator iter; +#endif iter = mInvTranslateKeyMap.find(translated_key); if (iter == mInvTranslateKeyMap.end()) { diff --git a/indra/llwindow/llkeyboard.h b/indra/llwindow/llkeyboard.h index e406de347c..89fa840e42 100644 --- a/indra/llwindow/llkeyboard.h +++ b/indra/llwindow/llkeyboard.h @@ -67,14 +67,24 @@ public: BOOL getKeyDown(const KEY key) { return mKeyLevel[key]; } BOOL getKeyRepeated(const KEY key) { return mKeyRepeated[key]; } +#if LL_SDL + BOOL translateKey(const U32 os_key, KEY 
*translated_key); + U32 inverseTranslateKey(const KEY translated_key); +#else BOOL translateKey(const U16 os_key, KEY *translated_key); U16 inverseTranslateKey(const KEY translated_key); +#endif BOOL handleTranslatedKeyUp(KEY translated_key, U32 translated_mask); // Translated into "Linden" keycodes BOOL handleTranslatedKeyDown(KEY translated_key, U32 translated_mask); // Translated into "Linden" keycodes +#if LL_SDL + virtual BOOL handleKeyUp(const U32 key, MASK mask) = 0; + virtual BOOL handleKeyDown(const U32 key, MASK mask) = 0; +#else virtual BOOL handleKeyUp(const U16 key, MASK mask) = 0; virtual BOOL handleKeyDown(const U16 key, MASK mask) = 0; +#endif #if defined(LL_DARWIN) && !defined(LL_SDL) // We only actually use this for OS X. @@ -111,8 +121,13 @@ protected: void addKeyName(KEY key, const std::string& name); protected: +#if LL_SDL + std::map mTranslateKeyMap; // Map of translations from OS keys to Linden KEYs + std::map mInvTranslateKeyMap; // Map of translations from Linden KEYs to OS keys +#else std::map mTranslateKeyMap; // Map of translations from OS keys to Linden KEYs std::map mInvTranslateKeyMap; // Map of translations from Linden KEYs to OS keys +#endif LLWindowCallbacks *mCallbacks; LLTimer mKeyLevelTimer[KEY_COUNT]; // Time since level was set diff --git a/indra/llwindow/llkeyboardheadless.cpp b/indra/llwindow/llkeyboardheadless.cpp index 01ac26261b..a3c86fde2b 100644 --- a/indra/llwindow/llkeyboardheadless.cpp +++ b/indra/llwindow/llkeyboardheadless.cpp @@ -35,11 +35,19 @@ void LLKeyboardHeadless::resetMaskKeys() { } +#if LL_SDL +BOOL LLKeyboardHeadless::handleKeyDown(const U32 key, const U32 mask) +#else BOOL LLKeyboardHeadless::handleKeyDown(const U16 key, const U32 mask) +#endif { return FALSE; } +#if LL_SDL +BOOL LLKeyboardHeadless::handleKeyUp(const U32 key, const U32 mask) +#else BOOL LLKeyboardHeadless::handleKeyUp(const U16 key, const U32 mask) +#endif { return FALSE; } MASK LLKeyboardHeadless::currentMask(BOOL for_mouse_event) diff 
--git a/indra/llwindow/llkeyboardheadless.h b/indra/llwindow/llkeyboardheadless.h index 8e067e6108..2bb670a53d 100644 --- a/indra/llwindow/llkeyboardheadless.h +++ b/indra/llwindow/llkeyboardheadless.h @@ -35,8 +35,13 @@ public: LLKeyboardHeadless(); /*virtual*/ ~LLKeyboardHeadless() {}; +#if LL_SDL + /*virtual*/ BOOL handleKeyUp(const U32 key, MASK mask); + /*virtual*/ BOOL handleKeyDown(const U32 key, MASK mask); +#else /*virtual*/ BOOL handleKeyUp(const U16 key, MASK mask); /*virtual*/ BOOL handleKeyDown(const U16 key, MASK mask); +#endif /*virtual*/ void resetMaskKeys(); /*virtual*/ MASK currentMask(BOOL for_mouse_event); /*virtual*/ void scanKeyboard(); -- cgit v1.3 From 9f62ef6cb34ebc91aa82715ce8b036c0af7affef Mon Sep 17 00:00:00 2001 From: mobserveur Date: Fri, 5 Jul 2024 20:13:59 +0200 Subject: Tuning floater and buffer mapping improvements This commit adds the tuning floater accessible via the fps button, and improvements to the buffer mapping optimisation modes. --- indra/llrender/llvertexbuffer.cpp | 132 ++++++++------------- indra/llrender/llvertexbuffer.h | 4 +- indra/llwindow/llwindow.h | 4 + indra/llwindow/llwindowmacosx.cpp | 17 +++ indra/llwindow/llwindowsdl.cpp | 12 ++ indra/newview/app_settings/settings.xml | 28 +++++ indra/newview/llstatusbar.cpp | 13 +- indra/newview/llstatusbar.h | 4 +- indra/newview/llviewerfloaterreg.cpp | 4 + indra/newview/mpvfloatertuning.cpp | 90 ++++++++++++++ indra/newview/mpvfloatertuning.h | 48 ++++++++ .../default/xui/en/floater_mpv_performance.xml | 104 ++++++++++++++++ .../skins/default/xui/en/panel_status_bar.xml | 20 ++-- 13 files changed, 386 insertions(+), 94 deletions(-) create mode 100644 indra/newview/mpvfloatertuning.cpp create mode 100644 indra/newview/mpvfloatertuning.h create mode 100644 indra/newview/skins/default/xui/en/floater_mpv_performance.xml (limited to 'indra/llwindow') diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 52fb58187a..30a7ed796a 100644 --- 
a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -37,6 +37,10 @@ #include "llglslshader.h" #include "llmemory.h" +#include "llcontrol.h" + +extern LLControlGroup gSavedSettings; + //Next Highest Power Of Two //helper function, returns first number > v that is a power of 2, or v if v is already a power of 2 U32 nhpo2(U32 v) @@ -530,6 +534,7 @@ U32 LLVertexBuffer::sGLRenderIndices = 0; U32 LLVertexBuffer::sLastMask = 0; U32 LLVertexBuffer::sVertexCount = 0; +U32 LLVertexBuffer::sMappingMode = gSavedSettings.getU32("MPVBufferOptiMode"); //NOTE: each component must be AT LEAST 4 bytes in size to avoid a performance penalty on AMD hardware const U32 LLVertexBuffer::sTypeSize[LLVertexBuffer::TYPE_MAX] = @@ -1144,93 +1149,58 @@ U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count) // start -- first byte to copy // end -- last byte to copy (NOT last byte + 1) // data -- mMappedData or mMappedIndexData -static void flush_vbo(GLenum target, U32 start, U32 end, void* data) +static void flush_vbo(GLenum target, U32 start, U32 end, void* data, S16 mode) { - if (end != 0) + if (end == 0) return; + + if (mode == 0) + { + if(gGLManager.mIsApple) mode = 2; + else mode = 1; + } + + if (mode == 1) { - //Note (observeur): I maintained the profile "glBufferSubData" names because i'm not sure if it would impact any statistics part somewhere in the code. LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData"); LL_PROFILE_ZONE_NUM(start); LL_PROFILE_ZONE_NUM(end); LL_PROFILE_ZONE_NUM(end-start); - U32 size = end-start+1; - U32 block_size = 65536; + const U32 block_size = 65536; - //Note (observeur): The following code is executed on non Apple gpus. Using glMapBufferRange() didn't show obvious benefit on the other tested platforms (intel igpu, amd igpu and nVidia dgpus). 
- if(!gGLManager.mIsApple) - { - for (U32 i = start; i <= end; i += block_size) - { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); - LL_PROFILE_GPU_ZONE("glBufferSubData"); - U32 tend = llmin(i + block_size, end); - U32 size = tend - i + 1; - glBufferSubData(target, i, size, (U8*) data + (i-start)); - } - - return; - } - - //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs resulting to heavy stutters and lower performance in the world and UI rendering. Using glMapBufferRange() benefits Macs with Apple gpus enormously. - - //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way. - U32 MapBits = GL_MAP_WRITE_BIT; - - //Note (observeur): Using a block size of 0 will call the following block and map the buffer all in once. It doesn't bother Apple machines, it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed where around 2mb or 3mb while most of buffers are smaller than 50000 bytes. - block_size = 524288; - - //Note (observeur): This is called in case block_size is set to 0 (All in one mapping). 
- if(block_size == 0) + for (U32 i = start; i <= end; i += block_size) { - U8 * mptr = NULL; LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); LL_PROFILE_GPU_ZONE("glBufferSubData"); - - mptr = (U8*) glMapBufferRange( target, start, size, MapBits); - - if(mptr) - { - std::memcpy(mptr, (U8*) data, size); - glUnmapBuffer(target); - } - else - { - LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; - } - return; + U32 tend = llmin(i + block_size, end); + //U32 size = tend - i + 1; + glBufferSubData(target, i, tend - i +1, (U8*) data + (i-start)); } - //Note (observeur): The following code is executed in case of block_size is superior to 0 - - //Note (observeur): This is for analysis purpose only - //if(size > block_size) - //{ - // LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL; - //} + return; + } - U8 * mptr = NULL; + U32 MapBits = GL_MAP_WRITE_BIT; + if (mode>2) MapBits = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT; - for (U32 i = start; i <= end; i += block_size) - { - LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); - LL_PROFILE_GPU_ZONE("glBufferSubData"); - U32 tend = llmin(i + block_size, end); - size = tend - i + 1; + U32 buffer_size = end-start+1; - mptr = (U8*) glMapBufferRange( target, i, size, MapBits ); + U8 * mptr = NULL; + mptr = (U8*) glMapBufferRange( target, start, end-start+1, MapBits); - if(mptr) - { - std::memcpy(mptr, (U8*) data + (i-start), size); - glUnmapBuffer(target); - } - else - { - LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; - } + if (mptr) + { + std::memcpy(mptr, (U8*) data, buffer_size); + if(!glUnmapBuffer(target)) + { + LL_WARNS() << "glUnmapBuffer() failed" << LL_ENDL; } } + else + { + LL_WARNS() << "glMapBufferRange() returned NULL" << LL_ENDL; + } + } void LLVertexBuffer::unmapBuffer() @@ -1266,13 +1236,13 @@ void LLVertexBuffer::unmapBuffer() } else { - flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start); + flush_vbo(GL_ARRAY_BUFFER, start, 
end, (U8*)mMappedData + start, sMappingMode); start = region.mStart; end = region.mEnd; } } - flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start); + flush_vbo(GL_ARRAY_BUFFER, start, end, (U8*)mMappedData + start, sMappingMode); mMappedVertexRegions.clear(); } @@ -1300,13 +1270,14 @@ void LLVertexBuffer::unmapBuffer() } else { - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start); + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, sMappingMode); + start = region.mStart; end = region.mEnd; } } - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start); + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, start, end, (U8*)mMappedIndexData + start, sMappingMode); mMappedIndexRegions.clear(); } @@ -1562,43 +1533,43 @@ void LLVertexBuffer::setupVertexBuffer() void LLVertexBuffer::setPositionData(const LLVector4a* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, 0, sizeof(LLVector4a) * getNumVerts()-1, (U8*) data, sMappingMode); } void LLVertexBuffer::setTexCoordData(const LLVector2* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*)data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TEXCOORD0], mOffsets[TYPE_TEXCOORD0] + sTypeSize[TYPE_TEXCOORD0] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setColorData(const LLColor4U* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_COLOR], mOffsets[TYPE_COLOR] + sTypeSize[TYPE_COLOR] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setNormalData(const LLVector4a* data) { llassert(sGLRenderBuffer == mGLBuffer); 
- flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_NORMAL], mOffsets[TYPE_NORMAL] + sTypeSize[TYPE_NORMAL] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setTangentData(const LLVector4a* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_TANGENT], mOffsets[TYPE_TANGENT] + sTypeSize[TYPE_TANGENT] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setWeight4Data(const LLVector4a* data) { llassert(sGLRenderBuffer == mGLBuffer); - flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data); + flush_vbo(GL_ARRAY_BUFFER, mOffsets[TYPE_WEIGHT4], mOffsets[TYPE_WEIGHT4] + sTypeSize[TYPE_WEIGHT4] * getNumVerts() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setIndexData(const U16* data) { llassert(sGLRenderIndices == mGLIndices); - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data); + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U16) * getNumIndices() - 1, (U8*) data, sMappingMode); } void LLVertexBuffer::setIndexData(const U32* data) @@ -1610,6 +1581,7 @@ void LLVertexBuffer::setIndexData(const U32* data) mIndicesStride = 4; mNumIndices /= 2; } - flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*)data); + + flush_vbo(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(U32) * getNumIndices() - 1, (U8*) data, sMappingMode); } diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index b634609929..184b0a4ac9 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -161,7 +161,7 @@ public: // set for rendering // assumes (and will assert on) the following: - // - 
this buffer has no pending unampBuffer call + // - this buffer has no pending unmapBuffer call // - a shader is currently bound // - This buffer has sufficient attributes within it to satisfy the needs of the currently bound shader void setBuffer(); @@ -267,6 +267,8 @@ public: static U32 sGLRenderIndices; static U32 sLastMask; static U32 sVertexCount; + + static U32 sMappingMode; }; #ifdef LL_PROFILER_ENABLE_RENDER_DOC diff --git a/indra/llwindow/llwindow.h b/indra/llwindow/llwindow.h index aff9334cb6..5bb538f892 100644 --- a/indra/llwindow/llwindow.h +++ b/indra/llwindow/llwindow.h @@ -34,6 +34,8 @@ #include "llinstancetracker.h" #include "llsd.h" +#include "../llrender/llglheaders.h" + class LLSplashScreen; class LLPreeditor; class LLWindowCallbacks; @@ -243,6 +245,8 @@ protected: S32 mMinWindowHeight; S32 mRefreshRate; + GLsync swapFense = nullptr; /* fence issued by the previous swapBuffers(); null until the first one is created */ + // Handle a UTF-16 encoding unit received from keyboard. // Converting the series of UTF-16 encoding units to UTF-32 data, // this method passes the resulting UTF-32 data to mCallback's diff --git a/indra/llwindow/llwindowmacosx.cpp b/indra/llwindow/llwindowmacosx.cpp index 453905b19b..1b8ab27f23 100644 --- a/indra/llwindow/llwindowmacosx.cpp +++ b/indra/llwindow/llwindowmacosx.cpp @@ -38,6 +38,8 @@ #include "lldir.h" #include "indra_constants.h" +#include "../newview/llviewercontrol.h" + #include #include #include @@ -50,6 +52,8 @@ #include #include + + extern BOOL gDebugWindowProc; BOOL gHiDPISupport = TRUE; @@ -1009,6 +1013,19 @@ BOOL LLWindowMacOSX::setSizeImpl(const LLCoordWindow size) void LLWindowMacOSX::swapBuffers() { CGLFlushDrawable(mContext); + + U32 mode = gSavedSettings.getU32("MPVBufferOptiMode"); + if (mode == 0) + { + if(gGLManager.mIsApple) mode = 2; + else mode = 1; + } + if (mode > 2) + { + if (swapFense) glClientWaitSync(swapFense, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); /* nothing to wait on until a fence has been issued */ + glDeleteSync(swapFense); + swapFense = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } } void LLWindowMacOSX::restoreGLContext() diff 
--git a/indra/llwindow/llwindowsdl.cpp b/indra/llwindow/llwindowsdl.cpp index 98484419f9..110a28ca48 100644 --- a/indra/llwindow/llwindowsdl.cpp +++ b/indra/llwindow/llwindowsdl.cpp @@ -1098,6 +1098,18 @@ void LLWindowSDL::swapBuffers() if (mWindow) { SDL_GL_SwapWindow(mWindow); + U32 mode = gSavedSettings.getU32("MPVBufferOptiMode"); /* 0:auto, 1:normal, 2:optimised, 3:unsynchronized */ + if (mode == 0) + { + if(gGLManager.mIsApple) mode = 2; + else mode = 1; + } + if (mode > 2) + { + if (swapFense) glClientWaitSync(swapFense, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); /* skip the wait while no fence exists (assumes the member starts out null - confirm) */ + glDeleteSync(swapFense); + swapFense = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } } } diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml index 00c7eae9bc..81b35359be 100644 --- a/indra/newview/app_settings/settings.xml +++ b/indra/newview/app_settings/settings.xml @@ -13485,6 +13485,17 @@ Value 0 + MaxFPS + + Comment + FPS Limiter. + Persist + 1 + Type + U32 + Value + 0 + ZoomDirect Comment @@ -13744,6 +13755,23 @@ 12 + + + MPVBufferOptiMode + + Comment + + OpenGL buffer mapping mode: + 0:auto, 1:normal, 2:optimised, 3:unsynchronized. + Persist + 1 + Type + U32 + Value + 0 + + +