From d5217ec883e298e3cee4149cfb46f952391e83d1 Mon Sep 17 00:00:00 2001 From: mobserveur Date: Fri, 28 Jun 2024 13:41:44 +0200 Subject: fps limiter setting in app_settings.xml MaxFPS settings type and description changed --- indra/newview/app_settings/settings.xml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'indra') diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml index a07396b5b1..00c7eae9bc 100644 --- a/indra/newview/app_settings/settings.xml +++ b/indra/newview/app_settings/settings.xml @@ -2468,7 +2468,7 @@ Value 0 - DoubleClickTeleport + DoubleClickTeleport Comment Enable double-click to teleport where allowed (afects minimap and people panel) @@ -8898,7 +8898,7 @@ Value 1 - + RenderReflectionDetail Comment @@ -8965,7 +8965,7 @@ Value 1 - + RenderReflectionProbeDrawDistance Comment @@ -9153,7 +9153,7 @@ Value 0.7 - + RenderReflectionProbeMaxLocalLightAmbiance Comment @@ -10589,7 +10589,7 @@ Boolean Value 0 - + NearbyListShowMap Comment @@ -13477,13 +13477,13 @@ MaxFPS Comment - OBSOLETE UNUSED setting. + FPS Limiter. 
Persist 1 Type - F32 + U32 Value - -1.0 + 0 ZoomDirect @@ -15384,7 +15384,7 @@ Type Boolean Value - 1 + 1 UpdateAppWindowTitleBar -- cgit v1.2.3 From 5d578f69df6ad00e51d1c2902a1f304f4d942e61 Mon Sep 17 00:00:00 2001 From: mobserveur Date: Fri, 28 Jun 2024 13:43:29 +0200 Subject: Faster Zoom in avatars Adjusted values for ANIM_METERS_PER_SECOND and MAX_ANIM_SECONDS in llagentcamera.cpp --- indra/newview/llagentcamera.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'indra') diff --git a/indra/newview/llagentcamera.cpp b/indra/newview/llagentcamera.cpp index 1912d9d1d5..8e8d7e6c50 100644 --- a/indra/newview/llagentcamera.cpp +++ b/indra/newview/llagentcamera.cpp @@ -2663,9 +2663,9 @@ void LLAgentCamera::setCameraPosAndFocusGlobal(const LLVector3d& camera_pos, con if (mCameraAnimating) { - const F64 ANIM_METERS_PER_SECOND = 10.0; + const F64 ANIM_METERS_PER_SECOND = 15.0; const F64 MIN_ANIM_SECONDS = 0.5; - const F64 MAX_ANIM_SECONDS = 10.0; + const F64 MAX_ANIM_SECONDS = 3.0; F64 anim_duration = llmax( MIN_ANIM_SECONDS, sqrt(focus_delta_squared) / ANIM_METERS_PER_SECOND ); anim_duration = llmin( anim_duration, MAX_ANIM_SECONDS ); setAnimationDuration( (F32)anim_duration ); -- cgit v1.2.3 From 714038e0f35329b614d12cf467230f9cb6eea017 Mon Sep 17 00:00:00 2001 From: mobserveur Date: Sat, 29 Jun 2024 22:48:41 +0200 Subject: Apple GPU Detection and Apple GPU performance optimisations This commit adds a mIsApple member to the gl manager and refines the buffers optimisations for Apple GPU in llvertexbuffer.cpp --- indra/llrender/llgl.cpp | 7 +++++++ indra/llrender/llgl.h | 1 + 2 files changed, 8 insertions(+) (limited to 'indra') diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 10920f1de3..9207e6ad73 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -1001,6 +1001,7 @@ LLGLManager::LLGLManager() : mIsAMD(FALSE), mIsNVIDIA(FALSE), mIsIntel(FALSE), + mIsApple(FALSE), #if LL_DARWIN mIsMobileGF(FALSE), #endif @@ -1174,6 +1175,11 @@ bool 
LLGLManager::initGL() mGLVendorShort = "INTEL"; mIsIntel = TRUE; } + else if(mGLVendor.find("APPLE") != std::string::npos) + { + mGLVendorShort = "APPLE"; + mIsApple = TRUE; + } else { mGLVendorShort = "MISC"; @@ -1373,6 +1379,7 @@ void LLGLManager::asLLSD(LLSD& info) info["is_ati"] = mIsAMD; // note, do not rename is_ati to is_amd without coordinating with DW info["is_nvidia"] = mIsNVIDIA; info["is_intel"] = mIsIntel; + info["is_apple"] = mIsApple; info["gl_renderer"] = mGLRenderer; } diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 5a7ad943df..e4b106c999 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -100,6 +100,7 @@ public: BOOL mIsAMD; BOOL mIsNVIDIA; BOOL mIsIntel; + BOOL mIsApple; #if LL_DARWIN // Needed to distinguish problem cards on older Macs that break with Materials -- cgit v1.2.3 From 20b54d6c7ed671dd7a6e5b2518d338d67e81a543 Mon Sep 17 00:00:00 2001 From: mobserveur Date: Sat, 29 Jun 2024 22:50:42 +0200 Subject: Reworked fps limiter code This improves the reliability of the fps limiter --- indra/newview/llappviewer.cpp | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'indra') diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 2d65c725b5..0f1b5a789b 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -393,7 +393,6 @@ static std::string gLaunchFileOnQuit; // Used on Win32 for other apps to identify our window (eg, win_setup) const char* const VIEWER_WINDOW_CLASSNAME = "Second Life"; -U64 fpsLimitSleepUntil = 0; // fps limiter : time until to render the frame again //---------------------------------------------------------------------------- @@ -1373,21 +1372,11 @@ bool LLAppViewer::frame() bool LLAppViewer::doFrame() { + static LLCachedControl fpsLimitMaxFps(gSavedSettings, "MaxFPS", 0); - // FPS Limit - - U64 fpsLimitNow = LLTrace::BlockTimer::getCPUClockCount64(); - U64 fpsLimitFrameStartTime = 
fpsLimitNow; - if(fpsLimitSleepUntil > 0) - { - if(fpsLimitSleepUntil > fpsLimitNow) return 0; - } - else - { - fpsLimitSleepUntil = 0; - } - - + U64 fpsLimitSleepFor = 0; + U64 fpsLimitFrameStartTime = 0; + if(fpsLimitMaxFps > 0) fpsLimitFrameStartTime = LLTrace::BlockTimer::getCPUClockCount64(); LL_RECORD_BLOCK_TIME(FTM_FRAME); { @@ -1559,24 +1548,17 @@ bool LLAppViewer::doFrame() } } - // fps limiter - - fpsLimitNow = LLTrace::BlockTimer::getCPUClockCount64(); - U64 fpsLimitFrameTime = fpsLimitNow - fpsLimitFrameStartTime; - static LLCachedControl fpsLimitMaxFps(gSavedSettings, "MaxFPS", 0); - if(fpsLimitMaxFps > 0) { + U64 fpsLimitFrameTime = LLTrace::BlockTimer::getCPUClockCount64() - fpsLimitFrameStartTime; U64 desired_time_ns = (U32)(1000000.f / fpsLimitMaxFps); - if(fpsLimitFrameTime < desired_time_ns) + if((fpsLimitFrameTime+1000) < desired_time_ns) { - U64 fpsLimitSleepUntil_for = desired_time_ns - fpsLimitFrameTime; - fpsLimitSleepUntil = LLTrace::BlockTimer::getCPUClockCount64() + fpsLimitSleepUntil_for; + fpsLimitSleepFor = (desired_time_ns - fpsLimitFrameTime - 1000) * 1.0; } } - { LL_PROFILE_ZONE_NAMED_CATEGORY_APP( "df pauseMainloopTimeout" ) pingMainloopTimeout("Main:Sleep"); @@ -1589,6 +1571,11 @@ bool LLAppViewer::doFrame() //LL_RECORD_BLOCK_TIME(SLEEP2); LL_PROFILE_ZONE_WARN( "Sleep2" ) + if(fpsLimitSleepFor) + { + usleep(fpsLimitSleepFor); + } + // yield some time to the os based on command line option static LLCachedControl yield_time(gSavedSettings, "YieldTime", -1); if(yield_time >= 0) -- cgit v1.2.3 From 2e16d1b365e465c0c3e505770e213200ec25fc12 Mon Sep 17 00:00:00 2001 From: mobserveur Date: Sat, 29 Jun 2024 22:57:51 +0200 Subject: vertex buffer optimisations for Apple GPU The vertex buffer will use the original mapping for non Apple GPUs and the new optimized mapping for Apple GPUs. 
It needs the Apple gpu detection in the gl manager --- indra/llrender/llvertexbuffer.cpp | 42 ++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'indra') diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 783794512c..52fb58187a 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -1155,19 +1155,40 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) LL_PROFILE_ZONE_NUM(end-start); U32 size = end-start+1; + U32 block_size = 65536; - //Note (observeur): glBufferSubData() was causing synchronization stalls, specialy on Apple GPUs, possibly to the fact Apple GPU is a tiled gpu, resulting to heavy stutters, and spacialy when called several times per frame on the same buffer. + //Note (observeur): The following code is executed on non Apple gpus. Using glMapBufferRange() didn't show obvious benefit on the other tested platforms (intel igpu, amd igpu and nVidia dgpus). + if(!gGLManager.mIsApple) + { + for (U32 i = start; i <= end; i += block_size) + { + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); + LL_PROFILE_GPU_ZONE("glBufferSubData"); + U32 tend = llmin(i + block_size, end); + U32 size = tend - i + 1; + glBufferSubData(target, i, size, (U8*) data + (i-start)); + } + + return; + } + + //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs resulting to heavy stutters and lower performance in the world and UI rendering. Using glMapBufferRange() benefits Macs with Apple gpus enormously. - //Note (observeur): I maintained the notion of block_size for testing purpose, but i think it's a bad idea. We don't know the overhead of glMapBufferRange() depending on the driver, so it's better avoiding calling it more than necessary.(0 -> loop is disabled, 8192 -> original value, 524288 -> a resonable value). 
- constexpr U32 block_size = 0; + //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way. + U32 MapBits = GL_MAP_WRITE_BIT; + //Note (observeur): Using a block size of 0 will call the following block and map the buffer all in once. It doesn't bother Apple machines, it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed where around 2mb or 3mb while most of buffers are smaller than 50000 bytes. + block_size = 524288; + + //Note (observeur): This is called in case block_size is set to 0 (All in one mapping). if(block_size == 0) { U8 * mptr = NULL; LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); LL_PROFILE_GPU_ZONE("glBufferSubData"); - mptr = (U8*) glMapBufferRange( target, start, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + mptr = (U8*) glMapBufferRange( target, start, size, MapBits); + if(mptr) { std::memcpy(mptr, (U8*) data, size); @@ -1180,11 +1201,13 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) return; } + //Note (observeur): The following code is executed in case of block_size is superior to 0 + //Note (observeur): This is for analysis purpose only - if(size > block_size) - { - LL_INFOS() << "Large data range : " << size << LL_ENDL; - } + //if(size > block_size) + //{ + // LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL; + //} U8 * mptr = NULL; @@ -1195,7 +1218,8 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) U32 tend = llmin(i + block_size, end); size = tend - i + 1; - mptr = (U8*) glMapBufferRange( target, i, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + mptr = (U8*) glMapBufferRange( target, i, size, MapBits ); + if(mptr) { std::memcpy(mptr, (U8*) data + (i-start), size); -- cgit v1.2.3