diff options
author | Erik Kundiman <erik@megapahit.org> | 2024-06-30 07:37:29 +0800 |
---|---|---|
committer | Erik Kundiman <erik@megapahit.org> | 2024-06-30 07:37:29 +0800 |
commit | 3c5f4d1273e02c9d247ff86d6cef20c37e587f19 (patch) | |
tree | e78cdf529cd6cc3b7657482048f7e577eb6f1e47 /indra | |
parent | 095bf053f148dac958158f6709303bf09fbcd719 (diff) | |
parent | 2e16d1b365e465c0c3e505770e213200ec25fc12 (diff) |
Merge remote-tracking branch 'mobserveur/main'
Diffstat (limited to 'indra')
-rw-r--r-- | indra/llrender/llgl.cpp | 7 | ||||
-rw-r--r-- | indra/llrender/llgl.h | 1 | ||||
-rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 42 | ||||
-rw-r--r-- | indra/newview/app_settings/settings.xml | 18 | ||||
-rw-r--r-- | indra/newview/llagentcamera.cpp | 4 | ||||
-rw-r--r-- | indra/newview/llappviewer.cpp | 37 |
6 files changed, 64 insertions, 45 deletions
diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 10920f1de3..9207e6ad73 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -1001,6 +1001,7 @@ LLGLManager::LLGLManager() : mIsAMD(FALSE), mIsNVIDIA(FALSE), mIsIntel(FALSE), + mIsApple(FALSE), #if LL_DARWIN mIsMobileGF(FALSE), #endif @@ -1174,6 +1175,11 @@ bool LLGLManager::initGL() mGLVendorShort = "INTEL"; mIsIntel = TRUE; } + else if(mGLVendor.find("APPLE") != std::string::npos) + { + mGLVendorShort = "APPLE"; + mIsApple = TRUE; + } else { mGLVendorShort = "MISC"; @@ -1373,6 +1379,7 @@ void LLGLManager::asLLSD(LLSD& info) info["is_ati"] = mIsAMD; // note, do not rename is_ati to is_amd without coordinating with DW info["is_nvidia"] = mIsNVIDIA; info["is_intel"] = mIsIntel; + info["is_apple"] = mIsApple; info["gl_renderer"] = mGLRenderer; } diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h index 5a7ad943df..e4b106c999 100644 --- a/indra/llrender/llgl.h +++ b/indra/llrender/llgl.h @@ -100,6 +100,7 @@ public: BOOL mIsAMD; BOOL mIsNVIDIA; BOOL mIsIntel; + BOOL mIsApple; #if LL_DARWIN // Needed to distinguish problem cards on older Macs that break with Materials diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 783794512c..52fb58187a 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -1155,19 +1155,40 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) LL_PROFILE_ZONE_NUM(end-start); U32 size = end-start+1; + U32 block_size = 65536; - //Note (observeur): glBufferSubData() was causing synchronization stalls, specialy on Apple GPUs, possibly to the fact Apple GPU is a tiled gpu, resulting to heavy stutters, and spacialy when called several times per frame on the same buffer. + //Note (observeur): The following code is executed on non Apple gpus. Using glMapBufferRange() didn't show obvious benefit on the other tested platforms (intel igpu, amd igpu and nVidia dgpus). + if(!gGLManager.mIsApple) + { + for (U32 i = start; i <= end; i += block_size) + { + LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); + LL_PROFILE_GPU_ZONE("glBufferSubData"); + U32 tend = llmin(i + block_size, end); + U32 size = tend - i + 1; + glBufferSubData(target, i, size, (U8*) data + (i-start)); + } + + return; + } + + //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs resulting to heavy stutters and lower performance in the world and UI rendering. Using glMapBufferRange() benefits Macs with Apple gpus enormously. - //Note (observeur): I maintained the notion of block_size for testing purpose, but i think it's a bad idea. We don't know the overhead of glMapBufferRange() depending on the driver, so it's better avoiding calling it more than necessary.(0 -> loop is disabled, 8192 -> original value, 524288 -> a resonable value). - constexpr U32 block_size = 0; + //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way. + U32 MapBits = GL_MAP_WRITE_BIT; + //Note (observeur): Using a block size of 0 will call the following block and map the buffer all in once. It doesn't bother Apple machines, it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed where around 2mb or 3mb while most of buffers are smaller than 50000 bytes. + block_size = 524288; + + //Note (observeur): This is called in case block_size is set to 0 (All in one mapping). if(block_size == 0) { U8 * mptr = NULL; LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block"); LL_PROFILE_GPU_ZONE("glBufferSubData"); - mptr = (U8*) glMapBufferRange( target, start, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + mptr = (U8*) glMapBufferRange( target, start, size, MapBits); + if(mptr) { std::memcpy(mptr, (U8*) data, size); @@ -1180,11 +1201,13 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) return; } + //Note (observeur): The following code is executed in case of block_size is superior to 0 + //Note (observeur): This is for analysis purpose only - if(size > block_size) - { - LL_INFOS() << "Large data range : " << size << LL_ENDL; - } + //if(size > block_size) + //{ + // LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL; + //} U8 * mptr = NULL; @@ -1195,7 +1218,8 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data) U32 tend = llmin(i + block_size, end); size = tend - i + 1; - mptr = (U8*) glMapBufferRange( target, i, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT); + mptr = (U8*) glMapBufferRange( target, i, size, MapBits ); + if(mptr) { std::memcpy(mptr, (U8*) data + (i-start), size); diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml index a07396b5b1..00c7eae9bc 100644 --- a/indra/newview/app_settings/settings.xml +++ b/indra/newview/app_settings/settings.xml @@ -2468,7 +2468,7 @@ <key>Value</key> <integer>0</integer> </map> - <key>DoubleClickTeleport</key> + <key>DoubleClickTeleport</key> <map> <key>Comment</key> <string>Enable double-click to teleport where allowed (afects minimap and people panel)</string> @@ -8898,7 +8898,7 @@ <key>Value</key> <integer>1</integer> </map> - + <key>RenderReflectionDetail</key> <map> <key>Comment</key> @@ -8965,7 +8965,7 @@ <key>Value</key> <real>1</real> </map> - + <key>RenderReflectionProbeDrawDistance</key> <map> <key>Comment</key> @@ -9153,7 +9153,7 @@ <key>Value</key> <real>0.7</real> </map> - + <key>RenderReflectionProbeMaxLocalLightAmbiance</key> <map> <key>Comment</key> @@ -10589,7 +10589,7 @@ <string>Boolean</string> <key>Value</key> <integer>0</integer> - </map> + </map> <key>NearbyListShowMap</key> <map> <key>Comment</key> @@ -13477,13 +13477,13 @@ <key>MaxFPS</key> <map> <key>Comment</key> - <string>OBSOLETE UNUSED setting.</string> + <string>FPS Limiter.</string> <key>Persist</key> <integer>1</integer> <key>Type</key> - <string>F32</string> + <string>U32</string> <key>Value</key> - <real>-1.0</real> + <real>0</real> </map> <key>ZoomDirect</key> <map> @@ -15384,7 +15384,7 @@ <key>Type</key> <string>Boolean</string> <key>Value</key> - <integer>1</integer> + <integer>1</integer> </map> <key>UpdateAppWindowTitleBar</key> <map> diff --git a/indra/newview/llagentcamera.cpp b/indra/newview/llagentcamera.cpp index 1912d9d1d5..8e8d7e6c50 100644 --- a/indra/newview/llagentcamera.cpp +++ b/indra/newview/llagentcamera.cpp @@ -2663,9 +2663,9 @@ void LLAgentCamera::setCameraPosAndFocusGlobal(const LLVector3d& camera_pos, con if (mCameraAnimating) { - const F64 ANIM_METERS_PER_SECOND = 10.0; + const F64 ANIM_METERS_PER_SECOND = 15.0; const F64 MIN_ANIM_SECONDS = 0.5; - const F64 MAX_ANIM_SECONDS = 10.0; + const F64 MAX_ANIM_SECONDS = 3.0; F64 anim_duration = llmax( MIN_ANIM_SECONDS, sqrt(focus_delta_squared) / ANIM_METERS_PER_SECOND ); anim_duration = llmin( anim_duration, MAX_ANIM_SECONDS ); setAnimationDuration( (F32)anim_duration ); diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 2d65c725b5..0f1b5a789b 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -393,7 +393,6 @@ static std::string gLaunchFileOnQuit; // Used on Win32 for other apps to identify our window (eg, win_setup) const char* const VIEWER_WINDOW_CLASSNAME = "Second Life"; -U64 fpsLimitSleepUntil = 0; // fps limiter : time until to render the frame again //---------------------------------------------------------------------------- @@ -1373,21 +1372,11 @@ bool LLAppViewer::frame() bool LLAppViewer::doFrame() { + static LLCachedControl<U32> fpsLimitMaxFps(gSavedSettings, "MaxFPS", 0); - // FPS Limit - - U64 fpsLimitNow = LLTrace::BlockTimer::getCPUClockCount64(); - U64 fpsLimitFrameStartTime = fpsLimitNow; - if(fpsLimitSleepUntil > 0) - { - if(fpsLimitSleepUntil > fpsLimitNow) return 0; - } - else - { - fpsLimitSleepUntil = 0; - } - - + U64 fpsLimitSleepFor = 0; + U64 fpsLimitFrameStartTime = 0; + if(fpsLimitMaxFps > 0) fpsLimitFrameStartTime = LLTrace::BlockTimer::getCPUClockCount64(); LL_RECORD_BLOCK_TIME(FTM_FRAME); { @@ -1559,24 +1548,17 @@ bool LLAppViewer::doFrame() } } - // fps limiter - - fpsLimitNow = LLTrace::BlockTimer::getCPUClockCount64(); - U64 fpsLimitFrameTime = fpsLimitNow - fpsLimitFrameStartTime; - static LLCachedControl<U32> fpsLimitMaxFps(gSavedSettings, "MaxFPS", 0); - if(fpsLimitMaxFps > 0) { + U64 fpsLimitFrameTime = LLTrace::BlockTimer::getCPUClockCount64() - fpsLimitFrameStartTime; U64 desired_time_ns = (U32)(1000000.f / fpsLimitMaxFps); - if(fpsLimitFrameTime < desired_time_ns) + if((fpsLimitFrameTime+1000) < desired_time_ns) { - U64 fpsLimitSleepUntil_for = desired_time_ns - fpsLimitFrameTime; - fpsLimitSleepUntil = LLTrace::BlockTimer::getCPUClockCount64() + fpsLimitSleepUntil_for; + fpsLimitSleepFor = (desired_time_ns - fpsLimitFrameTime - 1000) * 1.0; } } - { LL_PROFILE_ZONE_NAMED_CATEGORY_APP( "df pauseMainloopTimeout" ) pingMainloopTimeout("Main:Sleep"); @@ -1589,6 +1571,11 @@ bool LLAppViewer::doFrame() //LL_RECORD_BLOCK_TIME(SLEEP2); LL_PROFILE_ZONE_WARN( "Sleep2" ) + if(fpsLimitSleepFor) + { + usleep(fpsLimitSleepFor); + } + // yield some time to the os based on command line option static LLCachedControl<S32> yield_time(gSavedSettings, "YieldTime", -1); if(yield_time >= 0) |