Merge remote-tracking branch 'mobserveur/main'

author: Erik Kundiman <erik@megapahit.org> 2024-06-30 07:37:29 +0800
committer: Erik Kundiman <erik@megapahit.org> 2024-06-30 07:37:29 +0800
commit: 3c5f4d1273e02c9d247ff86d6cef20c37e587f19 (patch)
tree: e78cdf529cd6cc3b7657482048f7e577eb6f1e47 /indra
parent: 095bf053f148dac958158f6709303bf09fbcd719 (diff)
parent: 2e16d1b365e465c0c3e505770e213200ec25fc12 (diff)
6 files changed, 64 insertions, 45 deletions
diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp
index 10920f1de3..9207e6ad73 100644
--- a/indra/llrender/llgl.cpp
+++ b/indra/llrender/llgl.cpp
@@ -1001,6 +1001,7 @@ LLGLManager::LLGLManager() :
     mIsAMD(FALSE),
     mIsNVIDIA(FALSE),
     mIsIntel(FALSE),
+    mIsApple(FALSE),
 #if LL_DARWIN
     mIsMobileGF(FALSE),
 #endif
@@ -1174,6 +1175,11 @@ bool LLGLManager::initGL()
         mGLVendorShort = "INTEL";
         mIsIntel = TRUE;
     }
+    else if(mGLVendor.find("APPLE") != std::string::npos)
+    {
+        mGLVendorShort = "APPLE";
+        mIsApple = TRUE;
+    }
     else
     {
         mGLVendorShort = "MISC";
@@ -1373,6 +1379,7 @@ void LLGLManager::asLLSD(LLSD& info)
     info["is_ati"] = mIsAMD;  // note, do not rename is_ati to is_amd without coordinating with DW
     info["is_nvidia"] = mIsNVIDIA;
     info["is_intel"] = mIsIntel;
+    info["is_apple"] = mIsApple;
 
     info["gl_renderer"] = mGLRenderer;
 }
diff --git a/indra/llrender/llgl.h b/indra/llrender/llgl.h
index 5a7ad943df..e4b106c999 100644
--- a/indra/llrender/llgl.h
+++ b/indra/llrender/llgl.h
@@ -100,6 +100,7 @@ public:
     BOOL mIsAMD;
     BOOL mIsNVIDIA;
     BOOL mIsIntel;
+    BOOL mIsApple;
 
 #if LL_DARWIN
     // Needed to distinguish problem cards on older Macs that break with Materials
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 783794512c..52fb58187a 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -1155,19 +1155,40 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data)
         LL_PROFILE_ZONE_NUM(end-start);
 
         U32 size = end-start+1;
+        U32 block_size = 65536;
 
-        //Note (observeur): glBufferSubData() was causing synchronization stalls, specialy on Apple GPUs, possibly to the fact Apple GPU is a tiled gpu, resulting to heavy stutters, and spacialy when called several times per frame on the same buffer.
+        //Note (observeur): The following code is executed on non-Apple GPUs. Using glMapBufferRange() didn't show any obvious benefit on the other tested platforms (Intel iGPU, AMD iGPU and NVIDIA dGPUs).
+        if(!gGLManager.mIsApple)
+        {
+            for (U32 i = start; i <= end; i += block_size)
+            {
+                LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
+                LL_PROFILE_GPU_ZONE("glBufferSubData");
+                U32 tend = llmin(i + block_size, end);
+                U32 size = tend - i + 1;
+                glBufferSubData(target, i, size, (U8*) data + (i-start));
+            }
+
+            return;
+        }
+
+        //Note (observeur): glBufferSubData() was causing synchronization stalls on Apple GPUs, resulting in heavy stutters and lower performance in world and UI rendering. Using glMapBufferRange() benefits Macs with Apple GPUs enormously.
 
-        //Note (observeur): I maintained the notion of block_size for testing purpose, but i think it's a bad idea. We don't know the overhead of glMapBufferRange() depending on the driver, so it's better avoiding calling it more than necessary.(0 -> loop is disabled, 8192 -> original value, 524288 -> a resonable value).
-        constexpr U32 block_size = 0;
+        //Note (observeur): Other bits such as GL_MAP_INVALIDATE_RANGE_BIT or GL_MAP_UNSYNCHRONIZED_BIT didn't seem to make much of a difference on Apple gpus, so we stick to the simple way.
+        U32 MapBits = GL_MAP_WRITE_BIT;
 
+        //Note (observeur): Using a block size of 0 will run the following block and map the buffer all at once. It doesn't bother Apple machines; it might actually benefit them a little bit. A larger value is also fine. The largest buffers I observed were around 2 MB or 3 MB, while most buffers are smaller than 50000 bytes.
+        block_size = 524288;
+
+        //Note (observeur): This branch runs when block_size is set to 0 (all-in-one mapping).
         if(block_size == 0)
         {
             U8 * mptr = NULL;
             LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
             LL_PROFILE_GPU_ZONE("glBufferSubData");
 
-            mptr = (U8*) glMapBufferRange( target, start, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
+            mptr = (U8*) glMapBufferRange( target, start, size, MapBits);
+
             if(mptr)
             {
                 std::memcpy(mptr, (U8*) data, size);
@@ -1180,11 +1201,13 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data)
             return;
         }
 
+        //Note (observeur): The following code is executed when block_size is greater than 0.
+
         //Note (observeur): This is for analysis purpose only
-        if(size > block_size)
-        {
-            LL_INFOS() << "Large data range : " << size << LL_ENDL;
-        }
+        //if(size > block_size)
+        //{
+        //    LL_INFOS() << "Large data range (MB MODE) : " << size << LL_ENDL;
+        //}
 
         U8 * mptr = NULL;
 
@@ -1195,7 +1218,8 @@ static void flush_vbo(GLenum target, U32 start, U32 end, void* data)
             U32 tend = llmin(i + block_size, end);
             size = tend - i + 1;
 
-            mptr = (U8*) glMapBufferRange( target, i, size, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
+            mptr = (U8*) glMapBufferRange( target, i, size, MapBits );
+
             if(mptr)
             {
                 std::memcpy(mptr, (U8*) data + (i-start), size);
diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml
index a07396b5b1..00c7eae9bc 100644
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -2468,7 +2468,7 @@
       <key>Value</key>
       <integer>0</integer>
     </map>
-    <key>DoubleClickTeleport</key> 
+    <key>DoubleClickTeleport</key>
     <map>
       <key>Comment</key>
       <string>Enable double-click to teleport where allowed (afects minimap and people panel)</string>
@@ -8898,7 +8898,7 @@
       <key>Value</key>
       <integer>1</integer>
     </map>
-  
+
   <key>RenderReflectionDetail</key>
     <map>
       <key>Comment</key>
@@ -8965,7 +8965,7 @@
     <key>Value</key>
     <real>1</real>
   </map>
-  
+
   <key>RenderReflectionProbeDrawDistance</key>
   <map>
     <key>Comment</key>
@@ -9153,7 +9153,7 @@
     <key>Value</key>
     <real>0.7</real>
   </map>
-  
+
   <key>RenderReflectionProbeMaxLocalLightAmbiance</key>
   <map>
     <key>Comment</key>
@@ -10589,7 +10589,7 @@
             <string>Boolean</string>
         <key>Value</key>
             <integer>0</integer>
-    </map> 
+    </map>
     <key>NearbyListShowMap</key>
     <map>
       <key>Comment</key>
@@ -13477,13 +13477,13 @@
     <key>MaxFPS</key>
     <map>
       <key>Comment</key>
-      <string>OBSOLETE UNUSED setting.</string>
+      <string>FPS Limiter.</string>
       <key>Persist</key>
       <integer>1</integer>
       <key>Type</key>
-      <string>F32</string>
+      <string>U32</string>
       <key>Value</key>
-      <real>-1.0</real>
+      <real>0</real>
     </map>
     <key>ZoomDirect</key>
     <map>
@@ -15384,7 +15384,7 @@
     <key>Type</key>
     <string>Boolean</string>
     <key>Value</key>
-    <integer>1</integer>        
+    <integer>1</integer>
   </map>
   <key>UpdateAppWindowTitleBar</key>
   <map>
diff --git a/indra/newview/llagentcamera.cpp b/indra/newview/llagentcamera.cpp
index 1912d9d1d5..8e8d7e6c50 100644
--- a/indra/newview/llagentcamera.cpp
+++ b/indra/newview/llagentcamera.cpp
@@ -2663,9 +2663,9 @@ void LLAgentCamera::setCameraPosAndFocusGlobal(const LLVector3d& camera_pos, con
 
     if (mCameraAnimating)
     {
-        const F64 ANIM_METERS_PER_SECOND = 10.0;
+        const F64 ANIM_METERS_PER_SECOND = 15.0;
         const F64 MIN_ANIM_SECONDS = 0.5;
-        const F64 MAX_ANIM_SECONDS = 10.0;
+        const F64 MAX_ANIM_SECONDS = 3.0;
         F64 anim_duration = llmax( MIN_ANIM_SECONDS, sqrt(focus_delta_squared) / ANIM_METERS_PER_SECOND );
         anim_duration = llmin( anim_duration, MAX_ANIM_SECONDS );
         setAnimationDuration( (F32)anim_duration );
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index 2d65c725b5..0f1b5a789b 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -393,7 +393,6 @@ static std::string gLaunchFileOnQuit;
 // Used on Win32 for other apps to identify our window (eg, win_setup)
 const char* const VIEWER_WINDOW_CLASSNAME = "Second Life";
 
-U64 fpsLimitSleepUntil = 0; // fps limiter : time until to render the frame again
 
 //----------------------------------------------------------------------------
 
@@ -1373,21 +1372,11 @@ bool LLAppViewer::frame()
 
 bool LLAppViewer::doFrame()
 {
+    static LLCachedControl<U32> fpsLimitMaxFps(gSavedSettings, "MaxFPS", 0);
 
-    // FPS Limit
-
-    U64 fpsLimitNow = LLTrace::BlockTimer::getCPUClockCount64();
-    U64 fpsLimitFrameStartTime = fpsLimitNow;
-    if(fpsLimitSleepUntil > 0)
-    {
-        if(fpsLimitSleepUntil > fpsLimitNow) return 0;
-    }
-    else
-    {
-        fpsLimitSleepUntil = 0;
-    }
-
-
+    U64 fpsLimitSleepFor = 0;
+    U64 fpsLimitFrameStartTime = 0;
+    if(fpsLimitMaxFps > 0) fpsLimitFrameStartTime = LLTrace::BlockTimer::getCPUClockCount64();
 
     LL_RECORD_BLOCK_TIME(FTM_FRAME);
     {
@@ -1559,24 +1548,17 @@ bool LLAppViewer::doFrame()
             }
         }
 
-        // fps limiter
-
-        fpsLimitNow = LLTrace::BlockTimer::getCPUClockCount64();
-        U64 fpsLimitFrameTime = fpsLimitNow - fpsLimitFrameStartTime;
-        static LLCachedControl<U32> fpsLimitMaxFps(gSavedSettings, "MaxFPS", 0);
-
         if(fpsLimitMaxFps > 0)
         {
+            U64 fpsLimitFrameTime = LLTrace::BlockTimer::getCPUClockCount64() - fpsLimitFrameStartTime;
             U64 desired_time_ns = (U32)(1000000.f / fpsLimitMaxFps);
 
-            if(fpsLimitFrameTime < desired_time_ns)
+            if((fpsLimitFrameTime+1000) < desired_time_ns)
             {
-                U64 fpsLimitSleepUntil_for = desired_time_ns - fpsLimitFrameTime;
-                fpsLimitSleepUntil = LLTrace::BlockTimer::getCPUClockCount64() + fpsLimitSleepUntil_for;
+                fpsLimitSleepFor = (desired_time_ns - fpsLimitFrameTime - 1000) * 1.0;
             }
         }
 
-
         {
             LL_PROFILE_ZONE_NAMED_CATEGORY_APP( "df pauseMainloopTimeout" )
         pingMainloopTimeout("Main:Sleep");
@@ -1589,6 +1571,11 @@ bool LLAppViewer::doFrame()
             //LL_RECORD_BLOCK_TIME(SLEEP2);
             LL_PROFILE_ZONE_WARN( "Sleep2" )
 
+            if(fpsLimitSleepFor)
+            {
+                usleep(fpsLimitSleepFor);
+            }
+
             // yield some time to the os based on command line option
             static LLCachedControl<S32> yield_time(gSavedSettings, "YieldTime", -1);
             if(yield_time >= 0)
author	Erik Kundiman <erik@megapahit.org>	2024-06-30 07:37:29 +0800
committer	Erik Kundiman <erik@megapahit.org>	2024-06-30 07:37:29 +0800
commit	3c5f4d1273e02c9d247ff86d6cef20c37e587f19 (patch)
tree	e78cdf529cd6cc3b7657482048f7e577eb6f1e47 /indra
parent	095bf053f148dac958158f6709303bf09fbcd719 (diff)
parent	2e16d1b365e465c0c3e505770e213200ec25fc12 (diff)