#2590 Radeon Mac optimization pass (#3277)

- Skip updating of reflection probes that are not the default probe when probe coverage is set to "None"
- Enable RenderAppleUseMultGL and disable occlusion culling on Macs with AMD GPUs
- Reduce the number of texture decode threads on Macs with Intel CPUs
- Move texture deletion to LLImageGL::updateClass and prevent textures from staying resident in VRAM longer than 3 frames
- Disable SSAO by default on Macs with Intel CPUs
author: Dave Parks <davep@lindenlab.com> 2024-12-12 13:46:01 -0800
committer: GitHub <noreply@github.com> 2024-12-12 15:46:01 -0600
commit: eff46262c8324ed4931cdd544a757f0c13f9ec0a (patch)
tree: 0d5c80a965d93ae44d0184b5dd7493535a2c9d33
parent: 5a629574b775e2a8f3602ee183fd9e1b2fcfac68 (diff)
10 files changed, 112 insertions, 55 deletions
diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp
index f1f3958fe0..84b35749cc 100644
--- a/indra/llcommon/llcommon.cpp
+++ b/indra/llcommon/llcommon.cpp
@@ -54,7 +54,7 @@ void* ll_tracy_new(size_t size)
     {
         throw std::bad_alloc();
     }
-    TracyAlloc(ptr, size);
+    LL_PROFILE_ALLOC(ptr, size);
     return ptr;
 }
 
@@ -70,7 +70,7 @@ void* operator new[](std::size_t count)
 
 void ll_tracy_delete(void* ptr)
 {
-    TracyFree(ptr);
+    LL_PROFILE_FREE(ptr);
     if (gProfilerEnabled)
     {
         //LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
@@ -102,13 +102,13 @@ void operator delete[](void* ptr) noexcept
 void *tracy_aligned_malloc(size_t size, size_t alignment)
 {
     auto ptr = ll_aligned_malloc_fallback(size, alignment);
-    if (ptr) TracyAlloc(ptr, size);
+    if (ptr) LL_PROFILE_ALLOC(ptr, size);
     return ptr;
 }
 
 void tracy_aligned_free(void *memblock)
 {
-    TracyFree(memblock);
+    LL_PROFILE_FREE(memblock);
     ll_aligned_free_fallback(memblock);
 }
 
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index 80cfe554c4..b616edfde7 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -222,7 +222,7 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r
         ll_aligned_free_16(ptr);
     }
 #endif
-    LL_PROFILE_ALLOC(ptr, size);
+    LL_PROFILE_ALLOC(ret, size);
     return ret;
 }
 
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 84c61c790f..5ac3243fd4 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -1052,7 +1052,7 @@ U32 type_width_from_pixtype(U32 pixtype)
 bool should_stagger_image_set(bool compressed)
 {
 #if LL_DARWIN
-    return false;
+    return !compressed && on_main_thread() && gGLManager.mIsAMD;
 #else
     // glTexSubImage2D doesn't work with compressed textures on select tested Nvidia GPUs on Windows 10 -Cosmic,2023-03-08
     // Setting media textures off-thread seems faster when not using sub_image_lines (Nvidia/Windows 10) -Cosmic,2023-03-31
@@ -1270,37 +1270,37 @@ void LLImageGL::generateTextures(S32 numTextures, U32 *textures)
     }
 }
 
+constexpr int DELETE_DELAY = 3; // number of frames to wait before deleting textures
+static std::vector<U32> sFreeList[DELETE_DELAY+1];
+
 // static
 void LLImageGL::updateClass()
 {
     sFrameCount++;
+
+    // wait a few frames before actually deleting the textures to avoid
+    // synchronization issues with the GPU
+    U32 idx = (sFrameCount+DELETE_DELAY) % (DELETE_DELAY+1);
+
+    if (!sFreeList[idx].empty())
+    {
+        free_tex_images((GLsizei) sFreeList[idx].size(), sFreeList[idx].data());
+        glDeleteTextures((GLsizei)sFreeList[idx].size(), sFreeList[idx].data());
+        sFreeList[idx].resize(0);
+    }
 }
 
 // static
 void LLImageGL::deleteTextures(S32 numTextures, const U32 *textures)
 {
-    // wait a few frames before actually deleting the textures to avoid
-    // synchronization issues with the GPU
-    static std::vector<U32> sFreeList[4];
-
     if (gGLManager.mInited)
     {
         LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
-        U32 idx = sFrameCount % 4;
-
+        U32 idx = sFrameCount % (DELETE_DELAY+1);
         for (S32 i = 0; i < numTextures; ++i)
         {
             sFreeList[idx].push_back(textures[i]);
         }
-
-        idx = (sFrameCount + 3) % 4;
-
-        if (!sFreeList[idx].empty())
-        {
-            free_tex_images((GLsizei) sFreeList[idx].size(), sFreeList[idx].data());
-            glDeleteTextures((GLsizei)sFreeList[idx].size(), sFreeList[idx].data());
-            sFreeList[idx].resize(0);
-        }
     }
 }
 
diff --git a/indra/llwindow/llwindowmacosx.cpp b/indra/llwindow/llwindowmacosx.cpp
index 80001b14ee..f26d692363 100644
--- a/indra/llwindow/llwindowmacosx.cpp
+++ b/indra/llwindow/llwindowmacosx.cpp
@@ -68,6 +68,41 @@ namespace
 
 bool LLWindowMacOSX::sUseMultGL = false;
 
+//static
+void LLWindowMacOSX::setUseMultGL(bool use_mult_gl)
+{
+    bool was_enabled = sUseMultGL;
+
+    sUseMultGL = use_mult_gl;
+
+    if (gGLManager.mInited)
+    {
+        CGLContextObj ctx = CGLGetCurrentContext();
+        //enable multi-threaded OpenGL (whether or not sUseMultGL actually changed)
+        if (sUseMultGL)
+        {
+            CGLError cgl_err;
+
+            cgl_err =  CGLEnable( ctx, kCGLCEMPEngine);
+
+            if (cgl_err != kCGLNoError )
+            {
+                LL_INFOS("GLInit") << "Multi-threaded OpenGL not available." << LL_ENDL;
+                sUseMultGL = false;
+            }
+            else
+            {
+                LL_INFOS("GLInit") << "Multi-threaded OpenGL enabled." << LL_ENDL;
+            }
+        }
+        else if (was_enabled)
+        {
+            CGLDisable( ctx, kCGLCEMPEngine);
+            LL_INFOS("GLInit") << "Multi-threaded OpenGL disabled." << LL_ENDL;
+        }
+    }
+}
+
 // Cross-platform bits:
 
 bool check_for_card(const char* RENDERER, const char* bad_card)
@@ -704,23 +739,8 @@ bool LLWindowMacOSX::createContext(int x, int y, int width, int height, int bits
     // Disable vertical sync for swap
     toggleVSync(enable_vsync);
 
-    //enable multi-threaded OpenGL
-    if (sUseMultGL)
-    {
-        CGLError cgl_err;
-        CGLContextObj ctx = CGLGetCurrentContext();
-
-        cgl_err =  CGLEnable( ctx, kCGLCEMPEngine);
+    setUseMultGL(sUseMultGL);
 
-        if (cgl_err != kCGLNoError )
-        {
-            LL_INFOS("GLInit") << "Multi-threaded OpenGL not available." << LL_ENDL;
-        }
-        else
-        {
-            LL_INFOS("GLInit") << "Multi-threaded OpenGL enabled." << LL_ENDL;
-        }
-    }
     makeFirstResponder(mWindow, mGLView);
 
     return true;
diff --git a/indra/llwindow/llwindowmacosx.h b/indra/llwindow/llwindowmacosx.h
index f5b6441746..7de1a40d93 100644
--- a/indra/llwindow/llwindowmacosx.h
+++ b/indra/llwindow/llwindowmacosx.h
@@ -147,6 +147,9 @@ public:
 
     void toggleVSync(bool enable_vsync) override;
 
+    // enable or disable multithreaded GL
+    static void setUseMultGL(bool use_mult_gl);
+
 protected:
     LLWindowMacOSX(LLWindowCallbacks* callbacks,
         const std::string& title, const std::string& name, int x, int y, int width, int height, U32 flags,
diff --git a/indra/newview/featuretable_mac.txt b/indra/newview/featuretable_mac.txt
index aa3d241ecb..a0af98a451 100644
--- a/indra/newview/featuretable_mac.txt
+++ b/indra/newview/featuretable_mac.txt
@@ -1,4 +1,4 @@
-version 68
+version 71
 // The version number above should be incremented IF AND ONLY IF some
 // change has been made that is sufficiently important to justify
 // resetting the graphics preferences of all users to the recommended
@@ -67,9 +67,9 @@ RenderFSAAType			    1	2
 RenderFSAASamples			1	3
 RenderMaxTextureIndex		1	16
 RenderGLContextCoreProfile         1   1
-RenderGLMultiThreadedTextures      1   0
-RenderGLMultiThreadedMedia         1   0
-RenderAppleUseMultGL        1   0
+RenderGLMultiThreadedTextures      1   1
+RenderGLMultiThreadedMedia         1   1
+RenderAppleUseMultGL        1   1
 RenderReflectionsEnabled    1   1
 RenderReflectionProbeDetail	1	2
 RenderScreenSpaceReflections 1  1
@@ -405,20 +405,30 @@ list TexUnit16orLess
 RenderTerrainPBRDetail      1   -1
 
 list AMD
-RenderDeferredSSAO			1	0
+UseOcclusion                    1   0
+RenderGLMultiThreadedTextures   1   0
+
+list NVIDIA
+RenderGLMultiThreadedTextures   1   0
+RenderGLMultiThreadedMedia      1   0
+RenderAppleUseMultGL        1   0
 
 list Intel
 RenderAnisotropic			1	0
 RenderFSAASamples			1	0
+RenderGLMultiThreadedTextures   1   0
+RenderGLMultiThreadedMedia      1   0
+RenderAppleUseMultGL        1   0
 
 // AppleGPU and NonAppleGPU can be thought of as Apple silicon vs Intel Mac
 list AppleGPU
 RenderGLMultiThreadedMedia  1   0
 RenderAppleUseMultGL        1   0
+RenderGLMultiThreadedTextures   1   0
+RenderGLMultiThreadedMedia      1   0
 
 list NonAppleGPU
-RenderGLMultiThreadedMedia  1   0
-RenderAppleUseMultGL        1   0
+RenderDeferredSSAO			1   0
 
 list GL3
 RenderFSAASamples           0   0
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index f79eb5ddce..c770b7c917 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -2171,7 +2171,12 @@ bool LLAppViewer::initThreads()
 
     // get the number of concurrent threads that can run
     S32 cores = std::thread::hardware_concurrency();
-
+#if LL_DARWIN
+    if (!gGLManager.mIsApple)
+    {
+        cores /= 2;
+    }
+#endif
     U32 max_cores = gSavedSettings.getU32("EmulateCoreCount");
     if (max_cores != 0)
     {
diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp
index 232b0b1cdf..8f75b108cc 100644
--- a/indra/newview/llreflectionmapmanager.cpp
+++ b/indra/newview/llreflectionmapmanager.cpp
@@ -404,6 +404,13 @@ void LLReflectionMapManager::update()
         {
             closestDynamic = probe;
         }
+
+        if (sLevel == 0)
+        {
+            // only update default probe when coverage is set to none
+            llassert(probe == mDefaultProbe);
+            break;
+        }
     }
 
     if (realtime && closestDynamic != nullptr)
@@ -713,6 +720,7 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
     }
     else
     {
+        llassert(gSavedSettings.getS32("RenderReflectionProbeLevel") > 0); // should never update a probe that's not the default probe if reflection coverage is none
         probe->update(mRenderTarget.getWidth(), face);
     }
 
diff --git a/indra/newview/llviewercontrol.cpp b/indra/newview/llviewercontrol.cpp
index 172ffcb0d4..18746e76fc 100644
--- a/indra/newview/llviewercontrol.cpp
+++ b/indra/newview/llviewercontrol.cpp
@@ -77,6 +77,10 @@
 #include "llstartup.h"
 #include "llperfstats.h"
 
+#if LL_DARWIN
+#include "llwindowmacosx.h"
+#endif
+
 // Third party library includes
 #include <boost/algorithm/string.hpp>
 
@@ -453,6 +457,17 @@ static bool handleReflectionProbeDetailChanged(const LLSD& newvalue)
     return true;
 }
 
+#if LL_DARWIN
+static bool handleAppleUseMultGLChanged(const LLSD& newvalue)
+{
+    if (gGLManager.mInited)
+    {
+        LLWindowMacOSX::setUseMultGL(newvalue.asBoolean());
+    }
+    return true;
+}
+#endif
+
 static bool handleHeroProbeResolutionChanged(const LLSD &newvalue)
 {
     if (gPipeline.isInit())
@@ -820,6 +835,9 @@ void settings_setup_listeners()
     setting_setup_signal_listener(gSavedSettings, "RenderReflectionProbeLevel", handleReflectionProbeDetailChanged);
     setting_setup_signal_listener(gSavedSettings, "RenderReflectionProbeDetail", handleReflectionProbeDetailChanged);
     setting_setup_signal_listener(gSavedSettings, "RenderReflectionsEnabled", handleReflectionProbeDetailChanged);
+#if LL_DARWIN
+    setting_setup_signal_listener(gSavedSettings, "RenderAppleUseMultGL", handleAppleUseMultGLChanged);
+#endif
     setting_setup_signal_listener(gSavedSettings, "RenderScreenSpaceReflections", handleReflectionProbeDetailChanged);
     setting_setup_signal_listener(gSavedSettings, "RenderMirrors", handleReflectionProbeDetailChanged);
     setting_setup_signal_listener(gSavedSettings, "RenderHeroProbeResolution", handleHeroProbeResolutionChanged);
diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp
index 4bd1cdd6a1..8a5aac9b8b 100644
--- a/indra/newview/llviewerwindow.cpp
+++ b/indra/newview/llviewerwindow.cpp
@@ -5391,6 +5391,8 @@ bool LLViewerWindow::cubeSnapshot(const LLVector3& origin, LLCubeMapArray* cubea
         camera->setUserClipPlane(clipPlane);
     }
 
+    gPipeline.pushRenderTypeMask();
+
     glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT); // stencil buffer is deprecated | GL_STENCIL_BUFFER_BIT);
 
     U32 dynamic_render_types[] = {
@@ -5479,16 +5481,7 @@ bool LLViewerWindow::cubeSnapshot(const LLVector3& origin, LLCubeMapArray* cubea
         }
     }
 
-    if (!dynamic_render)
-    {
-        for (int i = 0; i < dynamic_render_type_count; ++i)
-        {
-            if (prev_dynamic_render_type[i])
-            {
-                gPipeline.toggleRenderType(dynamic_render_types[i]);
-            }
-        }
-    }
+    gPipeline.popRenderTypeMask();
 
     if (hide_hud)
     {
author	Dave Parks <davep@lindenlab.com>	2024-12-12 13:46:01 -0800
committer	GitHub <noreply@github.com>	2024-12-12 15:46:01 -0600
commit	eff46262c8324ed4931cdd544a757f0c13f9ec0a (patch)
tree	0d5c80a965d93ae44d0184b5dd7493535a2c9d33
parent	5a629574b775e2a8f3602ee183fd9e1b2fcfac68 (diff)