diff options
| author | Dave Parks <davep@lindenlab.com> | 2024-12-12 13:46:01 -0800 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-12 15:46:01 -0600 | 
| commit | eff46262c8324ed4931cdd544a757f0c13f9ec0a (patch) | |
| tree | 0d5c80a965d93ae44d0184b5dd7493535a2c9d33 | |
| parent | 5a629574b775e2a8f3602ee183fd9e1b2fcfac68 (diff) | |
#2590 Radeon mac optimization pass (#3277)
- Skip updating of reflection probes that are not the default probe when probe coverage is set to "None"
- Enable RenderAppleUseMultGL and disable occlusion culling on Macs with AMD GPUs
- Reduce the number of texture decode threads on Macs with Intel CPUs.
- Move texture deletion to LLImageGL::updateClass and prevent textures from staying resident in VRAM longer than 3 frames
- Disable SSAO by default on Macs with Intel CPUs
| -rw-r--r-- | indra/llcommon/llcommon.cpp | 8 | ||||
| -rw-r--r-- | indra/llcommon/llmemory.h | 2 | ||||
| -rw-r--r-- | indra/llrender/llimagegl.cpp | 32 | ||||
| -rw-r--r-- | indra/llwindow/llwindowmacosx.cpp | 52 | ||||
| -rw-r--r-- | indra/llwindow/llwindowmacosx.h | 3 | ||||
| -rw-r--r-- | indra/newview/featuretable_mac.txt | 24 | ||||
| -rw-r--r-- | indra/newview/llappviewer.cpp | 7 | ||||
| -rw-r--r-- | indra/newview/llreflectionmapmanager.cpp | 8 | ||||
| -rw-r--r-- | indra/newview/llviewercontrol.cpp | 18 | ||||
| -rw-r--r-- | indra/newview/llviewerwindow.cpp | 13 | 
10 files changed, 112 insertions, 55 deletions
| diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp index f1f3958fe0..84b35749cc 100644 --- a/indra/llcommon/llcommon.cpp +++ b/indra/llcommon/llcommon.cpp @@ -54,7 +54,7 @@ void* ll_tracy_new(size_t size)      {          throw std::bad_alloc();      } -    TracyAlloc(ptr, size); +    LL_PROFILE_ALLOC(ptr, size);      return ptr;  } @@ -70,7 +70,7 @@ void* operator new[](std::size_t count)  void ll_tracy_delete(void* ptr)  { -    TracyFree(ptr); +    LL_PROFILE_FREE(ptr);      if (gProfilerEnabled)      {          //LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; @@ -102,13 +102,13 @@ void operator delete[](void* ptr) noexcept  void *tracy_aligned_malloc(size_t size, size_t alignment)  {      auto ptr = ll_aligned_malloc_fallback(size, alignment); -    if (ptr) TracyAlloc(ptr, size); +    if (ptr) LL_PROFILE_ALLOC(ptr, size);      return ptr;  }  void tracy_aligned_free(void *memblock)  { -    TracyFree(memblock); +    LL_PROFILE_FREE(memblock);      ll_aligned_free_fallback(memblock);  } diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 80cfe554c4..b616edfde7 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -222,7 +222,7 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r          ll_aligned_free_16(ptr);      }  #endif -    LL_PROFILE_ALLOC(ptr, size); +    LL_PROFILE_ALLOC(ret, size);      return ret;  } diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp index 84c61c790f..5ac3243fd4 100644 --- a/indra/llrender/llimagegl.cpp +++ b/indra/llrender/llimagegl.cpp @@ -1052,7 +1052,7 @@ U32 type_width_from_pixtype(U32 pixtype)  bool should_stagger_image_set(bool compressed)  {  #if LL_DARWIN -    return false; +    return !compressed && on_main_thread() && gGLManager.mIsAMD;  #else      // glTexSubImage2D doesn't work with compressed textures on select tested Nvidia GPUs on Windows 10 -Cosmic,2023-03-08      // Setting media textures off-thread seems 
faster when not using sub_image_lines (Nvidia/Windows 10) -Cosmic,2023-03-31 @@ -1270,37 +1270,37 @@ void LLImageGL::generateTextures(S32 numTextures, U32 *textures)      }  } +constexpr int DELETE_DELAY = 3; // number of frames to wait before deleting textures +static std::vector<U32> sFreeList[DELETE_DELAY+1]; +  // static  void LLImageGL::updateClass()  {      sFrameCount++; + +    // wait a few frames before actually deleting the textures to avoid +    // synchronization issues with the GPU +    U32 idx = (sFrameCount+DELETE_DELAY) % (DELETE_DELAY+1); + +    if (!sFreeList[idx].empty()) +    { +        free_tex_images((GLsizei) sFreeList[idx].size(), sFreeList[idx].data()); +        glDeleteTextures((GLsizei)sFreeList[idx].size(), sFreeList[idx].data()); +        sFreeList[idx].resize(0); +    }  }  // static  void LLImageGL::deleteTextures(S32 numTextures, const U32 *textures)  { -    // wait a few frames before actually deleting the textures to avoid -    // synchronization issues with the GPU -    static std::vector<U32> sFreeList[4]; -      if (gGLManager.mInited)      {          LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE; -        U32 idx = sFrameCount % 4; - +        U32 idx = sFrameCount % (DELETE_DELAY+1);          for (S32 i = 0; i < numTextures; ++i)          {              sFreeList[idx].push_back(textures[i]);          } - -        idx = (sFrameCount + 3) % 4; - -        if (!sFreeList[idx].empty()) -        { -            free_tex_images((GLsizei) sFreeList[idx].size(), sFreeList[idx].data()); -            glDeleteTextures((GLsizei)sFreeList[idx].size(), sFreeList[idx].data()); -            sFreeList[idx].resize(0); -        }      }  } diff --git a/indra/llwindow/llwindowmacosx.cpp b/indra/llwindow/llwindowmacosx.cpp index 80001b14ee..f26d692363 100644 --- a/indra/llwindow/llwindowmacosx.cpp +++ b/indra/llwindow/llwindowmacosx.cpp @@ -68,6 +68,41 @@ namespace  bool LLWindowMacOSX::sUseMultGL = false; +//static +void LLWindowMacOSX::setUseMultGL(bool 
use_mult_gl) +{ +    bool was_enabled = sUseMultGL; + +    sUseMultGL = use_mult_gl; + +    if (gGLManager.mInited) +    { +        CGLContextObj ctx = CGLGetCurrentContext(); +        //enable multi-threaded OpenGL (whether or not sUseMultGL actually changed) +        if (sUseMultGL) +        { +            CGLError cgl_err; + +            cgl_err =  CGLEnable( ctx, kCGLCEMPEngine); + +            if (cgl_err != kCGLNoError ) +            { +                LL_INFOS("GLInit") << "Multi-threaded OpenGL not available." << LL_ENDL; +                sUseMultGL = false; +            } +            else +            { +                LL_INFOS("GLInit") << "Multi-threaded OpenGL enabled." << LL_ENDL; +            } +        } +        else if (was_enabled) +        { +            CGLDisable( ctx, kCGLCEMPEngine); +            LL_INFOS("GLInit") << "Multi-threaded OpenGL disabled." << LL_ENDL; +        } +    } +} +  // Cross-platform bits:  bool check_for_card(const char* RENDERER, const char* bad_card) @@ -704,23 +739,8 @@ bool LLWindowMacOSX::createContext(int x, int y, int width, int height, int bits      // Disable vertical sync for swap      toggleVSync(enable_vsync); -    //enable multi-threaded OpenGL -    if (sUseMultGL) -    { -        CGLError cgl_err; -        CGLContextObj ctx = CGLGetCurrentContext(); - -        cgl_err =  CGLEnable( ctx, kCGLCEMPEngine); +    setUseMultGL(sUseMultGL); -        if (cgl_err != kCGLNoError ) -        { -            LL_INFOS("GLInit") << "Multi-threaded OpenGL not available." << LL_ENDL; -        } -        else -        { -            LL_INFOS("GLInit") << "Multi-threaded OpenGL enabled." 
<< LL_ENDL; -        } -    }      makeFirstResponder(mWindow, mGLView);      return true; diff --git a/indra/llwindow/llwindowmacosx.h b/indra/llwindow/llwindowmacosx.h index f5b6441746..7de1a40d93 100644 --- a/indra/llwindow/llwindowmacosx.h +++ b/indra/llwindow/llwindowmacosx.h @@ -147,6 +147,9 @@ public:      void toggleVSync(bool enable_vsync) override; +    // enable or disable multithreaded GL +    static void setUseMultGL(bool use_mult_gl); +  protected:      LLWindowMacOSX(LLWindowCallbacks* callbacks,          const std::string& title, const std::string& name, int x, int y, int width, int height, U32 flags, diff --git a/indra/newview/featuretable_mac.txt b/indra/newview/featuretable_mac.txt index aa3d241ecb..a0af98a451 100644 --- a/indra/newview/featuretable_mac.txt +++ b/indra/newview/featuretable_mac.txt @@ -1,4 +1,4 @@ -version 68 +version 71  // The version number above should be incremented IF AND ONLY IF some  // change has been made that is sufficiently important to justify  // resetting the graphics preferences of all users to the recommended @@ -67,9 +67,9 @@ RenderFSAAType			    1	2  RenderFSAASamples			1	3  RenderMaxTextureIndex		1	16  RenderGLContextCoreProfile         1   1 -RenderGLMultiThreadedTextures      1   0 -RenderGLMultiThreadedMedia         1   0 -RenderAppleUseMultGL        1   0 +RenderGLMultiThreadedTextures      1   1 +RenderGLMultiThreadedMedia         1   1 +RenderAppleUseMultGL        1   1  RenderReflectionsEnabled    1   1  RenderReflectionProbeDetail	1	2  RenderScreenSpaceReflections 1  1 @@ -405,20 +405,30 @@ list TexUnit16orLess  RenderTerrainPBRDetail      1   -1  list AMD -RenderDeferredSSAO			1	0 +UseOcclusion                    1   0 +RenderGLMultiThreadedTextures   1   0 + +list NVIDIA +RenderGLMultiThreadedTextures   1   0 +RenderGLMultiThreadedMedia      1   0 +RenderAppleUseMultGL        1   0  list Intel  RenderAnisotropic			1	0  RenderFSAASamples			1	0 +RenderGLMultiThreadedTextures   1   0 
+RenderGLMultiThreadedMedia      1   0 +RenderAppleUseMultGL        1   0  // AppleGPU and NonAppleGPU can be thought of as Apple silicon vs Intel Mac  list AppleGPU  RenderGLMultiThreadedMedia  1   0  RenderAppleUseMultGL        1   0 +RenderGLMultiThreadedTextures   1   0 +RenderGLMultiThreadedMedia      1   0  list NonAppleGPU -RenderGLMultiThreadedMedia  1   0 -RenderAppleUseMultGL        1   0 +RenderDeferredSSAO			1   0  list GL3  RenderFSAASamples           0   0 diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index f79eb5ddce..c770b7c917 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -2171,7 +2171,12 @@ bool LLAppViewer::initThreads()      // get the number of concurrent threads that can run      S32 cores = std::thread::hardware_concurrency(); - +#if LL_DARWIN +    if (!gGLManager.mIsApple) +    { +        cores /= 2; +    } +#endif      U32 max_cores = gSavedSettings.getU32("EmulateCoreCount");      if (max_cores != 0)      { diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp index 232b0b1cdf..8f75b108cc 100644 --- a/indra/newview/llreflectionmapmanager.cpp +++ b/indra/newview/llreflectionmapmanager.cpp @@ -404,6 +404,13 @@ void LLReflectionMapManager::update()          {              closestDynamic = probe;          } + +        if (sLevel == 0) +        { +            // only update default probe when coverage is set to none +            llassert(probe == mDefaultProbe); +            break; +        }      }      if (realtime && closestDynamic != nullptr) @@ -713,6 +720,7 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)      }      else      { +        llassert(gSavedSettings.getS32("RenderReflectionProbeLevel") > 0); // should never update a probe that's not the default probe if reflection coverage is none          probe->update(mRenderTarget.getWidth(), face);      } diff --git a/indra/newview/llviewercontrol.cpp 
b/indra/newview/llviewercontrol.cpp index 172ffcb0d4..18746e76fc 100644 --- a/indra/newview/llviewercontrol.cpp +++ b/indra/newview/llviewercontrol.cpp @@ -77,6 +77,10 @@  #include "llstartup.h"  #include "llperfstats.h" +#if LL_DARWIN +#include "llwindowmacosx.h" +#endif +  // Third party library includes  #include <boost/algorithm/string.hpp> @@ -453,6 +457,17 @@ static bool handleReflectionProbeDetailChanged(const LLSD& newvalue)      return true;  } +#if LL_DARWIN +static bool handleAppleUseMultGLChanged(const LLSD& newvalue) +{ +    if (gGLManager.mInited) +    { +        LLWindowMacOSX::setUseMultGL(newvalue.asBoolean()); +    } +    return true; +} +#endif +  static bool handleHeroProbeResolutionChanged(const LLSD &newvalue)  {      if (gPipeline.isInit()) @@ -820,6 +835,9 @@ void settings_setup_listeners()      setting_setup_signal_listener(gSavedSettings, "RenderReflectionProbeLevel", handleReflectionProbeDetailChanged);      setting_setup_signal_listener(gSavedSettings, "RenderReflectionProbeDetail", handleReflectionProbeDetailChanged);      setting_setup_signal_listener(gSavedSettings, "RenderReflectionsEnabled", handleReflectionProbeDetailChanged); +#if LL_DARWIN +    setting_setup_signal_listener(gSavedSettings, "RenderAppleUseMultGL", handleAppleUseMultGLChanged); +#endif      setting_setup_signal_listener(gSavedSettings, "RenderScreenSpaceReflections", handleReflectionProbeDetailChanged);      setting_setup_signal_listener(gSavedSettings, "RenderMirrors", handleReflectionProbeDetailChanged);      setting_setup_signal_listener(gSavedSettings, "RenderHeroProbeResolution", handleHeroProbeResolutionChanged); diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp index 4bd1cdd6a1..8a5aac9b8b 100644 --- a/indra/newview/llviewerwindow.cpp +++ b/indra/newview/llviewerwindow.cpp @@ -5391,6 +5391,8 @@ bool LLViewerWindow::cubeSnapshot(const LLVector3& origin, LLCubeMapArray* cubea          camera->setUserClipPlane(clipPlane);      } + 
   gPipeline.pushRenderTypeMask(); +      glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT); // stencil buffer is deprecated | GL_STENCIL_BUFFER_BIT);      U32 dynamic_render_types[] = { @@ -5479,16 +5481,7 @@ bool LLViewerWindow::cubeSnapshot(const LLVector3& origin, LLCubeMapArray* cubea          }      } -    if (!dynamic_render) -    { -        for (int i = 0; i < dynamic_render_type_count; ++i) -        { -            if (prev_dynamic_render_type[i]) -            { -                gPipeline.toggleRenderType(dynamic_render_types[i]); -            } -        } -    } +    gPipeline.popRenderTypeMask();      if (hide_hud)      { | 
