summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Parks <davep@lindenlab.com>2024-12-12 13:46:01 -0800
committerGitHub <noreply@github.com>2024-12-12 15:46:01 -0600
commiteff46262c8324ed4931cdd544a757f0c13f9ec0a (patch)
tree0d5c80a965d93ae44d0184b5dd7493535a2c9d33
parent5a629574b775e2a8f3602ee183fd9e1b2fcfac68 (diff)
#2590 Radeon mac optimization pass (#3277)
- Skip updating of reflection probes that are not the default probe when probe coverage is set to "None" - enable RenderAppleUseMultGL and disable occlusion culling on Macs with AMD GPUs - Reduce the number of texture decode threads on Macs with intel cpus. - Move texture deletion to LLImageGL::updateClass and prevent textures from staying resident in vram longer than 3 frames - Disable SSAO by default on Macs with intel CPUs
-rw-r--r--indra/llcommon/llcommon.cpp8
-rw-r--r--indra/llcommon/llmemory.h2
-rw-r--r--indra/llrender/llimagegl.cpp32
-rw-r--r--indra/llwindow/llwindowmacosx.cpp52
-rw-r--r--indra/llwindow/llwindowmacosx.h3
-rw-r--r--indra/newview/featuretable_mac.txt24
-rw-r--r--indra/newview/llappviewer.cpp7
-rw-r--r--indra/newview/llreflectionmapmanager.cpp8
-rw-r--r--indra/newview/llviewercontrol.cpp18
-rw-r--r--indra/newview/llviewerwindow.cpp13
10 files changed, 112 insertions, 55 deletions
diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp
index f1f3958fe0..84b35749cc 100644
--- a/indra/llcommon/llcommon.cpp
+++ b/indra/llcommon/llcommon.cpp
@@ -54,7 +54,7 @@ void* ll_tracy_new(size_t size)
{
throw std::bad_alloc();
}
- TracyAlloc(ptr, size);
+ LL_PROFILE_ALLOC(ptr, size);
return ptr;
}
@@ -70,7 +70,7 @@ void* operator new[](std::size_t count)
void ll_tracy_delete(void* ptr)
{
- TracyFree(ptr);
+ LL_PROFILE_FREE(ptr);
if (gProfilerEnabled)
{
//LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
@@ -102,13 +102,13 @@ void operator delete[](void* ptr) noexcept
void *tracy_aligned_malloc(size_t size, size_t alignment)
{
auto ptr = ll_aligned_malloc_fallback(size, alignment);
- if (ptr) TracyAlloc(ptr, size);
+ if (ptr) LL_PROFILE_ALLOC(ptr, size);
return ptr;
}
void tracy_aligned_free(void *memblock)
{
- TracyFree(memblock);
+ LL_PROFILE_FREE(memblock);
ll_aligned_free_fallback(memblock);
}
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index 80cfe554c4..b616edfde7 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -222,7 +222,7 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r
ll_aligned_free_16(ptr);
}
#endif
- LL_PROFILE_ALLOC(ptr, size);
+ LL_PROFILE_ALLOC(ret, size);
return ret;
}
diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp
index 84c61c790f..5ac3243fd4 100644
--- a/indra/llrender/llimagegl.cpp
+++ b/indra/llrender/llimagegl.cpp
@@ -1052,7 +1052,7 @@ U32 type_width_from_pixtype(U32 pixtype)
bool should_stagger_image_set(bool compressed)
{
#if LL_DARWIN
- return false;
+ return !compressed && on_main_thread() && gGLManager.mIsAMD;
#else
// glTexSubImage2D doesn't work with compressed textures on select tested Nvidia GPUs on Windows 10 -Cosmic,2023-03-08
// Setting media textures off-thread seems faster when not using sub_image_lines (Nvidia/Windows 10) -Cosmic,2023-03-31
@@ -1270,37 +1270,37 @@ void LLImageGL::generateTextures(S32 numTextures, U32 *textures)
}
}
+constexpr int DELETE_DELAY = 3; // number of frames to wait before deleting textures
+static std::vector<U32> sFreeList[DELETE_DELAY+1];
+
// static
void LLImageGL::updateClass()
{
sFrameCount++;
+
+ // wait a few frames before actually deleting the textures to avoid
+ // synchronization issues with the GPU
+ U32 idx = (sFrameCount+DELETE_DELAY) % (DELETE_DELAY+1);
+
+ if (!sFreeList[idx].empty())
+ {
+ free_tex_images((GLsizei) sFreeList[idx].size(), sFreeList[idx].data());
+ glDeleteTextures((GLsizei)sFreeList[idx].size(), sFreeList[idx].data());
+ sFreeList[idx].resize(0);
+ }
}
// static
void LLImageGL::deleteTextures(S32 numTextures, const U32 *textures)
{
- // wait a few frames before actually deleting the textures to avoid
- // synchronization issues with the GPU
- static std::vector<U32> sFreeList[4];
-
if (gGLManager.mInited)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
- U32 idx = sFrameCount % 4;
-
+ U32 idx = sFrameCount % (DELETE_DELAY+1);
for (S32 i = 0; i < numTextures; ++i)
{
sFreeList[idx].push_back(textures[i]);
}
-
- idx = (sFrameCount + 3) % 4;
-
- if (!sFreeList[idx].empty())
- {
- free_tex_images((GLsizei) sFreeList[idx].size(), sFreeList[idx].data());
- glDeleteTextures((GLsizei)sFreeList[idx].size(), sFreeList[idx].data());
- sFreeList[idx].resize(0);
- }
}
}
diff --git a/indra/llwindow/llwindowmacosx.cpp b/indra/llwindow/llwindowmacosx.cpp
index 80001b14ee..f26d692363 100644
--- a/indra/llwindow/llwindowmacosx.cpp
+++ b/indra/llwindow/llwindowmacosx.cpp
@@ -68,6 +68,41 @@ namespace
bool LLWindowMacOSX::sUseMultGL = false;
+//static
+void LLWindowMacOSX::setUseMultGL(bool use_mult_gl)
+{
+ bool was_enabled = sUseMultGL;
+
+ sUseMultGL = use_mult_gl;
+
+ if (gGLManager.mInited)
+ {
+ CGLContextObj ctx = CGLGetCurrentContext();
+ //enable multi-threaded OpenGL (whether or not sUseMultGL actually changed)
+ if (sUseMultGL)
+ {
+ CGLError cgl_err;
+
+ cgl_err = CGLEnable( ctx, kCGLCEMPEngine);
+
+ if (cgl_err != kCGLNoError )
+ {
+ LL_INFOS("GLInit") << "Multi-threaded OpenGL not available." << LL_ENDL;
+ sUseMultGL = false;
+ }
+ else
+ {
+ LL_INFOS("GLInit") << "Multi-threaded OpenGL enabled." << LL_ENDL;
+ }
+ }
+ else if (was_enabled)
+ {
+ CGLDisable( ctx, kCGLCEMPEngine);
+ LL_INFOS("GLInit") << "Multi-threaded OpenGL disabled." << LL_ENDL;
+ }
+ }
+}
+
// Cross-platform bits:
bool check_for_card(const char* RENDERER, const char* bad_card)
@@ -704,23 +739,8 @@ bool LLWindowMacOSX::createContext(int x, int y, int width, int height, int bits
// Disable vertical sync for swap
toggleVSync(enable_vsync);
- //enable multi-threaded OpenGL
- if (sUseMultGL)
- {
- CGLError cgl_err;
- CGLContextObj ctx = CGLGetCurrentContext();
-
- cgl_err = CGLEnable( ctx, kCGLCEMPEngine);
+ setUseMultGL(sUseMultGL);
- if (cgl_err != kCGLNoError )
- {
- LL_INFOS("GLInit") << "Multi-threaded OpenGL not available." << LL_ENDL;
- }
- else
- {
- LL_INFOS("GLInit") << "Multi-threaded OpenGL enabled." << LL_ENDL;
- }
- }
makeFirstResponder(mWindow, mGLView);
return true;
diff --git a/indra/llwindow/llwindowmacosx.h b/indra/llwindow/llwindowmacosx.h
index f5b6441746..7de1a40d93 100644
--- a/indra/llwindow/llwindowmacosx.h
+++ b/indra/llwindow/llwindowmacosx.h
@@ -147,6 +147,9 @@ public:
void toggleVSync(bool enable_vsync) override;
+ // enable or disable multithreaded GL
+ static void setUseMultGL(bool use_mult_gl);
+
protected:
LLWindowMacOSX(LLWindowCallbacks* callbacks,
const std::string& title, const std::string& name, int x, int y, int width, int height, U32 flags,
diff --git a/indra/newview/featuretable_mac.txt b/indra/newview/featuretable_mac.txt
index aa3d241ecb..a0af98a451 100644
--- a/indra/newview/featuretable_mac.txt
+++ b/indra/newview/featuretable_mac.txt
@@ -1,4 +1,4 @@
-version 68
+version 71
// The version number above should be incremented IF AND ONLY IF some
// change has been made that is sufficiently important to justify
// resetting the graphics preferences of all users to the recommended
@@ -67,9 +67,9 @@ RenderFSAAType 1 2
RenderFSAASamples 1 3
RenderMaxTextureIndex 1 16
RenderGLContextCoreProfile 1 1
-RenderGLMultiThreadedTextures 1 0
-RenderGLMultiThreadedMedia 1 0
-RenderAppleUseMultGL 1 0
+RenderGLMultiThreadedTextures 1 1
+RenderGLMultiThreadedMedia 1 1
+RenderAppleUseMultGL 1 1
RenderReflectionsEnabled 1 1
RenderReflectionProbeDetail 1 2
RenderScreenSpaceReflections 1 1
@@ -405,20 +405,30 @@ list TexUnit16orLess
RenderTerrainPBRDetail 1 -1
list AMD
-RenderDeferredSSAO 1 0
+UseOcclusion 1 0
+RenderGLMultiThreadedTextures 1 0
+
+list NVIDIA
+RenderGLMultiThreadedTextures 1 0
+RenderGLMultiThreadedMedia 1 0
+RenderAppleUseMultGL 1 0
list Intel
RenderAnisotropic 1 0
RenderFSAASamples 1 0
+RenderGLMultiThreadedTextures 1 0
+RenderGLMultiThreadedMedia 1 0
+RenderAppleUseMultGL 1 0
// AppleGPU and NonAppleGPU can be thought of as Apple silicon vs Intel Mac
list AppleGPU
RenderGLMultiThreadedMedia 1 0
RenderAppleUseMultGL 1 0
+RenderGLMultiThreadedTextures 1 0
+RenderGLMultiThreadedMedia 1 0
list NonAppleGPU
-RenderGLMultiThreadedMedia 1 0
-RenderAppleUseMultGL 1 0
+RenderDeferredSSAO 1 0
list GL3
RenderFSAASamples 0 0
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index f79eb5ddce..c770b7c917 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -2171,7 +2171,12 @@ bool LLAppViewer::initThreads()
// get the number of concurrent threads that can run
S32 cores = std::thread::hardware_concurrency();
-
+#if LL_DARWIN
+ if (!gGLManager.mIsApple)
+ {
+ cores /= 2;
+ }
+#endif
U32 max_cores = gSavedSettings.getU32("EmulateCoreCount");
if (max_cores != 0)
{
diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp
index 232b0b1cdf..8f75b108cc 100644
--- a/indra/newview/llreflectionmapmanager.cpp
+++ b/indra/newview/llreflectionmapmanager.cpp
@@ -404,6 +404,13 @@ void LLReflectionMapManager::update()
{
closestDynamic = probe;
}
+
+ if (sLevel == 0)
+ {
+ // only update default probe when coverage is set to none
+ llassert(probe == mDefaultProbe);
+ break;
+ }
}
if (realtime && closestDynamic != nullptr)
@@ -713,6 +720,7 @@ void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
}
else
{
+ llassert(gSavedSettings.getS32("RenderReflectionProbeLevel") > 0); // should never update a probe that's not the default probe if reflection coverage is none
probe->update(mRenderTarget.getWidth(), face);
}
diff --git a/indra/newview/llviewercontrol.cpp b/indra/newview/llviewercontrol.cpp
index 172ffcb0d4..18746e76fc 100644
--- a/indra/newview/llviewercontrol.cpp
+++ b/indra/newview/llviewercontrol.cpp
@@ -77,6 +77,10 @@
#include "llstartup.h"
#include "llperfstats.h"
+#if LL_DARWIN
+#include "llwindowmacosx.h"
+#endif
+
// Third party library includes
#include <boost/algorithm/string.hpp>
@@ -453,6 +457,17 @@ static bool handleReflectionProbeDetailChanged(const LLSD& newvalue)
return true;
}
+#if LL_DARWIN
+static bool handleAppleUseMultGLChanged(const LLSD& newvalue)
+{
+ if (gGLManager.mInited)
+ {
+ LLWindowMacOSX::setUseMultGL(newvalue.asBoolean());
+ }
+ return true;
+}
+#endif
+
static bool handleHeroProbeResolutionChanged(const LLSD &newvalue)
{
if (gPipeline.isInit())
@@ -820,6 +835,9 @@ void settings_setup_listeners()
setting_setup_signal_listener(gSavedSettings, "RenderReflectionProbeLevel", handleReflectionProbeDetailChanged);
setting_setup_signal_listener(gSavedSettings, "RenderReflectionProbeDetail", handleReflectionProbeDetailChanged);
setting_setup_signal_listener(gSavedSettings, "RenderReflectionsEnabled", handleReflectionProbeDetailChanged);
+#if LL_DARWIN
+ setting_setup_signal_listener(gSavedSettings, "RenderAppleUseMultGL", handleAppleUseMultGLChanged);
+#endif
setting_setup_signal_listener(gSavedSettings, "RenderScreenSpaceReflections", handleReflectionProbeDetailChanged);
setting_setup_signal_listener(gSavedSettings, "RenderMirrors", handleReflectionProbeDetailChanged);
setting_setup_signal_listener(gSavedSettings, "RenderHeroProbeResolution", handleHeroProbeResolutionChanged);
diff --git a/indra/newview/llviewerwindow.cpp b/indra/newview/llviewerwindow.cpp
index 4bd1cdd6a1..8a5aac9b8b 100644
--- a/indra/newview/llviewerwindow.cpp
+++ b/indra/newview/llviewerwindow.cpp
@@ -5391,6 +5391,8 @@ bool LLViewerWindow::cubeSnapshot(const LLVector3& origin, LLCubeMapArray* cubea
camera->setUserClipPlane(clipPlane);
}
+ gPipeline.pushRenderTypeMask();
+
glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT); // stencil buffer is deprecated | GL_STENCIL_BUFFER_BIT);
U32 dynamic_render_types[] = {
@@ -5479,16 +5481,7 @@ bool LLViewerWindow::cubeSnapshot(const LLVector3& origin, LLCubeMapArray* cubea
}
}
- if (!dynamic_render)
- {
- for (int i = 0; i < dynamic_render_type_count; ++i)
- {
- if (prev_dynamic_render_type[i])
- {
- gPipeline.toggleRenderType(dynamic_render_types[i]);
- }
- }
- }
+ gPipeline.popRenderTypeMask();
if (hide_hud)
{