summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRye <rye@lindenlab.com>2025-02-02 02:43:46 -0500
committerRye <rye@lindenlab.com>2025-02-11 05:04:05 -0500
commit6fcd349f374710a3f4e0e0585bb6d7af86ebb66d (patch)
treed1a2e98993c6e4f50230b8d87885ea2ae416c3cf
parent51ed6b5424a626499ddb7f95e6da7cf34b375f6a (diff)
Fix Tracy memory profiling overloads for aligned allocations
Fix disabling renderdoc support Improve ll_aligned_alloc functions on darwin for 32 and 64byte aligned by utilizing posix_memalign
-rw-r--r--autobuild.xml14
-rw-r--r--indra/cmake/CMakeLists.txt1
-rw-r--r--indra/cmake/Tracy.cmake6
-rw-r--r--indra/llcommon/linden_common.h6
-rw-r--r--indra/llcommon/llcommon.cpp72
-rw-r--r--indra/llcommon/llmemory.h49
-rw-r--r--indra/llcommon/llprofiler.h32
-rw-r--r--indra/llrender/llglslshader.cpp4
-rw-r--r--indra/llrender/llglslshader.h4
-rw-r--r--indra/llrender/llvertexbuffer.cpp2
-rw-r--r--indra/llrender/llvertexbuffer.h4
-rw-r--r--indra/llwindow/llwindowwin32.cpp7
-rw-r--r--indra/newview/llappviewer.cpp37
-rw-r--r--indra/newview/llappviewerlinux.cpp5
-rw-r--r--indra/newview/llappviewermacosx.cpp5
-rw-r--r--indra/newview/lldrawpool.h4
-rw-r--r--indra/newview/llheroprobemanager.cpp11
-rw-r--r--indra/newview/llreflectionmapmanager.cpp4
-rw-r--r--indra/newview/llviewerdisplay.cpp2
-rw-r--r--indra/newview/pipeline.cpp21
20 files changed, 166 insertions, 124 deletions
diff --git a/autobuild.xml b/autobuild.xml
index c3df05a223..653762da9e 100644
--- a/autobuild.xml
+++ b/autobuild.xml
@@ -2433,11 +2433,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
<key>archive</key>
<map>
<key>hash</key>
- <string>226225ec049826c35adc5e897e0398ed64d4bedb</string>
+ <string>0c3d01b7e9e39c23f0f40c56a1a04d1fba08ead0</string>
<key>hash_algorithm</key>
<string>sha1</string>
<key>url</key>
- <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-darwin64-10376230034.tar.zst</string>
+ <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-darwin64-11706699176.tar.zst</string>
</map>
<key>name</key>
<string>darwin64</string>
@@ -2447,11 +2447,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
<key>archive</key>
<map>
<key>hash</key>
- <string>8c5429d1a1486f40cf7e5e88a232222d1fa4f78e</string>
+ <string>b46cef5646a8d0471ab6256fe5119220fa238772</string>
<key>hash_algorithm</key>
<string>sha1</string>
<key>url</key>
- <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-windows64-10376230034.tar.zst</string>
+ <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-windows64-11706699176.tar.zst</string>
</map>
<key>name</key>
<string>windows64</string>
@@ -2461,11 +2461,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
<key>archive</key>
<map>
<key>hash</key>
- <string>ed0664a009aba1dcf1246d845839f524e857162e</string>
+ <string>beab04c9ea6036b1851a485b65c66cf6a38f0be4</string>
<key>hash_algorithm</key>
<string>sha1</string>
<key>url</key>
- <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-linux64-10376230034.tar.zst</string>
+ <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-linux64-11706699176.tar.zst</string>
</map>
<key>name</key>
<string>linux64</string>
@@ -2478,7 +2478,7 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
<key>copyright</key>
<string>Copyright (c) 2017-2024, Bartosz Taudul (wolf@nereid.pl)</string>
<key>version</key>
- <string>v0.11.0.10376230034</string>
+ <string>v0.11.1.11706699176</string>
<key>name</key>
<string>tracy</string>
<key>canonical_repo</key>
diff --git a/indra/cmake/CMakeLists.txt b/indra/cmake/CMakeLists.txt
index cc217b0563..8d55cc4bbe 100644
--- a/indra/cmake/CMakeLists.txt
+++ b/indra/cmake/CMakeLists.txt
@@ -55,6 +55,7 @@ set(cmake_SOURCE_FILES
TemplateCheck.cmake
TinyEXR.cmake
TinyGLTF.cmake
+ Tracy.cmake
Tut.cmake
UI.cmake
UnixInstall.cmake
diff --git a/indra/cmake/Tracy.cmake b/indra/cmake/Tracy.cmake
index a7eac2711f..cb09337d15 100644
--- a/indra/cmake/Tracy.cmake
+++ b/indra/cmake/Tracy.cmake
@@ -15,6 +15,7 @@ endif()
if (USE_TRACY)
option(USE_TRACY_ON_DEMAND "Use on-demand Tracy profiling." ON)
option(USE_TRACY_LOCAL_ONLY "Disallow remote Tracy profiling." OFF)
+ option(USE_TRACY_GPU "Use Tracy GPU profiling" OFF)
use_system_binary(tracy)
use_prebuilt_binary(tracy)
@@ -31,9 +32,8 @@ if (USE_TRACY)
target_compile_definitions(ll::tracy INTERFACE -DTRACY_NO_BROADCAST=1 -DTRACY_ONLY_LOCALHOST=1)
endif ()
- # GHA runners don't always provide invariant TSC support, but always build with LL_TESTS enabled
- if (DARWIN AND LL_TESTS)
- target_compile_definitions(ll::tracy INTERFACE -DTRACY_TIMER_FALLBACK=1)
+ if (USE_TRACY_GPU AND NOT DARWIN) # Tracy OpenGL mode is incompatible with macOS/iOS
+ target_compile_definitions(ll::tracy INTERFACE -DLL_PROFILER_ENABLE_TRACY_OPENGL=1)
endif ()
# See: indra/llcommon/llprofiler.h
diff --git a/indra/llcommon/linden_common.h b/indra/llcommon/linden_common.h
index a918caa2e8..a41af153fe 100644
--- a/indra/llcommon/linden_common.h
+++ b/indra/llcommon/linden_common.h
@@ -28,12 +28,6 @@
#define LL_LINDEN_COMMON_H
#include "llprofiler.h"
-#if TRACY_ENABLE && !defined(LL_PROFILER_ENABLE_TRACY_OPENGL) // hooks for memory profiling
-void *tracy_aligned_malloc(size_t size, size_t alignment);
-void tracy_aligned_free(void *memblock);
-#define _aligned_malloc(X, Y) tracy_aligned_malloc((X), (Y))
-#define _aligned_free(X) tracy_aligned_free((X))
-#endif
// *NOTE: Please keep includes here to a minimum!
//
diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp
index 84b35749cc..7a22eaf203 100644
--- a/indra/llcommon/llcommon.cpp
+++ b/indra/llcommon/llcommon.cpp
@@ -33,23 +33,23 @@
#include "lltracethreadrecorder.h"
#include "llcleanup.h"
-thread_local bool gProfilerEnabled = false;
-
-#if (TRACY_ENABLE)
+#if LL_PROFILER_CONFIGURATION >= LL_PROFILER_CONFIG_TRACY && TRACY_ENABLE
// Override new/delete for tracy memory profiling
void* ll_tracy_new(size_t size)
{
- void* ptr;
- if (gProfilerEnabled)
- {
- //LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- ptr = (malloc)(size);
- }
- else
+ void* ptr = (malloc)(size);
+ if (!ptr)
{
- ptr = (malloc)(size);
+ throw std::bad_alloc();
}
+ LL_PROFILE_ALLOC(ptr, size);
+ return ptr;
+}
+
+void* ll_tracy_aligned_new(size_t size, size_t alignment)
+{
+ void* ptr = ll_aligned_malloc_fallback(size, alignment);
if (!ptr)
{
throw std::bad_alloc();
@@ -58,6 +58,18 @@ void* ll_tracy_new(size_t size)
return ptr;
}
+void ll_tracy_delete(void* ptr)
+{
+ LL_PROFILE_FREE(ptr);
+ (free)(ptr);
+}
+
+void ll_tracy_aligned_delete(void* ptr)
+{
+ LL_PROFILE_FREE(ptr);
+ ll_aligned_free_fallback(ptr);
+}
+
void* operator new(size_t size)
{
return ll_tracy_new(size);
@@ -68,18 +80,14 @@ void* operator new[](std::size_t count)
return ll_tracy_new(count);
}
-void ll_tracy_delete(void* ptr)
+void* operator new(size_t size, std::align_val_t align)
{
- LL_PROFILE_FREE(ptr);
- if (gProfilerEnabled)
- {
- //LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- (free)(ptr);
- }
- else
- {
- (free)(ptr);
- }
+ return ll_tracy_aligned_new(size, (size_t)align);
+}
+
+void* operator new[](std::size_t count, std::align_val_t align)
+{
+ return ll_tracy_aligned_new(count, (size_t)align);
}
void operator delete(void *ptr) noexcept
@@ -92,27 +100,17 @@ void operator delete[](void* ptr) noexcept
ll_tracy_delete(ptr);
}
-// C-style malloc/free can't be so easily overridden, so we define tracy versions and use
-// a pre-processor #define in linden_common.h to redirect to them. The parens around the native
-// functions below prevents recursive substitution by the preprocessor.
-//
-// Unaligned mallocs are rare in LL code but hooking them causes problems in 3p lib code (looking at
-// you, Havok), so we'll only capture the aligned version.
-
-void *tracy_aligned_malloc(size_t size, size_t alignment)
+void operator delete(void *ptr, std::align_val_t align) noexcept
{
- auto ptr = ll_aligned_malloc_fallback(size, alignment);
- if (ptr) LL_PROFILE_ALLOC(ptr, size);
- return ptr;
+ ll_tracy_aligned_delete(ptr);
}
-void tracy_aligned_free(void *memblock)
+void operator delete[](void* ptr, std::align_val_t align) noexcept
{
- LL_PROFILE_FREE(memblock);
- ll_aligned_free_fallback(memblock);
+ ll_tracy_aligned_delete(ptr);
}
-#endif
+#endif // TRACY_ENABLE && !LL_PROFILER_ENABLE_TRACY_OPENGL
//static
bool LLCommon::sAprInitialized = false;
diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h
index b616edfde7..72aec57080 100644
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@@ -231,8 +231,6 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi
LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
#if defined(LL_WINDOWS)
void* ret = _aligned_malloc(size, 32);
-#elif defined(LL_DARWIN)
- void* ret = ll_aligned_malloc_fallback( size, 32 );
#else
void *ret;
if (0 != posix_memalign(&ret, 32, size))
@@ -248,8 +246,31 @@ inline void ll_aligned_free_32(void *p)
LL_PROFILE_FREE(p);
#if defined(LL_WINDOWS)
_aligned_free(p);
-#elif defined(LL_DARWIN)
- ll_aligned_free_fallback( p );
+#else
+ free(p); // posix_memalign() is compatible with heap deallocator
+#endif
+}
+
+inline void* ll_aligned_malloc_64(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+#if defined(LL_WINDOWS)
+ void* ret = _aligned_malloc(size, 64);
+#else
+ void *ret;
+ if (0 != posix_memalign(&ret, 64, size))
+ return nullptr;
+#endif
+ LL_PROFILE_ALLOC(ret, size);
+ return ret;
+}
+
+inline void ll_aligned_free_64(void *p)
+{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+ LL_PROFILE_FREE(p);
+#if defined(LL_WINDOWS)
+ _aligned_free(p);
#else
free(p); // posix_memalign() is compatible with heap deallocator
#endif
@@ -261,19 +282,23 @@ LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
void* ret;
- if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)
+ if constexpr (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)
{
ret = malloc(size);
LL_PROFILE_ALLOC(ret, size);
}
- else if (ALIGNMENT == 16)
+ else if constexpr (ALIGNMENT == 16)
{
ret = ll_aligned_malloc_16(size);
}
- else if (ALIGNMENT == 32)
+ else if constexpr (ALIGNMENT == 32)
{
ret = ll_aligned_malloc_32(size);
}
+ else if constexpr (ALIGNMENT == 64)
+ {
+ ret = ll_aligned_malloc_64(size);
+ }
else
{
ret = ll_aligned_malloc_fallback(size, ALIGNMENT);
@@ -285,16 +310,20 @@ template<size_t ALIGNMENT>
LL_FORCE_INLINE void ll_aligned_free(void* ptr)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
- if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)
+ if constexpr (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)
{
LL_PROFILE_FREE(ptr);
free(ptr);
}
- else if (ALIGNMENT == 16)
+ else if constexpr (ALIGNMENT == 16)
{
ll_aligned_free_16(ptr);
}
- else if (ALIGNMENT == 32)
+ else if constexpr (ALIGNMENT == 32)
+ {
+ return ll_aligned_free_32(ptr);
+ }
+ else if constexpr (ALIGNMENT == 64)
{
return ll_aligned_free_32(ptr);
}
diff --git a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h
index f6a4d24747..5fb32d6280 100644
--- a/indra/llcommon/llprofiler.h
+++ b/indra/llcommon/llprofiler.h
@@ -74,23 +74,18 @@
#define LL_PROFILER_CONFIGURATION LL_PROFILER_CONFIG_FAST_TIMER
#endif
-extern thread_local bool gProfilerEnabled;
-
#if defined(LL_PROFILER_CONFIGURATION) && (LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE)
#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
#include "tracy/Tracy.hpp"
- // Enable OpenGL profiling
- #define LL_PROFILER_ENABLE_TRACY_OPENGL 0
-
// Enable RenderDoc labeling
- #define LL_PROFILER_ENABLE_RENDER_DOC 0
+ //#define LL_PROFILER_ENABLE_RENDER_DOC 0
#endif
#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY
#define LL_PROFILER_FRAME_END FrameMark
- #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); gProfilerEnabled = true;
+ #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name );
#define LL_RECORD_BLOCK_TIME(name) ZoneScoped // Want descriptive names; was: ZoneNamedN( ___tracy_scoped_zone, #name, true );
#define LL_PROFILE_ZONE_NAMED(name) ZoneNamedN( ___tracy_scoped_zone, name, true );
#define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB
@@ -133,7 +128,7 @@ extern thread_local bool gProfilerEnabled;
#endif
#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
#define LL_PROFILER_FRAME_END FrameMark
- #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); gProfilerEnabled = true;
+ #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name );
#define LL_RECORD_BLOCK_TIME(name) ZoneNamedN(___tracy_scoped_zone, #name, true); const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
#define LL_PROFILE_ZONE_NAMED(name) ZoneNamedN( ___tracy_scoped_zone, #name, true );
#define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB
@@ -158,23 +153,20 @@ extern thread_local bool gProfilerEnabled;
#endif // LL_PROFILER
#if LL_PROFILER_ENABLE_TRACY_OPENGL
-#define LL_PROFILE_GPU_ZONE(name) TracyGpuZone(name)
-#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color)
+#define LL_PROFILE_GPU_ZONE(name) TracyGpuZone(name)
+#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color)
#define LL_PROFILER_GPU_COLLECT TracyGpuCollect
#define LL_PROFILER_GPU_CONTEXT TracyGpuContext
-
-// disable memory tracking (incompatible with GPU tracing
-#define LL_PROFILE_ALLOC(ptr, size) (void)(ptr); (void)(size);
-#define LL_PROFILE_FREE(ptr) (void)(ptr);
+#define LL_PROFILER_GPU_CONTEXT_NAMED TracyGpuContextName
#else
-#define LL_PROFILE_GPU_ZONE(name) (void)name;
-#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color;
+#define LL_PROFILE_GPU_ZONE(name) (void)name;
+#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color;
#define LL_PROFILER_GPU_COLLECT
#define LL_PROFILER_GPU_CONTEXT
+#define LL_PROFILER_GPU_CONTEXT_NAMED(name) (void)name;
+#endif // LL_PROFILER_ENABLE_TRACY_OPENGL
-#define LL_LABEL_OBJECT_GL(type, name, length, label)
-
-#if !LL_DARWIN && LL_PROFILER_CONFIGURATION > 1
+#if LL_PROFILER_CONFIGURATION >= LL_PROFILER_CONFIG_TRACY
#define LL_PROFILE_ALLOC(ptr, size) TracyAlloc(ptr, size)
#define LL_PROFILE_FREE(ptr) TracyFree(ptr)
#else
@@ -182,8 +174,6 @@ extern thread_local bool gProfilerEnabled;
#define LL_PROFILE_FREE(ptr) (void)(ptr);
#endif
-#endif
-
#if LL_PROFILER_ENABLE_RENDER_DOC
#define LL_LABEL_OBJECT_GL(type, name, length, label) glObjectLabel(type, name, length, label)
#else
diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp
index b3f32fdc83..0841c0e943 100644
--- a/indra/llrender/llglslshader.cpp
+++ b/indra/llrender/llglslshader.cpp
@@ -543,7 +543,7 @@ bool LLGLSLShader::createShader()
}
}
-#ifdef LL_PROFILER_ENABLE_RENDER_DOC
+#if LL_PROFILER_ENABLE_RENDER_DOC
setLabel(mName.c_str());
#endif
@@ -2061,7 +2061,7 @@ LLUUID LLGLSLShader::hash()
return hash_obj.digest();
}
-#ifdef LL_PROFILER_ENABLE_RENDER_DOC
+#if LL_PROFILER_ENABLE_RENDER_DOC
void LLGLSLShader::setLabel(const char* label) {
LL_LABEL_OBJECT_GL(GL_PROGRAM, mProgramObject, strlen(label), label);
}
diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h
index 58c456f134..cade888a83 100644
--- a/indra/llrender/llglslshader.h
+++ b/indra/llrender/llglslshader.h
@@ -360,7 +360,7 @@ public:
// hacky flag used for optimization in LLDrawPoolAlpha
bool mCanBindFast = false;
-#ifdef LL_PROFILER_ENABLE_RENDER_DOC
+#if LL_PROFILER_ENABLE_RENDER_DOC
void setLabel(const char* label);
#endif
@@ -380,7 +380,7 @@ extern LLGLSLShader gSolidColorProgram;
//Alpha mask shader (declared here so llappearance can access properly)
extern LLGLSLShader gAlphaMaskProgram;
-#ifdef LL_PROFILER_ENABLE_RENDER_DOC
+#if LL_PROFILER_ENABLE_RENDER_DOC
#define LL_SET_SHADER_LABEL(shader) shader.setLabel(#shader)
#else
#define LL_SET_SHADER_LABEL(shader, label)
diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp
index 1f0c424188..86ec9a453b 100644
--- a/indra/llrender/llvertexbuffer.cpp
+++ b/indra/llrender/llvertexbuffer.cpp
@@ -885,7 +885,7 @@ bool LLVertexBuffer::validateRange(U32 start, U32 end, U32 count, U32 indices_of
return true;
}
-#ifdef LL_PROFILER_ENABLE_RENDER_DOC
+#if LL_PROFILER_ENABLE_RENDER_DOC
void LLVertexBuffer::setLabel(const char* label) {
LL_LABEL_OBJECT_GL(GL_BUFFER, mGLBuffer, strlen(label), label);
}
diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h
index 375ad76fb8..faaa6ba0f0 100644
--- a/indra/llrender/llvertexbuffer.h
+++ b/indra/llrender/llvertexbuffer.h
@@ -279,7 +279,7 @@ public:
//for debugging, validate data in given range is valid
bool validateRange(U32 start, U32 end, U32 count, U32 offset) const;
- #ifdef LL_PROFILER_ENABLE_RENDER_DOC
+ #if LL_PROFILER_ENABLE_RENDER_DOC
void setLabel(const char* label);
#endif
@@ -340,7 +340,7 @@ public:
static U32 sVertexCount;
};
-#ifdef LL_PROFILER_ENABLE_RENDER_DOC
+#if LL_PROFILER_ENABLE_RENDER_DOC
#define LL_LABEL_VERTEX_BUFFER(buf, name) buf->setLabel(name)
#else
#define LL_LABEL_VERTEX_BUFFER(buf, name)
diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp
index 832cf254d1..90713a6653 100644
--- a/indra/llwindow/llwindowwin32.cpp
+++ b/indra/llwindow/llwindowwin32.cpp
@@ -1666,6 +1666,11 @@ const S32 max_format = (S32)num_formats - 1;
return false;
}
+ // Setup Tracy gpu context
+ {
+ LL_PROFILER_GPU_CONTEXT;
+ }
+
// Disable vertical sync for swap
toggleVSync(enable_vsync);
@@ -1697,8 +1702,6 @@ const S32 max_format = (S32)num_formats - 1;
swapBuffers();
}
- LL_PROFILER_GPU_CONTEXT;
-
return true;
}
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index 9889765fff..84cce2348a 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -1324,6 +1324,7 @@ bool LLAppViewer::frame()
bool LLAppViewer::doFrame()
{
LL_RECORD_BLOCK_TIME(FTM_FRAME);
+ LL_PROFILE_GPU_ZONE("Frame");
{
// and now adjust the visuals from previous frame.
if(LLPerfStats::tunables.userAutoTuneEnabled && LLPerfStats::tunables.tuningFlag != LLPerfStats::Tunables::Nothing)
@@ -1413,24 +1414,26 @@ bool LLAppViewer::doFrame()
if (!LLApp::isExiting())
{
- LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard");
- pingMainloopTimeout("Main:JoystickKeyboard");
-
- // Scan keyboard for movement keys. Command keys and typing
- // are handled by windows callbacks. Don't do this until we're
- // done initializing. JC
- if (gViewerWindow
- && (gHeadlessClient || gViewerWindow->getWindow()->getVisible())
- && gViewerWindow->getActive()
- && !gViewerWindow->getWindow()->getMinimized()
- && LLStartUp::getStartupState() == STATE_STARTED
- && (gHeadlessClient || !gViewerWindow->getShowProgress())
- && !gFocusMgr.focusLocked())
{
- LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_IDLE);
- joystick->scanJoystick();
- gKeyboard->scanKeyboard();
- gViewerInput.scanMouse();
+ LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard");
+ pingMainloopTimeout("Main:JoystickKeyboard");
+
+ // Scan keyboard for movement keys. Command keys and typing
+ // are handled by windows callbacks. Don't do this until we're
+ // done initializing. JC
+ if (gViewerWindow
+ && (gHeadlessClient || gViewerWindow->getWindow()->getVisible())
+ && gViewerWindow->getActive()
+ && !gViewerWindow->getWindow()->getMinimized()
+ && LLStartUp::getStartupState() == STATE_STARTED
+ && (gHeadlessClient || !gViewerWindow->getShowProgress())
+ && !gFocusMgr.focusLocked())
+ {
+ LLPerfStats::RecordSceneTime T(LLPerfStats::StatType_t::RENDER_IDLE);
+ joystick->scanJoystick();
+ gKeyboard->scanKeyboard();
+ gViewerInput.scanMouse();
+ }
}
// Update state based on messages, user input, object idle.
diff --git a/indra/newview/llappviewerlinux.cpp b/indra/newview/llappviewerlinux.cpp
index 1709970156..89d19d180b 100644
--- a/indra/newview/llappviewerlinux.cpp
+++ b/indra/newview/llappviewerlinux.cpp
@@ -73,6 +73,11 @@ static void exceptionTerminateHandler()
int main( int argc, char **argv )
{
+ // Call Tracy first thing to have it allocate memory
+ // https://github.com/wolfpld/tracy/issues/196
+ LL_PROFILER_FRAME_END;
+ LL_PROFILER_SET_THREAD_NAME("App");
+
gArgC = argc;
gArgV = argv;
diff --git a/indra/newview/llappviewermacosx.cpp b/indra/newview/llappviewermacosx.cpp
index 4162c0479a..f497a3cdf3 100644
--- a/indra/newview/llappviewermacosx.cpp
+++ b/indra/newview/llappviewermacosx.cpp
@@ -231,6 +231,11 @@ void infos(const std::string& message)
int main( int argc, char **argv )
{
+ // Call Tracy first thing to have it allocate memory
+ // https://github.com/wolfpld/tracy/issues/196
+ LL_PROFILER_FRAME_END;
+ LL_PROFILER_SET_THREAD_NAME("App");
+
// Store off the command line args for use later.
gArgC = argc;
gArgV = argv;
diff --git a/indra/newview/lldrawpool.h b/indra/newview/lldrawpool.h
index 1c8864a9df..46696fc4a4 100644
--- a/indra/newview/lldrawpool.h
+++ b/indra/newview/lldrawpool.h
@@ -204,7 +204,7 @@ public:
NUM_RENDER_TYPES,
};
- #ifdef LL_PROFILER_ENABLE_RENDER_DOC
+ #if LL_PROFILER_ENABLE_RENDER_DOC
static inline const char* lookupPassName(U32 pass)
{
switch (pass)
@@ -340,7 +340,7 @@ public:
}
}
#else
- static inline const char* lookupPass(U32 pass) { return ""; }
+ static inline const char* lookupPassName(U32 pass) { return ""; }
#endif
LLRenderPass(const U32 type);
diff --git a/indra/newview/llheroprobemanager.cpp b/indra/newview/llheroprobemanager.cpp
index e754de2fd1..2ec161c093 100644
--- a/indra/newview/llheroprobemanager.cpp
+++ b/indra/newview/llheroprobemanager.cpp
@@ -81,6 +81,7 @@ void LLHeroProbeManager::update()
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
+ LL_PROFILE_GPU_ZONE("hero manager update");
llassert(!gCubeSnapshot); // assert a snapshot is not in progress
if (LLAppViewer::instance()->logoutRequestSent())
{
@@ -282,6 +283,9 @@ void LLHeroProbeManager::renderProbes()
// In effect this simulates single-bounce lighting.
void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool is_dynamic, F32 near_clip)
{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
+ LL_PROFILE_GPU_ZONE("hero probe update");
+
// hacky hot-swap of camera specific render targets
gPipeline.mRT = &gPipeline.mHeroProbeRT;
@@ -352,7 +356,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool
for (int i = 0; i < mMipChain.size(); ++i)
{
- LL_PROFILE_GPU_ZONE("probe mip");
+ LL_PROFILE_GPU_ZONE("hero probe mip");
mMipChain[i].bindTarget();
if (i == 0)
{
@@ -379,7 +383,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool
if (mip >= 0)
{
- LL_PROFILE_GPU_ZONE("probe mip copy");
+ LL_PROFILE_GPU_ZONE("hero probe mip copy");
mTexture->bind(0);
glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, sourceIdx * 6 + face, 0, 0, res, res);
@@ -427,7 +431,7 @@ void LLHeroProbeManager::generateRadiance(LLReflectionMap* probe)
for (int i = 0; i < mMipChain.size() / 4; ++i)
{
- LL_PROFILE_GPU_ZONE("probe radiance gen");
+ LL_PROFILE_GPU_ZONE("hero probe radiance gen");
static LLStaticHashedString sMipLevel("mipLevel");
static LLStaticHashedString sRoughness("roughness");
static LLStaticHashedString sWidth("u_width");
@@ -474,6 +478,7 @@ void LLHeroProbeManager::updateUniforms()
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
+ LL_PROFILE_GPU_ZONE("hpmu - uniforms")
LLMatrix4a modelview;
modelview.loadu(gGLModelView);
diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp
index 4760ab376e..ae5ade5f24 100644
--- a/indra/newview/llreflectionmapmanager.cpp
+++ b/indra/newview/llreflectionmapmanager.cpp
@@ -210,6 +210,7 @@ void LLReflectionMapManager::update()
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
+ LL_PROFILE_GPU_ZONE("reflection manager update");
llassert(!gCubeSnapshot); // assert a snapshot is not in progress
if (LLAppViewer::instance()->logoutRequestSent())
{
@@ -696,6 +697,8 @@ void LLReflectionMapManager::doProbeUpdate()
// In effect this simulates single-bounce lighting.
void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
{
+ LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
+ LL_PROFILE_GPU_ZONE("probe update");
// hacky hot-swap of camera specific render targets
gPipeline.mRT = &gPipeline.mAuxillaryRT;
@@ -1011,6 +1014,7 @@ void LLReflectionMapManager::updateUniforms()
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
+ LL_PROFILE_GPU_ZONE("rmmu - uniforms")
// structure for packing uniform buffer object
// see class3/deferred/reflectionProbeF.glsl
diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp
index 32019f860d..b34c8600f7 100644
--- a/indra/newview/llviewerdisplay.cpp
+++ b/indra/newview/llviewerdisplay.cpp
@@ -407,6 +407,7 @@ static void update_tp_display(bool minimized)
void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Render");
+ LL_PROFILE_GPU_ZONE("Render");
LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_DISPLAY); // render time capture - This is the main stat for overall rendering.
@@ -709,6 +710,7 @@ void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot)
if (gPipeline.RenderMirrors && !gSnapshot)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Update hero probes");
+ LL_PROFILE_GPU_ZONE("hero manager")
gPipeline.mHeroProbeManager.update();
gPipeline.mHeroProbeManager.renderProbes();
}
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 18dd694246..ddfb5a5621 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -7182,11 +7182,11 @@ extern LLPointer<LLImageGL> gEXRImage;
void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst)
{
+ LL_PROFILE_GPU_ZONE("tonemap");
+
dst->bindTarget();
// gamma correct lighting
{
- LL_PROFILE_GPU_ZONE("tonemap");
-
static LLCachedControl<bool> buildNoPost(gSavedSettings, "RenderDisablePostProcessing", false);
LLGLDepthTest depth(GL_FALSE, GL_FALSE);
@@ -7235,11 +7235,11 @@ void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst)
void LLPipeline::gammaCorrect(LLRenderTarget* src, LLRenderTarget* dst)
{
+ LL_PROFILE_GPU_ZONE("gamma correct");
+
dst->bindTarget();
// gamma correct lighting
{
- LL_PROFILE_GPU_ZONE("gamma correct");
-
LLGLDepthTest depth(GL_FALSE, GL_FALSE);
static LLCachedControl<bool> buildNoPost(gSavedSettings, "RenderDisablePostProcessing", false);
@@ -7290,9 +7290,9 @@ void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget*
void LLPipeline::generateGlow(LLRenderTarget* src)
{
+ LL_PROFILE_GPU_ZONE("glow generate");
if (sRenderGlow)
{
- LL_PROFILE_GPU_ZONE("glow");
mGlow[2].bindTarget();
mGlow[2].clear();
@@ -7401,6 +7401,7 @@ void LLPipeline::generateGlow(LLRenderTarget* src)
void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst)
{
static LLCachedControl<F32> cas_sharpness(gSavedSettings, "RenderCASSharpness", 0.4f);
+ LL_PROFILE_GPU_ZONE("cas");
if (cas_sharpness == 0.0f || !gCASProgram.isComplete())
{
gPipeline.copyRenderTarget(src, dst);
@@ -7445,6 +7446,7 @@ void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst)
void LLPipeline::applyFXAA(LLRenderTarget* src, LLRenderTarget* dst)
{
+ LL_PROFILE_GPU_ZONE("FXAA");
{
llassert(!gCubeSnapshot);
bool multisample = RenderFSAAType == 1 && gFXAAProgram[0].isComplete() && mFXAAMap.isComplete();
@@ -7536,7 +7538,7 @@ void LLPipeline::generateSMAABuffers(LLRenderTarget* src)
// Present everything.
if (multisample)
{
- LL_PROFILE_GPU_ZONE("aa");
+ LL_PROFILE_GPU_ZONE("SMAA Edge");
static LLCachedControl<U32> aa_quality(gSavedSettings, "RenderFSAASamples", 0U);
U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U);
@@ -7648,13 +7650,13 @@ void LLPipeline::generateSMAABuffers(LLRenderTarget* src)
void LLPipeline::applySMAA(LLRenderTarget* src, LLRenderTarget* dst)
{
+ LL_PROFILE_GPU_ZONE("SMAA");
llassert(!gCubeSnapshot);
bool multisample = RenderFSAAType == 2 && gSMAAEdgeDetectProgram[0].isComplete() && mFXAAMap.isComplete() && mSMAABlendBuffer.isComplete();
// Present everything.
if (multisample)
{
- LL_PROFILE_GPU_ZONE("aa");
static LLCachedControl<U32> aa_quality(gSavedSettings, "RenderFSAASamples", 0U);
U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U);
@@ -7732,8 +7734,9 @@ void LLPipeline::copyRenderTarget(LLRenderTarget* src, LLRenderTarget* dst)
void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst)
{
- // Go ahead and do our glow combine here in our destination. We blit this later into the front buffer.
+ LL_PROFILE_GPU_ZONE("glow combine");
+ // Go ahead and do our glow combine here in our destination. We blit this later into the front buffer.
dst->bindTarget();
{
@@ -7752,6 +7755,7 @@ void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst)
void LLPipeline::renderDoF(LLRenderTarget* src, LLRenderTarget* dst)
{
+ LL_PROFILE_GPU_ZONE("dof");
{
bool dof_enabled =
(RenderDepthOfFieldInEditMode || !LLToolMgr::getInstance()->inBuildMode()) &&
@@ -7762,7 +7766,6 @@ void LLPipeline::renderDoF(LLRenderTarget* src, LLRenderTarget* dst)
if (dof_enabled)
{
- LL_PROFILE_GPU_ZONE("dof");
LLGLDisable blend(GL_BLEND);
// depth of field focal plane calculations