diff options
author | Rye <rye@lindenlab.com> | 2024-11-07 06:01:25 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-07 06:01:25 -0500 |
commit | de104e3f8d996a63c07fdc1ef6130048bac6c94c (patch) | |
tree | e5a4cb3e07cdb968e1b8ac3523edbcb11fbed393 /indra | |
parent | 7e479bf89dba7a27c57bf09e346d78eee25ef6f1 (diff) | |
parent | 03231789c016245cdc93d154502c879386030de1 (diff) |
Merge pull request #3016 from secondlife/rye/tracyfixes
Tracy Improvements
Diffstat (limited to 'indra')
-rw-r--r-- | indra/cmake/CMakeLists.txt | 1 | ||||
-rw-r--r-- | indra/cmake/Tracy.cmake | 6 | ||||
-rw-r--r-- | indra/llcommon/linden_common.h | 6 | ||||
-rw-r--r-- | indra/llcommon/llcommon.cpp | 72 | ||||
-rw-r--r-- | indra/llcommon/llmemory.h | 51 | ||||
-rw-r--r-- | indra/llcommon/llprofiler.h | 34 | ||||
-rw-r--r-- | indra/llwindow/llwindowwin32.cpp | 7 | ||||
-rw-r--r-- | indra/newview/llappviewer.cpp | 37 | ||||
-rw-r--r-- | indra/newview/llappviewerlinux.cpp | 5 | ||||
-rw-r--r-- | indra/newview/llappviewermacosx.cpp | 5 | ||||
-rw-r--r-- | indra/newview/lldrawpool.h | 2 | ||||
-rw-r--r-- | indra/newview/llheroprobemanager.cpp | 11 | ||||
-rw-r--r-- | indra/newview/llreflectionmapmanager.cpp | 4 | ||||
-rw-r--r-- | indra/newview/llviewerdisplay.cpp | 2 | ||||
-rw-r--r-- | indra/newview/pipeline.cpp | 24 |
15 files changed, 155 insertions, 112 deletions
diff --git a/indra/cmake/CMakeLists.txt b/indra/cmake/CMakeLists.txt index 9d95a23a59..1083d7f2c4 100644 --- a/indra/cmake/CMakeLists.txt +++ b/indra/cmake/CMakeLists.txt @@ -57,6 +57,7 @@ set(cmake_SOURCE_FILES TemplateCheck.cmake TinyEXR.cmake TinyGLTF.cmake + Tracy.cmake Tut.cmake UI.cmake UnixInstall.cmake diff --git a/indra/cmake/Tracy.cmake b/indra/cmake/Tracy.cmake index 9eee9eb57f..e3f46cca9b 100644 --- a/indra/cmake/Tracy.cmake +++ b/indra/cmake/Tracy.cmake @@ -20,6 +20,7 @@ endif(LINUX) if (USE_TRACY) option(USE_TRACY_ON_DEMAND "Use on-demand Tracy profiling." ON) option(USE_TRACY_LOCAL_ONLY "Disallow remote Tracy profiling." OFF) + option(USE_TRACY_GPU "Use Tracy GPU profiling" OFF) use_system_binary(tracy) use_prebuilt_binary(tracy) @@ -36,9 +37,8 @@ if (USE_TRACY) target_compile_definitions(ll::tracy INTERFACE -DTRACY_NO_BROADCAST=1 -DTRACY_ONLY_LOCALHOST=1) endif () - # GHA runners don't always provide invariant TSC support, but always build with LL_TESTS enabled - if (DARWIN AND LL_TESTS) - target_compile_definitions(ll::tracy INTERFACE -DTRACY_TIMER_FALLBACK=1) + if (USE_TRACY_GPU AND NOT DARWIN) # Tracy OpenGL mode is incompatible with macOS/iOS + target_compile_definitions(ll::tracy INTERFACE -DLL_PROFILER_ENABLE_TRACY_OPENGL=1) endif () # See: indra/llcommon/llprofiler.h diff --git a/indra/llcommon/linden_common.h b/indra/llcommon/linden_common.h index a918caa2e8..a41af153fe 100644 --- a/indra/llcommon/linden_common.h +++ b/indra/llcommon/linden_common.h @@ -28,12 +28,6 @@ #define LL_LINDEN_COMMON_H #include "llprofiler.h" -#if TRACY_ENABLE && !defined(LL_PROFILER_ENABLE_TRACY_OPENGL) // hooks for memory profiling -void *tracy_aligned_malloc(size_t size, size_t alignment); -void tracy_aligned_free(void *memblock); -#define _aligned_malloc(X, Y) tracy_aligned_malloc((X), (Y)) -#define _aligned_free(X) tracy_aligned_free((X)) -#endif // *NOTE: Please keep includes here to a minimum! // diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp index f1f3958fe0..110a1c32a1 100644 --- a/indra/llcommon/llcommon.cpp +++ b/indra/llcommon/llcommon.cpp @@ -33,23 +33,23 @@ #include "lltracethreadrecorder.h" #include "llcleanup.h" -thread_local bool gProfilerEnabled = false; - -#if (TRACY_ENABLE) +#if LL_PROFILER_CONFIGURATION > 1 && TRACY_ENABLE // Override new/delete for tracy memory profiling void* ll_tracy_new(size_t size) { - void* ptr; - if (gProfilerEnabled) - { - //LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; - ptr = (malloc)(size); - } - else + void* ptr = (malloc)(size); + if (!ptr) { - ptr = (malloc)(size); + throw std::bad_alloc(); } + TracyAlloc(ptr, size); + return ptr; +} + +void* ll_tracy_aligned_new(size_t size, size_t alignment) +{ + void* ptr = ll_aligned_malloc_fallback(size, alignment); if (!ptr) { throw std::bad_alloc(); @@ -58,6 +58,18 @@ void* ll_tracy_new(size_t size) return ptr; } +void ll_tracy_delete(void* ptr) +{ + TracyFree(ptr); + (free)(ptr); +} + +void ll_tracy_aligned_delete(void* ptr) +{ + TracyFree(ptr); + ll_aligned_free_fallback(ptr); +} + void* operator new(size_t size) { return ll_tracy_new(size); @@ -68,18 +80,14 @@ void* operator new[](std::size_t count) return ll_tracy_new(count); } -void ll_tracy_delete(void* ptr) +void* operator new(size_t size, std::align_val_t align) { - TracyFree(ptr); - if (gProfilerEnabled) - { - //LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; - (free)(ptr); - } - else - { - (free)(ptr); - } + return ll_tracy_aligned_new(size, (size_t)align); +} + +void* operator new[](std::size_t count, std::align_val_t align) +{ + return ll_tracy_aligned_new(count, (size_t)align); } void operator delete(void *ptr) noexcept @@ -92,27 +100,17 @@ void operator delete[](void* ptr) noexcept ll_tracy_delete(ptr); } -// C-style malloc/free can't be so easily overridden, so we define tracy versions and use -// a pre-processor #define in linden_common.h to redirect to them. The parens around the native -// functions below prevents recursive substitution by the preprocessor. -// -// Unaligned mallocs are rare in LL code but hooking them causes problems in 3p lib code (looking at -// you, Havok), so we'll only capture the aligned version. - -void *tracy_aligned_malloc(size_t size, size_t alignment) +void operator delete(void *ptr, std::align_val_t align) noexcept { - auto ptr = ll_aligned_malloc_fallback(size, alignment); - if (ptr) TracyAlloc(ptr, size); - return ptr; + ll_tracy_aligned_delete(ptr); } -void tracy_aligned_free(void *memblock) +void operator delete[](void* ptr, std::align_val_t align) noexcept { - TracyFree(memblock); - ll_aligned_free_fallback(memblock); + ll_tracy_aligned_delete(ptr); } -#endif +#endif // TRACY_ENABLE && !LL_PROFILER_ENABLE_TRACY_OPENGL //static bool LLCommon::sAprInitialized = false; diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 80cfe554c4..72aec57080 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -222,7 +222,7 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r ll_aligned_free_16(ptr); } #endif - LL_PROFILE_ALLOC(ptr, size); + LL_PROFILE_ALLOC(ret, size); return ret; } @@ -231,8 +231,6 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; #if defined(LL_WINDOWS) void* ret = _aligned_malloc(size, 32); -#elif defined(LL_DARWIN) - void* ret = ll_aligned_malloc_fallback( size, 32 ); #else void *ret; if (0 != posix_memalign(&ret, 32, size)) @@ -248,8 +246,31 @@ inline void ll_aligned_free_32(void *p) LL_PROFILE_FREE(p); #if defined(LL_WINDOWS) _aligned_free(p); -#elif defined(LL_DARWIN) - ll_aligned_free_fallback( p ); +#else + free(p); // posix_memalign() is compatible with heap deallocator +#endif +} + +inline void* ll_aligned_malloc_64(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). +{ + LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +#if defined(LL_WINDOWS) + void* ret = _aligned_malloc(size, 64); +#else + void *ret; + if (0 != posix_memalign(&ret, 64, size)) + return nullptr; +#endif + LL_PROFILE_ALLOC(ret, size); + return ret; +} + +inline void ll_aligned_free_64(void *p) +{ + LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; + LL_PROFILE_FREE(p); +#if defined(LL_WINDOWS) + _aligned_free(p); #else free(p); // posix_memalign() is compatible with heap deallocator #endif @@ -261,19 +282,23 @@ LL_FORCE_INLINE void* ll_aligned_malloc(size_t size) { LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; void* ret; - if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0) + if constexpr (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0) { ret = malloc(size); LL_PROFILE_ALLOC(ret, size); } - else if (ALIGNMENT == 16) + else if constexpr (ALIGNMENT == 16) { ret = ll_aligned_malloc_16(size); } - else if (ALIGNMENT == 32) + else if constexpr (ALIGNMENT == 32) { ret = ll_aligned_malloc_32(size); } + else if constexpr (ALIGNMENT == 64) + { + ret = ll_aligned_malloc_64(size); + } else { ret = ll_aligned_malloc_fallback(size, ALIGNMENT); @@ -285,16 +310,20 @@ template<size_t ALIGNMENT> LL_FORCE_INLINE void ll_aligned_free(void* ptr) { LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; - if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN) + if constexpr (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN) { LL_PROFILE_FREE(ptr); free(ptr); } - else if (ALIGNMENT == 16) + else if constexpr (ALIGNMENT == 16) { ll_aligned_free_16(ptr); } - else if (ALIGNMENT == 32) + else if constexpr (ALIGNMENT == 32) + { + return ll_aligned_free_32(ptr); + } + else if constexpr (ALIGNMENT == 64) { return ll_aligned_free_32(ptr); } diff --git a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h index f6a4d24747..6ff4341b69 100644 --- a/indra/llcommon/llprofiler.h +++ b/indra/llcommon/llprofiler.h @@ -74,23 +74,18 @@ #define LL_PROFILER_CONFIGURATION LL_PROFILER_CONFIG_FAST_TIMER #endif -extern thread_local bool gProfilerEnabled; - #if defined(LL_PROFILER_CONFIGURATION) && (LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE) #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER #include "tracy/Tracy.hpp" - // Enable OpenGL profiling - #define LL_PROFILER_ENABLE_TRACY_OPENGL 0 - // Enable RenderDoc labeling - #define LL_PROFILER_ENABLE_RENDER_DOC 0 + //#define LL_PROFILER_ENABLE_RENDER_DOC 0 #endif #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY #define LL_PROFILER_FRAME_END FrameMark - #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); gProfilerEnabled = true; + #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); #define LL_RECORD_BLOCK_TIME(name) ZoneScoped // Want descriptive names; was: ZoneNamedN( ___tracy_scoped_zone, #name, true ); #define LL_PROFILE_ZONE_NAMED(name) ZoneNamedN( ___tracy_scoped_zone, name, true ); #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB @@ -133,7 +128,7 @@ extern thread_local bool gProfilerEnabled; #endif #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER #define LL_PROFILER_FRAME_END FrameMark - #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); gProfilerEnabled = true; + #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); #define LL_RECORD_BLOCK_TIME(name) ZoneNamedN(___tracy_scoped_zone, #name, true); const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__); #define LL_PROFILE_ZONE_NAMED(name) ZoneNamedN( ___tracy_scoped_zone, #name, true ); #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB @@ -158,23 +153,20 @@ extern thread_local bool gProfilerEnabled; #endif // LL_PROFILER #if LL_PROFILER_ENABLE_TRACY_OPENGL -#define LL_PROFILE_GPU_ZONE(name) TracyGpuZone(name) -#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color) +#define LL_PROFILE_GPU_ZONE(name) TracyGpuZone(name) +#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color) #define LL_PROFILER_GPU_COLLECT TracyGpuCollect #define LL_PROFILER_GPU_CONTEXT TracyGpuContext - -// disable memory tracking (incompatible with GPU tracing -#define LL_PROFILE_ALLOC(ptr, size) (void)(ptr); (void)(size); -#define LL_PROFILE_FREE(ptr) (void)(ptr); +#define LL_PROFILER_GPU_CONTEXT_NAMED TracyGpuContextName #else -#define LL_PROFILE_GPU_ZONE(name) (void)name; -#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color; +#define LL_PROFILE_GPU_ZONE(name) (void)name; +#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color; #define LL_PROFILER_GPU_COLLECT #define LL_PROFILER_GPU_CONTEXT +#define LL_PROFILER_GPU_CONTEXT_NAMED(name) (void)name; +#endif // LL_PROFILER_ENABLE_TRACY_OPENGL -#define LL_LABEL_OBJECT_GL(type, name, length, label) - -#if !LL_DARWIN && LL_PROFILER_CONFIGURATION > 1 +#if LL_PROFILER_CONFIGURATION > 1 #define LL_PROFILE_ALLOC(ptr, size) TracyAlloc(ptr, size) #define LL_PROFILE_FREE(ptr) TracyFree(ptr) #else @@ -182,9 +174,7 @@ extern thread_local bool gProfilerEnabled; #define LL_PROFILE_FREE(ptr) (void)(ptr); #endif -#endif - -#if LL_PROFILER_ENABLE_RENDER_DOC +#ifdef LL_PROFILER_ENABLE_RENDER_DOC #define LL_LABEL_OBJECT_GL(type, name, length, label) glObjectLabel(type, name, length, label) #else #define LL_LABEL_OBJECT_GL(type, name, length, label) diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp index f349c4aea8..f926d106be 100644 --- a/indra/llwindow/llwindowwin32.cpp +++ b/indra/llwindow/llwindowwin32.cpp @@ -1661,6 +1661,11 @@ const S32 max_format = (S32)num_formats - 1; return false; } + // Setup Tracy gpu context + { + LL_PROFILER_GPU_CONTEXT; + } + // Disable vertical sync for swap toggleVSync(enable_vsync); @@ -1692,8 +1697,6 @@ const S32 max_format = (S32)num_formats - 1; swapBuffers(); } - LL_PROFILER_GPU_CONTEXT; - return true; } diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 423fe2469a..fda4f043b0 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -1473,6 +1473,7 @@ void sendGameControlInput() bool LLAppViewer::doFrame() { LL_RECORD_BLOCK_TIME(FTM_FRAME); + LL_PROFILE_GPU_ZONE("Frame"); { // and now adjust the visuals from previous frame. if(LLPerfStats::tunables.userAutoTuneEnabled && LLPerfStats::tunables.tuningFlag != LLPerfStats::Tunables::Nothing) @@ -1562,24 +1563,26 @@ bool LLAppViewer::doFrame() if (!LLApp::isExiting()) { - LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard"); - pingMainloopTimeout("Main:JoystickKeyboard"); - - // Scan keyboard for movement keys. Command keys and typing - // are handled by windows callbacks. Don't do this until we're - // done initializing. JC - if (gViewerWindow - && (gHeadlessClient || gViewerWindow->getWindow()->getVisible()) - && gViewerWindow->getActive() - && !gViewerWindow->getWindow()->getMinimized() - && LLStartUp::getStartupState() == STATE_STARTED - && (gHeadlessClient || !gViewerWindow->getShowProgress()) - && !gFocusMgr.focusLocked()) { - LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_IDLE); - joystick->scanJoystick(); - gKeyboard->scanKeyboard(); - gViewerInput.scanMouse(); + LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard"); + pingMainloopTimeout("Main:JoystickKeyboard"); + + // Scan keyboard for movement keys. Command keys and typing + // are handled by windows callbacks. Don't do this until we're + // done initializing. JC + if (gViewerWindow + && (gHeadlessClient || gViewerWindow->getWindow()->getVisible()) + && gViewerWindow->getActive() + && !gViewerWindow->getWindow()->getMinimized() + && LLStartUp::getStartupState() == STATE_STARTED + && (gHeadlessClient || !gViewerWindow->getShowProgress()) + && !gFocusMgr.focusLocked()) + { + LLPerfStats::RecordSceneTime T(LLPerfStats::StatType_t::RENDER_IDLE); + joystick->scanJoystick(); + gKeyboard->scanKeyboard(); + gViewerInput.scanMouse(); + } } // Update state based on messages, user input, object idle. diff --git a/indra/newview/llappviewerlinux.cpp b/indra/newview/llappviewerlinux.cpp index 3dbbda11f9..febe2d1e41 100644 --- a/indra/newview/llappviewerlinux.cpp +++ b/indra/newview/llappviewerlinux.cpp @@ -113,6 +113,11 @@ static void exceptionTerminateHandler() int main( int argc, char **argv ) { + // Call Tracy first thing to have it allocate memory + // https://github.com/wolfpld/tracy/issues/196 + LL_PROFILER_FRAME_END; + LL_PROFILER_SET_THREAD_NAME("App"); + gArgC = argc; gArgV = argv; diff --git a/indra/newview/llappviewermacosx.cpp b/indra/newview/llappviewermacosx.cpp index 4162c0479a..f497a3cdf3 100644 --- a/indra/newview/llappviewermacosx.cpp +++ b/indra/newview/llappviewermacosx.cpp @@ -231,6 +231,11 @@ void infos(const std::string& message) int main( int argc, char **argv ) { + // Call Tracy first thing to have it allocate memory + // https://github.com/wolfpld/tracy/issues/196 + LL_PROFILER_FRAME_END; + LL_PROFILER_SET_THREAD_NAME("App"); + // Store off the command line args for use later. gArgC = argc; gArgV = argv; diff --git a/indra/newview/lldrawpool.h b/indra/newview/lldrawpool.h index bc412214c7..34257f98f8 100644 --- a/indra/newview/lldrawpool.h +++ b/indra/newview/lldrawpool.h @@ -339,7 +339,7 @@ public: } } #else - static inline const char* lookupPass(U32 pass) { return ""; } + static inline const char* lookupPassName(U32 pass) { return ""; } #endif LLRenderPass(const U32 type); diff --git a/indra/newview/llheroprobemanager.cpp b/indra/newview/llheroprobemanager.cpp index ce419498cf..8e3cd0626f 100644 --- a/indra/newview/llheroprobemanager.cpp +++ b/indra/newview/llheroprobemanager.cpp @@ -81,6 +81,7 @@ void LLHeroProbeManager::update() } LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("hero manager update"); llassert(!gCubeSnapshot); // assert a snapshot is not in progress if (LLAppViewer::instance()->logoutRequestSent()) { @@ -279,6 +280,9 @@ void LLHeroProbeManager::renderProbes() // In effect this simulates single-bounce lighting. void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool is_dynamic, F32 near_clip) { + LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("hero probe update"); + // hacky hot-swap of camera specific render targets gPipeline.mRT = &gPipeline.mHeroProbeRT; @@ -349,7 +353,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool for (int i = 0; i < mMipChain.size(); ++i) { - LL_PROFILE_GPU_ZONE("probe mip"); + LL_PROFILE_GPU_ZONE("hero probe mip"); mMipChain[i].bindTarget(); if (i == 0) { @@ -376,7 +380,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool if (mip >= 0) { - LL_PROFILE_GPU_ZONE("probe mip copy"); + LL_PROFILE_GPU_ZONE("hero probe mip copy"); mTexture->bind(0); glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, sourceIdx * 6 + face, 0, 0, res, res); @@ -424,7 +428,7 @@ void LLHeroProbeManager::generateRadiance(LLReflectionMap* probe) for (int i = 0; i < mMipChain.size() / 4; ++i) { - LL_PROFILE_GPU_ZONE("probe radiance gen"); + LL_PROFILE_GPU_ZONE("hero probe radiance gen"); static LLStaticHashedString sMipLevel("mipLevel"); static LLStaticHashedString sRoughness("roughness"); static LLStaticHashedString sWidth("u_width"); @@ -471,6 +475,7 @@ void LLHeroProbeManager::updateUniforms() } LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("hpmu - uniforms") LLMatrix4a modelview; modelview.loadu(gGLModelView); diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp index f083747bfe..4b79ce03eb 100644 --- a/indra/newview/llreflectionmapmanager.cpp +++ b/indra/newview/llreflectionmapmanager.cpp @@ -210,6 +210,7 @@ void LLReflectionMapManager::update() } LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("reflection manager update"); llassert(!gCubeSnapshot); // assert a snapshot is not in progress if (LLAppViewer::instance()->logoutRequestSent()) { @@ -677,6 +678,8 @@ void LLReflectionMapManager::doProbeUpdate() // In effect this simulates single-bounce lighting. void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) { + LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("probe update"); // hacky hot-swap of camera specific render targets gPipeline.mRT = &gPipeline.mAuxillaryRT; @@ -991,6 +994,7 @@ void LLReflectionMapManager::updateUniforms() } LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("rmmu - uniforms") // structure for packing uniform buffer object // see class3/deferred/reflectionProbeF.glsl diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index b0e07d8ea3..94ebca77e2 100644 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -406,6 +406,7 @@ static void update_tp_display(bool minimized) void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot) { LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Render"); + LL_PROFILE_GPU_ZONE("Render"); LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_DISPLAY); // render time capture - This is the main stat for overall rendering. @@ -710,6 +711,7 @@ void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot) if (gPipeline.RenderMirrors && !gSnapshot) { LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Update hero probes"); + LL_PROFILE_GPU_ZONE("hero manager") gPipeline.mHeroProbeManager.update(); gPipeline.mHeroProbeManager.renderProbes(); } diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 65f7c1d70f..1edbf584d6 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -7058,11 +7058,12 @@ extern LLPointer<LLImageGL> gEXRImage; void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst, bool gamma_correct/* = true*/) { - dst->bindTarget(); // gamma correct lighting { LL_PROFILE_GPU_ZONE("tonemap"); + dst->bindTarget(); + static LLCachedControl<bool> buildNoPost(gSavedSettings, "RenderDisablePostProcessing", false); LLGLDepthTest depth(GL_FALSE, GL_FALSE); @@ -7115,13 +7116,13 @@ void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst, bool gamma_co gGL.getTexUnit(channel)->unbind(src->getUsage()); shader->unbind(); + + dst->flush(); } - dst->flush(); } void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget* dst) { - if (RenderScreenSpaceReflections && !gCubeSnapshot) { LL_PROFILE_GPU_ZONE("ssr copy"); @@ -7148,9 +7149,9 @@ void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget* void LLPipeline::generateGlow(LLRenderTarget* src) { + LL_PROFILE_GPU_ZONE("glow generate"); if (sRenderGlow) { - LL_PROFILE_GPU_ZONE("glow"); mGlow[2].bindTarget(); mGlow[2].clear(); @@ -7261,6 +7262,8 @@ void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst) static LLCachedControl<F32> cas_sharpness(gSavedSettings, "RenderCASSharpness", 0.4f); static LLCachedControl<bool> should_auto_adjust(gSavedSettings, "RenderSkyAutoAdjustLegacy", true); + LL_PROFILE_GPU_ZONE("cas"); + LLSettingsSky::ptr_t psky = LLEnvironment::instance().getCurrentSky(); LLGLSLShader* sharpen_shader = psky->getReflectionProbeAmbiance(should_auto_adjust) == 0.f ? &gCASLegacyGammaProgram : &gCASProgram; @@ -7303,7 +7306,7 @@ void LLPipeline::applyFXAA(LLRenderTarget* src, LLRenderTarget* dst, bool combin llassert(!gCubeSnapshot); LLGLSLShader* shader = &gGlowCombineProgram; - LL_PROFILE_GPU_ZONE("aa"); + LL_PROFILE_GPU_ZONE("FXAA"); S32 width = src->getWidth(); S32 height = src->getHeight(); @@ -7388,7 +7391,7 @@ void LLPipeline::generateSMAABuffers(LLRenderTarget* src) // Present everything. if (multisample) { - LL_PROFILE_GPU_ZONE("aa"); + LL_PROFILE_GPU_ZONE("SMAA Edge"); static LLCachedControl<U32> aa_quality(gSavedSettings, "RenderFSAASamples", 0U); U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U); @@ -7503,7 +7506,7 @@ void LLPipeline::applySMAA(LLRenderTarget* src, LLRenderTarget* dst, bool combin { llassert(!gCubeSnapshot); - LL_PROFILE_GPU_ZONE("aa"); + LL_PROFILE_GPU_ZONE("SMAA"); static LLCachedControl<U32> aa_quality(gSavedSettings, "RenderFSAASamples", 0U); U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U); @@ -7566,7 +7569,6 @@ void LLPipeline::applySMAA(LLRenderTarget* src, LLRenderTarget* dst, bool combin void LLPipeline::copyRenderTarget(LLRenderTarget* src, LLRenderTarget* dst) { - LL_PROFILE_GPU_ZONE("copyRenderTarget"); dst->bindTarget(); @@ -7587,8 +7589,9 @@ void LLPipeline::copyRenderTarget(LLRenderTarget* src, LLRenderTarget* dst) void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst) { - // Go ahead and do our glow combine here in our destination. We blit this later into the front buffer. + LL_PROFILE_GPU_ZONE("glow combine"); + // Go ahead and do our glow combine here in our destination. We blit this later into the front buffer. if (dst) { dst->bindTarget(); @@ -7604,6 +7607,7 @@ void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst) mScreenTriangleVB->setBuffer(); mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); } + if (dst) { dst->flush(); @@ -7612,9 +7616,9 @@ void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst) void LLPipeline::renderDoF(LLRenderTarget* src, LLRenderTarget* dst) { + LL_PROFILE_GPU_ZONE("dof"); gViewerWindow->setup3DViewport(); - LL_PROFILE_GPU_ZONE("dof"); LLGLDisable blend(GL_BLEND); // depth of field focal plane calculations |