From d9ee26fcbabbb5fe94006b8d31d4bfb988f7b53e Mon Sep 17 00:00:00 2001 From: Rye Date: Wed, 6 Nov 2024 04:06:29 -0500 Subject: Improve Tracy OpenGL integration Fix memory profiling when gpu profiling enabled Fix disabling renderdoc support --- indra/cmake/CMakeLists.txt | 1 + indra/cmake/Tracy.cmake | 5 +++++ indra/llcommon/llcommon.cpp | 2 +- indra/llcommon/llprofiler.h | 26 ++++++++-------------- indra/llwindow/llwindowwin32.cpp | 7 ++++-- indra/newview/llappviewer.cpp | 37 +++++++++++++++++--------------- indra/newview/lldrawpool.h | 2 +- indra/newview/llheroprobemanager.cpp | 11 +++++++--- indra/newview/llreflectionmapmanager.cpp | 4 ++++ indra/newview/llviewerdisplay.cpp | 2 ++ indra/newview/pipeline.cpp | 24 ++++++++++++--------- 11 files changed, 70 insertions(+), 51 deletions(-) diff --git a/indra/cmake/CMakeLists.txt b/indra/cmake/CMakeLists.txt index 9d95a23a59..1083d7f2c4 100644 --- a/indra/cmake/CMakeLists.txt +++ b/indra/cmake/CMakeLists.txt @@ -57,6 +57,7 @@ set(cmake_SOURCE_FILES TemplateCheck.cmake TinyEXR.cmake TinyGLTF.cmake + Tracy.cmake Tut.cmake UI.cmake UnixInstall.cmake diff --git a/indra/cmake/Tracy.cmake b/indra/cmake/Tracy.cmake index 3f92cff899..e3f46cca9b 100644 --- a/indra/cmake/Tracy.cmake +++ b/indra/cmake/Tracy.cmake @@ -20,6 +20,7 @@ endif(LINUX) if (USE_TRACY) option(USE_TRACY_ON_DEMAND "Use on-demand Tracy profiling." ON) option(USE_TRACY_LOCAL_ONLY "Disallow remote Tracy profiling." OFF) + option(USE_TRACY_GPU "Use Tracy GPU profiling" OFF) use_system_binary(tracy) use_prebuilt_binary(tracy) @@ -36,6 +37,10 @@ if (USE_TRACY) target_compile_definitions(ll::tracy INTERFACE -DTRACY_NO_BROADCAST=1 -DTRACY_ONLY_LOCALHOST=1) endif () + if (USE_TRACY_GPU AND NOT DARWIN) # Tracy OpenGL mode is incompatible with macOS/iOS + target_compile_definitions(ll::tracy INTERFACE -DLL_PROFILER_ENABLE_TRACY_OPENGL=1) + endif () + # See: indra/llcommon/llprofiler.h add_compile_definitions(LL_PROFILER_CONFIGURATION=3) endif (USE_TRACY) diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp index 832a7a2b0a..110a1c32a1 100644 --- a/indra/llcommon/llcommon.cpp +++ b/indra/llcommon/llcommon.cpp @@ -33,7 +33,7 @@ #include "lltracethreadrecorder.h" #include "llcleanup.h" -#if LL_PROFILER_CONFIGURATION > 1 && TRACY_ENABLE && !LL_PROFILER_ENABLE_TRACY_OPENGL +#if LL_PROFILER_CONFIGURATION > 1 && TRACY_ENABLE // Override new/delete for tracy memory profiling void* ll_tracy_new(size_t size) diff --git a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h index e5cde192d9..6ff4341b69 100644 --- a/indra/llcommon/llprofiler.h +++ b/indra/llcommon/llprofiler.h @@ -78,11 +78,8 @@ #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER #include "tracy/Tracy.hpp" - // Enable OpenGL profiling - #define LL_PROFILER_ENABLE_TRACY_OPENGL 0 - // Enable RenderDoc labeling - #define LL_PROFILER_ENABLE_RENDER_DOC 0 + //#define LL_PROFILER_ENABLE_RENDER_DOC 0 #endif @@ -156,21 +153,18 @@ #endif // LL_PROFILER #if LL_PROFILER_ENABLE_TRACY_OPENGL -#define LL_PROFILE_GPU_ZONE(name) TracyGpuZone(name) -#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color) +#define LL_PROFILE_GPU_ZONE(name) TracyGpuZone(name) +#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color) #define LL_PROFILER_GPU_COLLECT TracyGpuCollect #define LL_PROFILER_GPU_CONTEXT TracyGpuContext - -// disable memory tracking (incompatible with GPU tracing) -#define LL_PROFILE_ALLOC(ptr, size) (void)(ptr); (void)(size); -#define LL_PROFILE_FREE(ptr) (void)(ptr); +#define LL_PROFILER_GPU_CONTEXT_NAMED TracyGpuContextName #else -#define LL_PROFILE_GPU_ZONE(name) (void)name; -#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color; +#define LL_PROFILE_GPU_ZONE(name) (void)name; +#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color; #define LL_PROFILER_GPU_COLLECT #define LL_PROFILER_GPU_CONTEXT - -#define LL_LABEL_OBJECT_GL(type, name, length, label) +#define LL_PROFILER_GPU_CONTEXT_NAMED(name) (void)name; +#endif // LL_PROFILER_ENABLE_TRACY_OPENGL #if LL_PROFILER_CONFIGURATION > 1 #define LL_PROFILE_ALLOC(ptr, size) TracyAlloc(ptr, size) @@ -180,9 +174,7 @@ #define LL_PROFILE_FREE(ptr) (void)(ptr); #endif -#endif // LL_PROFILER_ENABLE_TRACY_OPENGL - -#if LL_PROFILER_ENABLE_RENDER_DOC +#ifdef LL_PROFILER_ENABLE_RENDER_DOC #define LL_LABEL_OBJECT_GL(type, name, length, label) glObjectLabel(type, name, length, label) #else #define LL_LABEL_OBJECT_GL(type, name, length, label) diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp index f349c4aea8..f926d106be 100644 --- a/indra/llwindow/llwindowwin32.cpp +++ b/indra/llwindow/llwindowwin32.cpp @@ -1661,6 +1661,11 @@ const S32 max_format = (S32)num_formats - 1; return false; } + // Setup Tracy gpu context + { + LL_PROFILER_GPU_CONTEXT; + } + // Disable vertical sync for swap toggleVSync(enable_vsync); @@ -1692,8 +1697,6 @@ const S32 max_format = (S32)num_formats - 1; swapBuffers(); } - LL_PROFILER_GPU_CONTEXT; - return true; } diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 423fe2469a..fda4f043b0 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -1473,6 +1473,7 @@ void sendGameControlInput() bool LLAppViewer::doFrame() { LL_RECORD_BLOCK_TIME(FTM_FRAME); + LL_PROFILE_GPU_ZONE("Frame"); { // and now adjust the visuals from previous frame. if(LLPerfStats::tunables.userAutoTuneEnabled && LLPerfStats::tunables.tuningFlag != LLPerfStats::Tunables::Nothing) @@ -1562,24 +1563,26 @@ bool LLAppViewer::doFrame() if (!LLApp::isExiting()) { - LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard"); - pingMainloopTimeout("Main:JoystickKeyboard"); - - // Scan keyboard for movement keys. Command keys and typing - // are handled by windows callbacks. Don't do this until we're - // done initializing. JC - if (gViewerWindow - && (gHeadlessClient || gViewerWindow->getWindow()->getVisible()) - && gViewerWindow->getActive() - && !gViewerWindow->getWindow()->getMinimized() - && LLStartUp::getStartupState() == STATE_STARTED - && (gHeadlessClient || !gViewerWindow->getShowProgress()) - && !gFocusMgr.focusLocked()) { - LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_IDLE); - joystick->scanJoystick(); - gKeyboard->scanKeyboard(); - gViewerInput.scanMouse(); + LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard"); + pingMainloopTimeout("Main:JoystickKeyboard"); + + // Scan keyboard for movement keys. Command keys and typing + // are handled by windows callbacks. Don't do this until we're + // done initializing. JC + if (gViewerWindow + && (gHeadlessClient || gViewerWindow->getWindow()->getVisible()) + && gViewerWindow->getActive() + && !gViewerWindow->getWindow()->getMinimized() + && LLStartUp::getStartupState() == STATE_STARTED + && (gHeadlessClient || !gViewerWindow->getShowProgress()) + && !gFocusMgr.focusLocked()) + { + LLPerfStats::RecordSceneTime T(LLPerfStats::StatType_t::RENDER_IDLE); + joystick->scanJoystick(); + gKeyboard->scanKeyboard(); + gViewerInput.scanMouse(); + } } // Update state based on messages, user input, object idle. diff --git a/indra/newview/lldrawpool.h b/indra/newview/lldrawpool.h index bc412214c7..34257f98f8 100644 --- a/indra/newview/lldrawpool.h +++ b/indra/newview/lldrawpool.h @@ -339,7 +339,7 @@ public: } } #else - static inline const char* lookupPass(U32 pass) { return ""; } + static inline const char* lookupPassName(U32 pass) { return ""; } #endif LLRenderPass(const U32 type); diff --git a/indra/newview/llheroprobemanager.cpp b/indra/newview/llheroprobemanager.cpp index ce419498cf..8e3cd0626f 100644 --- a/indra/newview/llheroprobemanager.cpp +++ b/indra/newview/llheroprobemanager.cpp @@ -81,6 +81,7 @@ void LLHeroProbeManager::update() } LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("hero manager update"); llassert(!gCubeSnapshot); // assert a snapshot is not in progress if (LLAppViewer::instance()->logoutRequestSent()) { @@ -279,6 +280,9 @@ void LLHeroProbeManager::renderProbes() // In effect this simulates single-bounce lighting. void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool is_dynamic, F32 near_clip) { + LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("hero probe update"); + // hacky hot-swap of camera specific render targets gPipeline.mRT = &gPipeline.mHeroProbeRT; @@ -349,7 +353,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool for (int i = 0; i < mMipChain.size(); ++i) { - LL_PROFILE_GPU_ZONE("probe mip"); + LL_PROFILE_GPU_ZONE("hero probe mip"); mMipChain[i].bindTarget(); if (i == 0) { @@ -376,7 +380,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool if (mip >= 0) { - LL_PROFILE_GPU_ZONE("probe mip copy"); + LL_PROFILE_GPU_ZONE("hero probe mip copy"); mTexture->bind(0); glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, sourceIdx * 6 + face, 0, 0, res, res); @@ -424,7 +428,7 @@ void LLHeroProbeManager::generateRadiance(LLReflectionMap* probe) for (int i = 0; i < mMipChain.size() / 4; ++i) { - LL_PROFILE_GPU_ZONE("probe radiance gen"); + LL_PROFILE_GPU_ZONE("hero probe radiance gen"); static LLStaticHashedString sMipLevel("mipLevel"); static LLStaticHashedString sRoughness("roughness"); static LLStaticHashedString sWidth("u_width"); @@ -471,6 +475,7 @@ void LLHeroProbeManager::updateUniforms() } LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("hpmu - uniforms") LLMatrix4a modelview; modelview.loadu(gGLModelView); diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp index f083747bfe..4b79ce03eb 100644 --- a/indra/newview/llreflectionmapmanager.cpp +++ b/indra/newview/llreflectionmapmanager.cpp @@ -210,6 +210,7 @@ void LLReflectionMapManager::update() } LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("reflection manager update"); llassert(!gCubeSnapshot); // assert a snapshot is not in progress if (LLAppViewer::instance()->logoutRequestSent()) { @@ -677,6 +678,8 @@ void LLReflectionMapManager::doProbeUpdate() // In effect this simulates single-bounce lighting. void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face) { + LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("probe update"); // hacky hot-swap of camera specific render targets gPipeline.mRT = &gPipeline.mAuxillaryRT; @@ -991,6 +994,7 @@ void LLReflectionMapManager::updateUniforms() } LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; + LL_PROFILE_GPU_ZONE("rmmu - uniforms") // structure for packing uniform buffer object // see class3/deferred/reflectionProbeF.glsl diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index b0e07d8ea3..94ebca77e2 100644 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -406,6 +406,7 @@ static void update_tp_display(bool minimized) void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot) { LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Render"); + LL_PROFILE_GPU_ZONE("Render"); LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_DISPLAY); // render time capture - This is the main stat for overall rendering. @@ -710,6 +711,7 @@ void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot) if (gPipeline.RenderMirrors && !gSnapshot) { LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Update hero probes"); + LL_PROFILE_GPU_ZONE("hero manager") gPipeline.mHeroProbeManager.update(); gPipeline.mHeroProbeManager.renderProbes(); } diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index a88756452e..a17fd4d551 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -7063,11 +7063,12 @@ extern LLPointer gEXRImage; void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst, bool gamma_correct/* = true*/) { - dst->bindTarget(); // gamma correct lighting { LL_PROFILE_GPU_ZONE("tonemap"); + dst->bindTarget(); + static LLCachedControl buildNoPost(gSavedSettings, "RenderDisablePostProcessing", false); LLGLDepthTest depth(GL_FALSE, GL_FALSE); @@ -7120,13 +7121,13 @@ void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst, bool gamma_co gGL.getTexUnit(channel)->unbind(src->getUsage()); shader->unbind(); + + dst->flush(); } - dst->flush(); } void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget* dst) { - if (RenderScreenSpaceReflections && !gCubeSnapshot) { LL_PROFILE_GPU_ZONE("ssr copy"); @@ -7153,9 +7154,9 @@ void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget* void LLPipeline::generateGlow(LLRenderTarget* src) { + LL_PROFILE_GPU_ZONE("glow generate"); if (sRenderGlow) { - LL_PROFILE_GPU_ZONE("glow"); mGlow[2].bindTarget(); mGlow[2].clear(); @@ -7266,6 +7267,8 @@ void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst) static LLCachedControl cas_sharpness(gSavedSettings, "RenderCASSharpness", 0.4f); static LLCachedControl should_auto_adjust(gSavedSettings, "RenderSkyAutoAdjustLegacy", true); + LL_PROFILE_GPU_ZONE("cas"); + LLSettingsSky::ptr_t psky = LLEnvironment::instance().getCurrentSky(); LLGLSLShader* sharpen_shader = psky->getReflectionProbeAmbiance(should_auto_adjust) == 0.f ? &gCASLegacyGammaProgram : &gCASProgram; @@ -7308,7 +7311,7 @@ void LLPipeline::applyFXAA(LLRenderTarget* src, LLRenderTarget* dst, bool combin llassert(!gCubeSnapshot); LLGLSLShader* shader = &gGlowCombineProgram; - LL_PROFILE_GPU_ZONE("aa"); + LL_PROFILE_GPU_ZONE("FXAA"); S32 width = src->getWidth(); S32 height = src->getHeight(); @@ -7393,7 +7396,7 @@ void LLPipeline::generateSMAABuffers(LLRenderTarget* src) // Present everything. if (multisample) { - LL_PROFILE_GPU_ZONE("aa"); + LL_PROFILE_GPU_ZONE("SMAA Edge"); static LLCachedControl aa_quality(gSavedSettings, "RenderFSAASamples", 0U); U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U); @@ -7508,7 +7511,7 @@ void LLPipeline::applySMAA(LLRenderTarget* src, LLRenderTarget* dst, bool combin { llassert(!gCubeSnapshot); - LL_PROFILE_GPU_ZONE("aa"); + LL_PROFILE_GPU_ZONE("SMAA"); static LLCachedControl aa_quality(gSavedSettings, "RenderFSAASamples", 0U); U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U); @@ -7571,7 +7574,6 @@ void LLPipeline::applySMAA(LLRenderTarget* src, LLRenderTarget* dst, bool combin void LLPipeline::copyRenderTarget(LLRenderTarget* src, LLRenderTarget* dst) { - LL_PROFILE_GPU_ZONE("copyRenderTarget"); dst->bindTarget(); @@ -7592,8 +7594,9 @@ void LLPipeline::copyRenderTarget(LLRenderTarget* src, LLRenderTarget* dst) void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst) { - // Go ahead and do our glow combine here in our destination. We blit this later into the front buffer. + LL_PROFILE_GPU_ZONE("glow combine"); + // Go ahead and do our glow combine here in our destination. We blit this later into the front buffer. if (dst) { dst->bindTarget(); @@ -7609,6 +7612,7 @@ void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst) mScreenTriangleVB->setBuffer(); mScreenTriangleVB->drawArrays(LLRender::TRIANGLES, 0, 3); } + if (dst) { dst->flush(); @@ -7617,9 +7621,9 @@ void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst) void LLPipeline::renderDoF(LLRenderTarget* src, LLRenderTarget* dst) { + LL_PROFILE_GPU_ZONE("dof"); gViewerWindow->setup3DViewport(); - LL_PROFILE_GPU_ZONE("dof"); LLGLDisable blend(GL_BLEND); // depth of field focal plane calculations -- cgit v1.2.3