diff options
| author | Rye <rye@lindenlab.com> | 2025-02-02 02:43:46 -0500 | 
|---|---|---|
| committer | Rye <rye@lindenlab.com> | 2025-02-11 05:04:05 -0500 | 
| commit | 6fcd349f374710a3f4e0e0585bb6d7af86ebb66d (patch) | |
| tree | d1a2e98993c6e4f50230b8d87885ea2ae416c3cf | |
| parent | 51ed6b5424a626499ddb7f95e6da7cf34b375f6a (diff) | |
Fix Tracy memory profiling overloads for aligned allocations
Fix disabling RenderDoc support
Improve ll_aligned_malloc functions on Darwin for 32- and 64-byte alignment by using posix_memalign
| -rw-r--r-- | autobuild.xml | 14 | ||||
| -rw-r--r-- | indra/cmake/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | indra/cmake/Tracy.cmake | 6 | ||||
| -rw-r--r-- | indra/llcommon/linden_common.h | 6 | ||||
| -rw-r--r-- | indra/llcommon/llcommon.cpp | 72 | ||||
| -rw-r--r-- | indra/llcommon/llmemory.h | 49 | ||||
| -rw-r--r-- | indra/llcommon/llprofiler.h | 32 | ||||
| -rw-r--r-- | indra/llrender/llglslshader.cpp | 4 | ||||
| -rw-r--r-- | indra/llrender/llglslshader.h | 4 | ||||
| -rw-r--r-- | indra/llrender/llvertexbuffer.cpp | 2 | ||||
| -rw-r--r-- | indra/llrender/llvertexbuffer.h | 4 | ||||
| -rw-r--r-- | indra/llwindow/llwindowwin32.cpp | 7 | ||||
| -rw-r--r-- | indra/newview/llappviewer.cpp | 37 | ||||
| -rw-r--r-- | indra/newview/llappviewerlinux.cpp | 5 | ||||
| -rw-r--r-- | indra/newview/llappviewermacosx.cpp | 5 | ||||
| -rw-r--r-- | indra/newview/lldrawpool.h | 4 | ||||
| -rw-r--r-- | indra/newview/llheroprobemanager.cpp | 11 | ||||
| -rw-r--r-- | indra/newview/llreflectionmapmanager.cpp | 4 | ||||
| -rw-r--r-- | indra/newview/llviewerdisplay.cpp | 2 | ||||
| -rw-r--r-- | indra/newview/pipeline.cpp | 21 | 
20 files changed, 166 insertions, 124 deletions
diff --git a/autobuild.xml b/autobuild.xml index c3df05a223..653762da9e 100644 --- a/autobuild.xml +++ b/autobuild.xml @@ -2433,11 +2433,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>              <key>archive</key>              <map>                <key>hash</key> -              <string>226225ec049826c35adc5e897e0398ed64d4bedb</string> +              <string>0c3d01b7e9e39c23f0f40c56a1a04d1fba08ead0</string>                <key>hash_algorithm</key>                <string>sha1</string>                <key>url</key> -              <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-darwin64-10376230034.tar.zst</string> +              <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-darwin64-11706699176.tar.zst</string>              </map>              <key>name</key>              <string>darwin64</string> @@ -2447,11 +2447,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>              <key>archive</key>              <map>                <key>hash</key> -              <string>8c5429d1a1486f40cf7e5e88a232222d1fa4f78e</string> +              <string>b46cef5646a8d0471ab6256fe5119220fa238772</string>                <key>hash_algorithm</key>                <string>sha1</string>                <key>url</key> -              <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-windows64-10376230034.tar.zst</string> +              <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-windows64-11706699176.tar.zst</string>              </map>              <key>name</key>              <string>windows64</string> @@ -2461,11 +2461,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>              <key>archive</key>              <map>                <key>hash</key> -              
<string>ed0664a009aba1dcf1246d845839f524e857162e</string> +              <string>beab04c9ea6036b1851a485b65c66cf6a38f0be4</string>                <key>hash_algorithm</key>                <string>sha1</string>                <key>url</key> -              <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-linux64-10376230034.tar.zst</string> +              <string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-linux64-11706699176.tar.zst</string>              </map>              <key>name</key>              <string>linux64</string> @@ -2478,7 +2478,7 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>          <key>copyright</key>          <string>Copyright (c) 2017-2024, Bartosz Taudul (wolf@nereid.pl)</string>          <key>version</key> -        <string>v0.11.0.10376230034</string> +        <string>v0.11.1.11706699176</string>          <key>name</key>          <string>tracy</string>          <key>canonical_repo</key> diff --git a/indra/cmake/CMakeLists.txt b/indra/cmake/CMakeLists.txt index cc217b0563..8d55cc4bbe 100644 --- a/indra/cmake/CMakeLists.txt +++ b/indra/cmake/CMakeLists.txt @@ -55,6 +55,7 @@ set(cmake_SOURCE_FILES          TemplateCheck.cmake          TinyEXR.cmake          TinyGLTF.cmake +        Tracy.cmake          Tut.cmake          UI.cmake          UnixInstall.cmake diff --git a/indra/cmake/Tracy.cmake b/indra/cmake/Tracy.cmake index a7eac2711f..cb09337d15 100644 --- a/indra/cmake/Tracy.cmake +++ b/indra/cmake/Tracy.cmake @@ -15,6 +15,7 @@ endif()  if (USE_TRACY)    option(USE_TRACY_ON_DEMAND "Use on-demand Tracy profiling." ON)    option(USE_TRACY_LOCAL_ONLY "Disallow remote Tracy profiling." 
OFF) +  option(USE_TRACY_GPU "Use Tracy GPU profiling" OFF)    use_system_binary(tracy)    use_prebuilt_binary(tracy) @@ -31,9 +32,8 @@ if (USE_TRACY)      target_compile_definitions(ll::tracy INTERFACE -DTRACY_NO_BROADCAST=1 -DTRACY_ONLY_LOCALHOST=1)    endif () -  # GHA runners don't always provide invariant TSC support, but always build with LL_TESTS enabled -  if (DARWIN AND LL_TESTS) -    target_compile_definitions(ll::tracy INTERFACE -DTRACY_TIMER_FALLBACK=1) +  if (USE_TRACY_GPU AND NOT DARWIN) # Tracy OpenGL mode is incompatible with macOS/iOS +    target_compile_definitions(ll::tracy INTERFACE -DLL_PROFILER_ENABLE_TRACY_OPENGL=1)    endif ()    # See: indra/llcommon/llprofiler.h diff --git a/indra/llcommon/linden_common.h b/indra/llcommon/linden_common.h index a918caa2e8..a41af153fe 100644 --- a/indra/llcommon/linden_common.h +++ b/indra/llcommon/linden_common.h @@ -28,12 +28,6 @@  #define LL_LINDEN_COMMON_H  #include "llprofiler.h" -#if TRACY_ENABLE && !defined(LL_PROFILER_ENABLE_TRACY_OPENGL)  // hooks for memory profiling -void *tracy_aligned_malloc(size_t size, size_t alignment); -void  tracy_aligned_free(void *memblock); -#define _aligned_malloc(X, Y) tracy_aligned_malloc((X), (Y)) -#define _aligned_free(X)      tracy_aligned_free((X)) -#endif  // *NOTE:  Please keep includes here to a minimum!  
// diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp index 84b35749cc..7a22eaf203 100644 --- a/indra/llcommon/llcommon.cpp +++ b/indra/llcommon/llcommon.cpp @@ -33,23 +33,23 @@  #include "lltracethreadrecorder.h"  #include "llcleanup.h" -thread_local bool gProfilerEnabled = false; - -#if (TRACY_ENABLE) +#if LL_PROFILER_CONFIGURATION >= LL_PROFILER_CONFIG_TRACY && TRACY_ENABLE  // Override new/delete for tracy memory profiling  void* ll_tracy_new(size_t size)  { -    void* ptr; -    if (gProfilerEnabled) -    { -        //LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; -        ptr = (malloc)(size); -    } -    else +    void* ptr = (malloc)(size); +    if (!ptr)      { -        ptr = (malloc)(size); +        throw std::bad_alloc();      } +    LL_PROFILE_ALLOC(ptr, size); +    return ptr; +} + +void* ll_tracy_aligned_new(size_t size, size_t alignment) +{ +    void* ptr = ll_aligned_malloc_fallback(size, alignment);      if (!ptr)      {          throw std::bad_alloc(); @@ -58,6 +58,18 @@ void* ll_tracy_new(size_t size)      return ptr;  } +void ll_tracy_delete(void* ptr) +{ +    LL_PROFILE_FREE(ptr); +    (free)(ptr); +} + +void ll_tracy_aligned_delete(void* ptr) +{ +    LL_PROFILE_FREE(ptr); +    ll_aligned_free_fallback(ptr); +} +  void* operator new(size_t size)  {      return ll_tracy_new(size); @@ -68,18 +80,14 @@ void* operator new[](std::size_t count)      return ll_tracy_new(count);  } -void ll_tracy_delete(void* ptr) +void* operator new(size_t size, std::align_val_t align)  { -    LL_PROFILE_FREE(ptr); -    if (gProfilerEnabled) -    { -        //LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; -        (free)(ptr); -    } -    else -    { -        (free)(ptr); -    } +    return ll_tracy_aligned_new(size, (size_t)align); +} + +void* operator new[](std::size_t count, std::align_val_t align) +{ +    return ll_tracy_aligned_new(count, (size_t)align);  }  void operator delete(void *ptr) noexcept @@ -92,27 +100,17 @@ void operator delete[](void* ptr) 
noexcept      ll_tracy_delete(ptr);  } -// C-style malloc/free can't be so easily overridden, so we define tracy versions and use -// a pre-processor #define in linden_common.h to redirect to them. The parens around the native -// functions below prevents recursive substitution by the preprocessor. -// -// Unaligned mallocs are rare in LL code but hooking them causes problems in 3p lib code (looking at -// you, Havok), so we'll only capture the aligned version. - -void *tracy_aligned_malloc(size_t size, size_t alignment) +void operator delete(void *ptr, std::align_val_t align) noexcept  { -    auto ptr = ll_aligned_malloc_fallback(size, alignment); -    if (ptr) LL_PROFILE_ALLOC(ptr, size); -    return ptr; +    ll_tracy_aligned_delete(ptr);  } -void tracy_aligned_free(void *memblock) +void operator delete[](void* ptr, std::align_val_t align) noexcept  { -    LL_PROFILE_FREE(memblock); -    ll_aligned_free_fallback(memblock); +    ll_tracy_aligned_delete(ptr);  } -#endif +#endif // TRACY_ENABLE && !LL_PROFILER_ENABLE_TRACY_OPENGL  //static  bool LLCommon::sAprInitialized = false; diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index b616edfde7..72aec57080 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -231,8 +231,6 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi      LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;  #if defined(LL_WINDOWS)      void* ret = _aligned_malloc(size, 32); -#elif defined(LL_DARWIN) -    void* ret = ll_aligned_malloc_fallback( size, 32 );  #else      void *ret;      if (0 != posix_memalign(&ret, 32, size)) @@ -248,8 +246,31 @@ inline void ll_aligned_free_32(void *p)      LL_PROFILE_FREE(p);  #if defined(LL_WINDOWS)      _aligned_free(p); -#elif defined(LL_DARWIN) -    ll_aligned_free_fallback( p ); +#else +    free(p); // posix_memalign() is compatible with heap deallocator +#endif +} + +inline void* ll_aligned_malloc_64(size_t size) // returned hunk MUST be freed 
with ll_aligned_free_32(). +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +#if defined(LL_WINDOWS) +    void* ret = _aligned_malloc(size, 64); +#else +    void *ret; +    if (0 != posix_memalign(&ret, 64, size)) +        return nullptr; +#endif +    LL_PROFILE_ALLOC(ret, size); +    return ret; +} + +inline void ll_aligned_free_64(void *p) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +    LL_PROFILE_FREE(p); +#if defined(LL_WINDOWS) +    _aligned_free(p);  #else      free(p); // posix_memalign() is compatible with heap deallocator  #endif @@ -261,19 +282,23 @@ LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)  {      LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;      void* ret; -    if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0) +    if constexpr (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)      {          ret = malloc(size);          LL_PROFILE_ALLOC(ret, size);      } -    else if (ALIGNMENT == 16) +    else if constexpr (ALIGNMENT == 16)      {          ret = ll_aligned_malloc_16(size);      } -    else if (ALIGNMENT == 32) +    else if constexpr (ALIGNMENT == 32)      {          ret = ll_aligned_malloc_32(size);      } +    else if constexpr (ALIGNMENT == 64) +    { +        ret = ll_aligned_malloc_64(size); +    }      else      {          ret = ll_aligned_malloc_fallback(size, ALIGNMENT); @@ -285,16 +310,20 @@ template<size_t ALIGNMENT>  LL_FORCE_INLINE void ll_aligned_free(void* ptr)  {      LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; -    if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN) +    if constexpr (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)      {          LL_PROFILE_FREE(ptr);          free(ptr);      } -    else if (ALIGNMENT == 16) +    else if constexpr (ALIGNMENT == 16)      {          ll_aligned_free_16(ptr);      } -    else if (ALIGNMENT == 32) +    else if constexpr (ALIGNMENT == 32) +    { +        return ll_aligned_free_32(ptr); +    } +    else if constexpr (ALIGNMENT == 64)      {          return ll_aligned_free_32(ptr);      } diff --git 
a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h index f6a4d24747..5fb32d6280 100644 --- a/indra/llcommon/llprofiler.h +++ b/indra/llcommon/llprofiler.h @@ -74,23 +74,18 @@  #define LL_PROFILER_CONFIGURATION           LL_PROFILER_CONFIG_FAST_TIMER  #endif -extern thread_local bool gProfilerEnabled; -  #if defined(LL_PROFILER_CONFIGURATION) && (LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE)      #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER          #include "tracy/Tracy.hpp" -        // Enable OpenGL profiling -        #define LL_PROFILER_ENABLE_TRACY_OPENGL 0 -          // Enable RenderDoc labeling -        #define LL_PROFILER_ENABLE_RENDER_DOC 0 +        //#define LL_PROFILER_ENABLE_RENDER_DOC 0      #endif      #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY          #define LL_PROFILER_FRAME_END                   FrameMark -        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name );    gProfilerEnabled = true; +        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name );          #define LL_RECORD_BLOCK_TIME(name)              ZoneScoped // Want descriptive names; was: ZoneNamedN( ___tracy_scoped_zone, #name, true );          #define LL_PROFILE_ZONE_NAMED(name)             ZoneNamedN( ___tracy_scoped_zone, name, true );          #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB @@ -133,7 +128,7 @@ extern thread_local bool gProfilerEnabled;      #endif      #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER          #define LL_PROFILER_FRAME_END                   FrameMark -        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name );    gProfilerEnabled = true; +        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name );          #define LL_RECORD_BLOCK_TIME(name)              
ZoneNamedN(___tracy_scoped_zone, #name, true);   const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);          #define LL_PROFILE_ZONE_NAMED(name)             ZoneNamedN( ___tracy_scoped_zone, #name, true );          #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB @@ -158,23 +153,20 @@ extern thread_local bool gProfilerEnabled;  #endif // LL_PROFILER  #if LL_PROFILER_ENABLE_TRACY_OPENGL -#define LL_PROFILE_GPU_ZONE(name)        TracyGpuZone(name) -#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color) +#define LL_PROFILE_GPU_ZONE(name)         TracyGpuZone(name) +#define LL_PROFILE_GPU_ZONEC(name,color)  TracyGpuZoneC(name,color)  #define LL_PROFILER_GPU_COLLECT           TracyGpuCollect  #define LL_PROFILER_GPU_CONTEXT           TracyGpuContext - -// disable memory tracking (incompatible with GPU tracing -#define LL_PROFILE_ALLOC(ptr, size)             (void)(ptr); (void)(size); -#define LL_PROFILE_FREE(ptr)                    (void)(ptr); +#define LL_PROFILER_GPU_CONTEXT_NAMED     TracyGpuContextName  #else -#define LL_PROFILE_GPU_ZONE(name)        (void)name; -#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color; +#define LL_PROFILE_GPU_ZONE(name)           (void)name; +#define LL_PROFILE_GPU_ZONEC(name,color)    (void)name;(void)color;  #define LL_PROFILER_GPU_COLLECT  #define LL_PROFILER_GPU_CONTEXT +#define LL_PROFILER_GPU_CONTEXT_NAMED(name) (void)name; +#endif // LL_PROFILER_ENABLE_TRACY_OPENGL -#define LL_LABEL_OBJECT_GL(type, name, length, label) - -#if !LL_DARWIN && LL_PROFILER_CONFIGURATION > 1 +#if LL_PROFILER_CONFIGURATION >= LL_PROFILER_CONFIG_TRACY  #define LL_PROFILE_ALLOC(ptr, size)             TracyAlloc(ptr, size)  #define LL_PROFILE_FREE(ptr)                    TracyFree(ptr)  #else @@ -182,8 +174,6 @@ extern thread_local bool gProfilerEnabled;  #define 
LL_PROFILE_FREE(ptr)                    (void)(ptr);  #endif -#endif -  #if LL_PROFILER_ENABLE_RENDER_DOC  #define LL_LABEL_OBJECT_GL(type, name, length, label) glObjectLabel(type, name, length, label)  #else diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp index b3f32fdc83..0841c0e943 100644 --- a/indra/llrender/llglslshader.cpp +++ b/indra/llrender/llglslshader.cpp @@ -543,7 +543,7 @@ bool LLGLSLShader::createShader()          }      } -#ifdef LL_PROFILER_ENABLE_RENDER_DOC +#if LL_PROFILER_ENABLE_RENDER_DOC      setLabel(mName.c_str());  #endif @@ -2061,7 +2061,7 @@ LLUUID LLGLSLShader::hash()      return hash_obj.digest();  } -#ifdef LL_PROFILER_ENABLE_RENDER_DOC +#if LL_PROFILER_ENABLE_RENDER_DOC  void LLGLSLShader::setLabel(const char* label) {      LL_LABEL_OBJECT_GL(GL_PROGRAM, mProgramObject, strlen(label), label);  } diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h index 58c456f134..cade888a83 100644 --- a/indra/llrender/llglslshader.h +++ b/indra/llrender/llglslshader.h @@ -360,7 +360,7 @@ public:      // hacky flag used for optimization in LLDrawPoolAlpha      bool mCanBindFast = false; -#ifdef LL_PROFILER_ENABLE_RENDER_DOC +#if LL_PROFILER_ENABLE_RENDER_DOC      void setLabel(const char* label);  #endif @@ -380,7 +380,7 @@ extern LLGLSLShader         gSolidColorProgram;  //Alpha mask shader (declared here so llappearance can access properly)  extern LLGLSLShader         gAlphaMaskProgram; -#ifdef LL_PROFILER_ENABLE_RENDER_DOC +#if LL_PROFILER_ENABLE_RENDER_DOC  #define LL_SET_SHADER_LABEL(shader) shader.setLabel(#shader)  #else  #define LL_SET_SHADER_LABEL(shader, label) diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 1f0c424188..86ec9a453b 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -885,7 +885,7 @@ bool LLVertexBuffer::validateRange(U32 start, U32 end, U32 count, U32 indices_of      return true;  } -#ifdef 
LL_PROFILER_ENABLE_RENDER_DOC +#if LL_PROFILER_ENABLE_RENDER_DOC  void LLVertexBuffer::setLabel(const char* label) {      LL_LABEL_OBJECT_GL(GL_BUFFER, mGLBuffer, strlen(label), label);  } diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index 375ad76fb8..faaa6ba0f0 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -279,7 +279,7 @@ public:      //for debugging, validate data in given range is valid      bool validateRange(U32 start, U32 end, U32 count, U32 offset) const; -    #ifdef LL_PROFILER_ENABLE_RENDER_DOC +    #if LL_PROFILER_ENABLE_RENDER_DOC      void setLabel(const char* label);      #endif @@ -340,7 +340,7 @@ public:      static U32 sVertexCount;  }; -#ifdef LL_PROFILER_ENABLE_RENDER_DOC +#if LL_PROFILER_ENABLE_RENDER_DOC  #define LL_LABEL_VERTEX_BUFFER(buf, name) buf->setLabel(name)  #else  #define LL_LABEL_VERTEX_BUFFER(buf, name) diff --git a/indra/llwindow/llwindowwin32.cpp b/indra/llwindow/llwindowwin32.cpp index 832cf254d1..90713a6653 100644 --- a/indra/llwindow/llwindowwin32.cpp +++ b/indra/llwindow/llwindowwin32.cpp @@ -1666,6 +1666,11 @@ const   S32   max_format  = (S32)num_formats - 1;          return false;      } +    // Setup Tracy gpu context +    { +        LL_PROFILER_GPU_CONTEXT; +    } +      // Disable vertical sync for swap      toggleVSync(enable_vsync); @@ -1697,8 +1702,6 @@ const   S32   max_format  = (S32)num_formats - 1;          swapBuffers();      } -    LL_PROFILER_GPU_CONTEXT; -      return true;  } diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 9889765fff..84cce2348a 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -1324,6 +1324,7 @@ bool LLAppViewer::frame()  bool LLAppViewer::doFrame()  {      LL_RECORD_BLOCK_TIME(FTM_FRAME); +    LL_PROFILE_GPU_ZONE("Frame");      {      // and now adjust the visuals from previous frame.      
if(LLPerfStats::tunables.userAutoTuneEnabled && LLPerfStats::tunables.tuningFlag != LLPerfStats::Tunables::Nothing) @@ -1413,24 +1414,26 @@ bool LLAppViewer::doFrame()          if (!LLApp::isExiting())          { -            LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard"); -            pingMainloopTimeout("Main:JoystickKeyboard"); - -            // Scan keyboard for movement keys.  Command keys and typing -            // are handled by windows callbacks.  Don't do this until we're -            // done initializing.  JC -            if (gViewerWindow -                && (gHeadlessClient || gViewerWindow->getWindow()->getVisible()) -                && gViewerWindow->getActive() -                && !gViewerWindow->getWindow()->getMinimized() -                && LLStartUp::getStartupState() == STATE_STARTED -                && (gHeadlessClient || !gViewerWindow->getShowProgress()) -                && !gFocusMgr.focusLocked())              { -                LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_IDLE); -                joystick->scanJoystick(); -                gKeyboard->scanKeyboard(); -                gViewerInput.scanMouse(); +                LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard"); +                pingMainloopTimeout("Main:JoystickKeyboard"); + +                // Scan keyboard for movement keys.  Command keys and typing +                // are handled by windows callbacks.  Don't do this until we're +                // done initializing.  
JC +                if (gViewerWindow +                    && (gHeadlessClient || gViewerWindow->getWindow()->getVisible()) +                    && gViewerWindow->getActive() +                    && !gViewerWindow->getWindow()->getMinimized() +                    && LLStartUp::getStartupState() == STATE_STARTED +                    && (gHeadlessClient || !gViewerWindow->getShowProgress()) +                    && !gFocusMgr.focusLocked()) +                { +                    LLPerfStats::RecordSceneTime T(LLPerfStats::StatType_t::RENDER_IDLE); +                    joystick->scanJoystick(); +                    gKeyboard->scanKeyboard(); +                    gViewerInput.scanMouse(); +                }              }              // Update state based on messages, user input, object idle. diff --git a/indra/newview/llappviewerlinux.cpp b/indra/newview/llappviewerlinux.cpp index 1709970156..89d19d180b 100644 --- a/indra/newview/llappviewerlinux.cpp +++ b/indra/newview/llappviewerlinux.cpp @@ -73,6 +73,11 @@ static void exceptionTerminateHandler()  int main( int argc, char **argv )  { +    // Call Tracy first thing to have it allocate memory +    // https://github.com/wolfpld/tracy/issues/196 +    LL_PROFILER_FRAME_END; +    LL_PROFILER_SET_THREAD_NAME("App"); +      gArgC = argc;      gArgV = argv; diff --git a/indra/newview/llappviewermacosx.cpp b/indra/newview/llappviewermacosx.cpp index 4162c0479a..f497a3cdf3 100644 --- a/indra/newview/llappviewermacosx.cpp +++ b/indra/newview/llappviewermacosx.cpp @@ -231,6 +231,11 @@ void infos(const std::string& message)  int main( int argc, char **argv )  { +    // Call Tracy first thing to have it allocate memory +    // https://github.com/wolfpld/tracy/issues/196 +    LL_PROFILER_FRAME_END; +    LL_PROFILER_SET_THREAD_NAME("App"); +      // Store off the command line args for use later.      
gArgC = argc;      gArgV = argv; diff --git a/indra/newview/lldrawpool.h b/indra/newview/lldrawpool.h index 1c8864a9df..46696fc4a4 100644 --- a/indra/newview/lldrawpool.h +++ b/indra/newview/lldrawpool.h @@ -204,7 +204,7 @@ public:          NUM_RENDER_TYPES,      }; -    #ifdef LL_PROFILER_ENABLE_RENDER_DOC +    #if LL_PROFILER_ENABLE_RENDER_DOC      static inline const char* lookupPassName(U32 pass)      {          switch (pass) @@ -340,7 +340,7 @@ public:          }      }      #else -    static inline const char* lookupPass(U32 pass) { return ""; } +    static inline const char* lookupPassName(U32 pass) { return ""; }      #endif      LLRenderPass(const U32 type); diff --git a/indra/newview/llheroprobemanager.cpp b/indra/newview/llheroprobemanager.cpp index e754de2fd1..2ec161c093 100644 --- a/indra/newview/llheroprobemanager.cpp +++ b/indra/newview/llheroprobemanager.cpp @@ -81,6 +81,7 @@ void LLHeroProbeManager::update()      }      LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; +    LL_PROFILE_GPU_ZONE("hero manager update");      llassert(!gCubeSnapshot); // assert a snapshot is not in progress      if (LLAppViewer::instance()->logoutRequestSent())      { @@ -282,6 +283,9 @@ void LLHeroProbeManager::renderProbes()  // In effect this simulates single-bounce lighting.  
void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool is_dynamic, F32 near_clip)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; +    LL_PROFILE_GPU_ZONE("hero probe update"); +      // hacky hot-swap of camera specific render targets      gPipeline.mRT = &gPipeline.mHeroProbeRT; @@ -352,7 +356,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool          for (int i = 0; i < mMipChain.size(); ++i)          { -            LL_PROFILE_GPU_ZONE("probe mip"); +            LL_PROFILE_GPU_ZONE("hero probe mip");              mMipChain[i].bindTarget();              if (i == 0)              { @@ -379,7 +383,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool              if (mip >= 0)              { -                LL_PROFILE_GPU_ZONE("probe mip copy"); +                LL_PROFILE_GPU_ZONE("hero probe mip copy");                  mTexture->bind(0);                  glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, sourceIdx * 6 + face, 0, 0, res, res); @@ -427,7 +431,7 @@ void LLHeroProbeManager::generateRadiance(LLReflectionMap* probe)              for (int i = 0; i < mMipChain.size() / 4; ++i)              { -                LL_PROFILE_GPU_ZONE("probe radiance gen"); +                LL_PROFILE_GPU_ZONE("hero probe radiance gen");                  static LLStaticHashedString sMipLevel("mipLevel");                  static LLStaticHashedString sRoughness("roughness");                  static LLStaticHashedString sWidth("u_width"); @@ -474,6 +478,7 @@ void LLHeroProbeManager::updateUniforms()      }      LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; +    LL_PROFILE_GPU_ZONE("hpmu - uniforms")      LLMatrix4a modelview;      modelview.loadu(gGLModelView); diff --git a/indra/newview/llreflectionmapmanager.cpp b/indra/newview/llreflectionmapmanager.cpp index 4760ab376e..ae5ade5f24 100644 --- a/indra/newview/llreflectionmapmanager.cpp +++ b/indra/newview/llreflectionmapmanager.cpp @@ 
-210,6 +210,7 @@ void LLReflectionMapManager::update()      }      LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; +    LL_PROFILE_GPU_ZONE("reflection manager update");      llassert(!gCubeSnapshot); // assert a snapshot is not in progress      if (LLAppViewer::instance()->logoutRequestSent())      { @@ -696,6 +697,8 @@ void LLReflectionMapManager::doProbeUpdate()  // In effect this simulates single-bounce lighting.  void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; +    LL_PROFILE_GPU_ZONE("probe update");      // hacky hot-swap of camera specific render targets      gPipeline.mRT = &gPipeline.mAuxillaryRT; @@ -1011,6 +1014,7 @@ void LLReflectionMapManager::updateUniforms()      }      LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY; +    LL_PROFILE_GPU_ZONE("rmmu - uniforms")      // structure for packing uniform buffer object      // see class3/deferred/reflectionProbeF.glsl diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index 32019f860d..b34c8600f7 100644 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -407,6 +407,7 @@ static void update_tp_display(bool minimized)  void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot)  {      LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Render"); +    LL_PROFILE_GPU_ZONE("Render");      LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_DISPLAY); // render time capture - This is the main stat for overall rendering. 
@@ -709,6 +710,7 @@ void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot)          if (gPipeline.RenderMirrors && !gSnapshot)          {              LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Update hero probes"); +            LL_PROFILE_GPU_ZONE("hero manager")              gPipeline.mHeroProbeManager.update();              gPipeline.mHeroProbeManager.renderProbes();          } diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 18dd694246..ddfb5a5621 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -7182,11 +7182,11 @@ extern LLPointer<LLImageGL> gEXRImage;  void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst)  { +    LL_PROFILE_GPU_ZONE("tonemap"); +      dst->bindTarget();      // gamma correct lighting      { -        LL_PROFILE_GPU_ZONE("tonemap"); -          static LLCachedControl<bool> buildNoPost(gSavedSettings, "RenderDisablePostProcessing", false);          LLGLDepthTest depth(GL_FALSE, GL_FALSE); @@ -7235,11 +7235,11 @@ void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst)  void LLPipeline::gammaCorrect(LLRenderTarget* src, LLRenderTarget* dst)  { +    LL_PROFILE_GPU_ZONE("gamma correct"); +      dst->bindTarget();      // gamma correct lighting      { -        LL_PROFILE_GPU_ZONE("gamma correct"); -          LLGLDepthTest depth(GL_FALSE, GL_FALSE);          static LLCachedControl<bool> buildNoPost(gSavedSettings, "RenderDisablePostProcessing", false); @@ -7290,9 +7290,9 @@ void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget*  void LLPipeline::generateGlow(LLRenderTarget* src)  { +    LL_PROFILE_GPU_ZONE("glow generate");      if (sRenderGlow)      { -        LL_PROFILE_GPU_ZONE("glow");          mGlow[2].bindTarget();          mGlow[2].clear(); @@ -7401,6 +7401,7 @@ void LLPipeline::generateGlow(LLRenderTarget* src)  void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst)  {      static LLCachedControl<F32> 
cas_sharpness(gSavedSettings, "RenderCASSharpness", 0.4f); +	LL_PROFILE_GPU_ZONE("cas");      if (cas_sharpness == 0.0f || !gCASProgram.isComplete())      {          gPipeline.copyRenderTarget(src, dst); @@ -7445,6 +7446,7 @@ void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst)  void LLPipeline::applyFXAA(LLRenderTarget* src, LLRenderTarget* dst)  { +	LL_PROFILE_GPU_ZONE("FXAA");      {          llassert(!gCubeSnapshot);          bool multisample = RenderFSAAType == 1 && gFXAAProgram[0].isComplete() && mFXAAMap.isComplete(); @@ -7536,7 +7538,7 @@ void LLPipeline::generateSMAABuffers(LLRenderTarget* src)      // Present everything.      if (multisample)      { -        LL_PROFILE_GPU_ZONE("aa"); +        LL_PROFILE_GPU_ZONE("SMAA Edge");          static LLCachedControl<U32> aa_quality(gSavedSettings, "RenderFSAASamples", 0U);          U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U); @@ -7648,13 +7650,13 @@ void LLPipeline::generateSMAABuffers(LLRenderTarget* src)  void LLPipeline::applySMAA(LLRenderTarget* src, LLRenderTarget* dst)  { +	LL_PROFILE_GPU_ZONE("SMAA");      llassert(!gCubeSnapshot);      bool multisample = RenderFSAAType == 2 && gSMAAEdgeDetectProgram[0].isComplete() && mFXAAMap.isComplete() && mSMAABlendBuffer.isComplete();      // Present everything.      if (multisample)      { -        LL_PROFILE_GPU_ZONE("aa");          static LLCachedControl<U32> aa_quality(gSavedSettings, "RenderFSAASamples", 0U);          U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U); @@ -7732,8 +7734,9 @@ void LLPipeline::copyRenderTarget(LLRenderTarget* src, LLRenderTarget* dst)  void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst)  { -    // Go ahead and do our glow combine here in our destination.  We blit this later into the front buffer. +    LL_PROFILE_GPU_ZONE("glow combine"); +    // Go ahead and do our glow combine here in our destination.  We blit this later into the front buffer.      
dst->bindTarget();      { @@ -7752,6 +7755,7 @@ void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst)  void LLPipeline::renderDoF(LLRenderTarget* src, LLRenderTarget* dst)  { +	LL_PROFILE_GPU_ZONE("dof");      {          bool dof_enabled =              (RenderDepthOfFieldInEditMode || !LLToolMgr::getInstance()->inBuildMode()) && @@ -7762,7 +7766,6 @@ void LLPipeline::renderDoF(LLRenderTarget* src, LLRenderTarget* dst)          if (dof_enabled)          { -            LL_PROFILE_GPU_ZONE("dof");              LLGLDisable blend(GL_BLEND);              // depth of field focal plane calculations  | 
