diff options
Diffstat (limited to 'indra/llcommon')
59 files changed, 3585 insertions, 674 deletions
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 766a1849f9..ca8b5e946f 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -12,6 +12,7 @@ include(JsonCpp)  include(Copy3rdPartyLibs)  include(ZLIB)  include(URIPARSER) +include(Tracy)  include_directories(      ${EXPAT_INCLUDE_DIRS} @@ -19,6 +20,7 @@ include_directories(      ${JSONCPP_INCLUDE_DIR}      ${ZLIB_INCLUDE_DIRS}      ${URIPARSER_INCLUDE_DIRS} +    ${TRACY_INCLUDE_DIR}      )  # add_executable(lltreeiterators lltreeiterators.cpp) @@ -117,14 +119,16 @@ set(llcommon_SOURCE_FILES      lluriparser.cpp      lluuid.cpp      llworkerthread.cpp -    timing.cpp      u64.cpp +    threadpool.cpp +    workqueue.cpp      StackWalker.cpp      )  set(llcommon_HEADER_FILES      CMakeLists.txt +    chrono.h      ctype_workaround.h      fix_macros.h      indra_constants.h @@ -197,6 +201,8 @@ set(llcommon_HEADER_FILES      llmortician.h      llnametable.h      llpointer.h +    llprofiler.h +    llprofilercategories.h      llpounceable.h      llpredicate.h      llpreprocessor.h @@ -251,8 +257,12 @@ set(llcommon_HEADER_FILES      lockstatic.h      stdtypes.h      stringize.h +    threadpool.h +    threadsafeschedule.h      timer.h +    tuple.h      u64.h +    workqueue.h      StackWalker.h      ) @@ -299,6 +309,7 @@ target_link_libraries(      ${BOOST_SYSTEM_LIBRARY}      ${GOOGLE_PERFTOOLS_LIBRARIES}      ${URIPARSER_LIBRARIES} +    ${TRACY_LIBRARY}      )  if (DARWIN) @@ -355,6 +366,9 @@ if (LL_TESTS)    LL_ADD_INTEGRATION_TEST(lluri "" "${test_libs}")    LL_ADD_INTEGRATION_TEST(llunits "" "${test_libs}")    LL_ADD_INTEGRATION_TEST(stringize "" "${test_libs}") +  LL_ADD_INTEGRATION_TEST(threadsafeschedule "" "${test_libs}") +  LL_ADD_INTEGRATION_TEST(tuple "" "${test_libs}") +  LL_ADD_INTEGRATION_TEST(workqueue "" "${test_libs}")  ## llexception_test.cpp isn't a regression test, and doesn't need to be run  ## every build. 
It's to help a developer make implementation choices about diff --git a/indra/llcommon/chrono.h b/indra/llcommon/chrono.h new file mode 100644 index 0000000000..806e871892 --- /dev/null +++ b/indra/llcommon/chrono.h @@ -0,0 +1,65 @@ +/** + * @file   chrono.h + * @author Nat Goodspeed + * @date   2021-10-05 + * @brief  supplement <chrono> with utility functions + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_CHRONO_H) +#define LL_CHRONO_H + +#include <chrono> +#include <type_traits>              // std::enable_if + +namespace LL +{ + +// time_point_cast() is derived from https://stackoverflow.com/a/35293183 +// without the iteration: we think errors in the ~1 microsecond range are +// probably acceptable. + +// This variant is for the optimal case when the source and dest use the same +// clock: that case is handled by std::chrono. +template <typename DestTimePoint, typename SrcTimePoint, +          typename std::enable_if<std::is_same<typename DestTimePoint::clock, +                                               typename SrcTimePoint::clock>::value, +                                  bool>::type = true> +DestTimePoint time_point_cast(const SrcTimePoint& time) +{ +    return std::chrono::time_point_cast<typename DestTimePoint::duration>(time); +} + +// This variant is for when the source and dest use different clocks -- see +// the linked StackOverflow answer, also Howard Hinnant's, for more context. +template <typename DestTimePoint, typename SrcTimePoint, +          typename std::enable_if<! std::is_same<typename DestTimePoint::clock, +                                                 typename SrcTimePoint::clock>::value, +                                  bool>::type = true> +DestTimePoint time_point_cast(const SrcTimePoint& time) +{ +    // The basic idea is that we must adjust the passed time_point by the +    // difference between the clocks' epochs. 
But since time_point doesn't +    // expose its epoch, we fall back on what each of them thinks is now(). +    // However, since we necessarily make sequential calls to those now() +    // functions, the answers differ not only by the cycles spent executing +    // those calls, but by potential OS interruptions between them. Try to +    // reduce that error by capturing the source clock time both before and +    // after the dest clock, and splitting the difference. Of course an +    // interruption between two of these now() calls without a comparable +    // interruption between the other two will skew the result, but better is +    // more expensive. +    const auto src_before = typename SrcTimePoint::clock::now(); +    const auto dest_now   = typename DestTimePoint::clock::now(); +    const auto src_after  = typename SrcTimePoint::clock::now(); +    const auto src_diff   = src_after - src_before; +    const auto src_now    = src_before + src_diff / 2; +    return dest_now + (time - src_now); +} + +} // namespace LL + +#endif /* ! defined(LL_CHRONO_H) */ diff --git a/indra/llcommon/linden_common.h b/indra/llcommon/linden_common.h index e5a913a6a9..a228fd22be 100644 --- a/indra/llcommon/linden_common.h +++ b/indra/llcommon/linden_common.h @@ -27,6 +27,14 @@  #ifndef LL_LINDEN_COMMON_H  #define LL_LINDEN_COMMON_H +#include "llprofiler.h" +#if TRACY_ENABLE && !defined(LL_PROFILER_ENABLE_TRACY_OPENGL)  // hooks for memory profiling +void *tracy_aligned_malloc(size_t size, size_t alignment); +void  tracy_aligned_free(void *memblock); +#define _aligned_malloc(X, Y) tracy_aligned_malloc((X), (Y)) +#define _aligned_free(X)      tracy_aligned_free((X)) +#endif +  // *NOTE:  Please keep includes here to a minimum!  
//  // Files included here are included in every library .cpp file and diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp index 96be913d17..d2c4e66160 100644 --- a/indra/llcommon/llcommon.cpp +++ b/indra/llcommon/llcommon.cpp @@ -33,6 +33,66 @@  #include "lltracethreadrecorder.h"  #include "llcleanup.h" +thread_local bool gProfilerEnabled = false; + +#if (TRACY_ENABLE) +// Override new/delete for tracy memory profiling +void *operator new(size_t size) +{ +    void* ptr; +    if (gProfilerEnabled) +    { +        LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +        ptr = (malloc)(size); +    } +    else +    { +        ptr = (malloc)(size); +    } +    if (!ptr) +    { +        throw std::bad_alloc(); +    } +    TracyAlloc(ptr, size); +    return ptr; +} + +void operator delete(void *ptr) noexcept +{ +    TracyFree(ptr); +    if (gProfilerEnabled) +    { +        LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +        (free)(ptr); +    } +    else +    { +        (free)(ptr); +    } +} + +// C-style malloc/free can't be so easily overridden, so we define tracy versions and use +// a pre-processor #define in linden_common.h to redirect to them. The parens around the native +// functions below prevents recursive substitution by the preprocessor. +// +// Unaligned mallocs are rare in LL code but hooking them causes problems in 3p lib code (looking at +// you, Havok), so we'll only capture the aligned version. 
+ +void *tracy_aligned_malloc(size_t size, size_t alignment) +{ +    auto ptr = ll_aligned_malloc_fallback(size, alignment); +    if (ptr) TracyAlloc(ptr, size); +    return ptr; +} + +void tracy_aligned_free(void *memblock) +{ +    TracyFree(memblock); +    ll_aligned_free_fallback(memblock); +} + +#endif +  //static  BOOL LLCommon::sAprInitialized = FALSE; diff --git a/indra/llcommon/llcond.h b/indra/llcommon/llcond.h index e31b67d893..da6e6affe1 100644 --- a/indra/llcommon/llcond.h +++ b/indra/llcommon/llcond.h @@ -53,6 +53,8 @@ private:      LLCoros::Mutex mMutex;      // Use LLCoros::ConditionVariable for the same reason.      LLCoros::ConditionVariable mCond; +    using LockType = LLCoros::LockType; +    using cv_status = LLCoros::cv_status;  public:      /// LLCond can be explicitly initialized with a specific value for mData if @@ -65,10 +67,29 @@ public:      LLCond(const LLCond&) = delete;      LLCond& operator=(const LLCond&) = delete; -    /// get() returns a const reference to the stored DATA. The only way to -    /// get a non-const reference -- to modify the stored DATA -- is via -    /// update_one() or update_all(). -    const value_type& get() const { return mData; } +    /** +     * get() returns the stored DATA by value -- so to use get(), DATA must +     * be copyable. The only way to get a non-const reference -- to modify +     * the stored DATA -- is via update_one() or update_all(). +     */ +    value_type get() +    { +        LockType lk(mMutex); +        return mData; +    } + +    /** +     * get(functor) returns whatever the functor returns. It allows us to peek +     * at the stored DATA without copying the whole thing. The functor must +     * accept a const reference to DATA. If you want to modify DATA, call +     * update_one() or update_all() instead. 
+     */ +    template <typename FUNC> +    auto get(FUNC&& func) +    { +        LockType lk(mMutex); +        return std::forward<FUNC>(func)(const_data()); +    }      /**       * Pass update_one() an invocable accepting non-const (DATA&). The @@ -80,11 +101,11 @@ public:       * update_one() when DATA is a struct or class.       */      template <typename MODIFY> -    void update_one(MODIFY modify) +    void update_one(MODIFY&& modify)      {          { // scope of lock can/should end before notify_one() -            LLCoros::LockType lk(mMutex); -            modify(mData); +            LockType lk(mMutex); +            std::forward<MODIFY>(modify)(mData);          }          mCond.notify_one();      } @@ -99,11 +120,11 @@ public:       * update_all() when DATA is a struct or class.       */      template <typename MODIFY> -    void update_all(MODIFY modify) +    void update_all(MODIFY&& modify)      {          { // scope of lock can/should end before notify_all() -            LLCoros::LockType lk(mMutex); -            modify(mData); +            LockType lk(mMutex); +            std::forward<MODIFY>(modify)(mData);          }          mCond.notify_all();      } @@ -116,9 +137,9 @@ public:       * wait() on the condition_variable.       */      template <typename Pred> -    void wait(Pred pred) +    void wait(Pred&& pred)      { -        LLCoros::LockType lk(mMutex); +        LockType lk(mMutex);          // We must iterate explicitly since the predicate accepted by          // condition_variable::wait() requires a different signature:          // condition_variable::wait() calls its predicate with no arguments. @@ -127,7 +148,7 @@ public:          // But what if they instead pass a predicate accepting non-const          // (DATA&)? Such a predicate could modify mData, which would be Bad.          // Forbid that. -        while (! pred(const_cast<const value_type&>(mData))) +        while (! 
std::forward<Pred>(pred)(const_data()))          {              mCond.wait(lk);          } @@ -144,7 +165,7 @@ public:       * returning true.       */      template <typename Rep, typename Period, typename Pred> -    bool wait_for(const std::chrono::duration<Rep, Period>& timeout_duration, Pred pred) +    bool wait_for(const std::chrono::duration<Rep, Period>& timeout_duration, Pred&& pred)      {          // Instead of replicating wait_until() logic, convert duration to          // time_point and just call wait_until(). @@ -153,7 +174,8 @@ public:          // wrong! We'd keep pushing the timeout time farther and farther into          // the future. This way, we establish a definite timeout time and          // stick to it. -        return wait_until(std::chrono::steady_clock::now() + timeout_duration, pred); +        return wait_until(std::chrono::steady_clock::now() + timeout_duration, +                          std::forward<Pred>(pred));      }      /** @@ -163,9 +185,9 @@ public:       * generic wait_for() method.       */      template <typename Pred> -    bool wait_for(F32Milliseconds timeout_duration, Pred pred) +    bool wait_for(F32Milliseconds timeout_duration, Pred&& pred)      { -        return wait_for(convert(timeout_duration), pred); +        return wait_for(convert(timeout_duration), std::forward<Pred>(pred));      }  protected: @@ -183,6 +205,10 @@ protected:      }  private: +    // It's important to pass a const ref to certain user-specified functors +    // that aren't supposed to be able to modify mData. +    const value_type& const_data() const { return mData; } +      /**       * Pass wait_until() a chrono::time_point, indicating the time at which we       * should stop waiting, and a predicate accepting (const DATA&), returning @@ -203,21 +229,21 @@ private:       * honoring a fixed timeout.       
*/      template <typename Clock, typename Duration, typename Pred> -    bool wait_until(const std::chrono::time_point<Clock, Duration>& timeout_time, Pred pred) +    bool wait_until(const std::chrono::time_point<Clock, Duration>& timeout_time, Pred&& pred)      { -        LLCoros::LockType lk(mMutex); +        LockType lk(mMutex);          // We advise the caller to pass a predicate accepting (const DATA&).          // But what if they instead pass a predicate accepting non-const          // (DATA&)? Such a predicate could modify mData, which would be Bad.          // Forbid that. -        while (! pred(const_cast<const value_type&>(mData))) +        while (! std::forward<Pred>(pred)(const_data()))          { -            if (LLCoros::cv_status::timeout == mCond.wait_until(lk, timeout_time)) +            if (cv_status::timeout == mCond.wait_until(lk, timeout_time))              {                  // It's possible that wait_until() timed out AND the predicate                  // became true more or less simultaneously. Even though                  // wait_until() timed out, check the predicate one more time. 
-                return pred(const_cast<const value_type&>(mData)); +                return std::forward<Pred>(pred)(const_data());              }          }          return true; diff --git a/indra/llcommon/lldate.cpp b/indra/llcommon/lldate.cpp index 7a2a0869f4..2ddcf40895 100644 --- a/indra/llcommon/lldate.cpp +++ b/indra/llcommon/lldate.cpp @@ -86,11 +86,9 @@ std::string LLDate::asRFC1123() const  	return toHTTPDateString (std::string ("%A, %d %b %Y %H:%M:%S GMT"));  } -LLTrace::BlockTimerStatHandle FT_DATE_FORMAT("Date Format"); -  std::string LLDate::toHTTPDateString (std::string fmt) const  { -	LL_RECORD_BLOCK_TIME(FT_DATE_FORMAT); +    LL_PROFILE_ZONE_SCOPED;  	time_t locSeconds = (time_t) mSecondsSinceEpoch;  	struct tm * gmt = gmtime (&locSeconds); @@ -99,7 +97,7 @@ std::string LLDate::toHTTPDateString (std::string fmt) const  std::string LLDate::toHTTPDateString (tm * gmt, std::string fmt)  { -	LL_RECORD_BLOCK_TIME(FT_DATE_FORMAT); +    LL_PROFILE_ZONE_SCOPED;  	// avoid calling setlocale() unnecessarily - it's expensive.  	
static std::string prev_locale = ""; diff --git a/indra/llcommon/llerror.cpp b/indra/llcommon/llerror.cpp index 55a06f8326..919d2dabc4 100644 --- a/indra/llcommon/llerror.cpp +++ b/indra/llcommon/llerror.cpp @@ -109,6 +109,7 @@ namespace {  		virtual void recordMessage(LLError::ELevel level,  									const std::string& message) override  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING  			int syslogPriority = LOG_CRIT;  			switch (level) {  				case LLError::LEVEL_DEBUG:	syslogPriority = LOG_DEBUG;	break; @@ -166,6 +167,7 @@ namespace {          virtual void recordMessage(LLError::ELevel level,                                      const std::string& message) override          { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING              if (LLError::getAlwaysFlush())              {                  mFile << message << std::endl; @@ -194,7 +196,7 @@ namespace {          {              return LLError::getEnabledLogTypesMask() & 0x04;          } - +                  LL_FORCE_INLINE std::string createBoldANSI()          {              std::string ansi_code; @@ -220,10 +222,10 @@ namespace {          LL_FORCE_INLINE std::string createANSI(const std::string& color)          {              std::string ansi_code; -            ansi_code += '\033'; -            ansi_code += "["; +            ansi_code  += '\033'; +            ansi_code  += "[";              ansi_code += "38;5;"; -            ansi_code += color; +            ansi_code  += color;              ansi_code += "m";              return ansi_code; @@ -232,6 +234,7 @@ namespace {  		virtual void recordMessage(LLError::ELevel level,  					   const std::string& message) override  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING              // The default colors for error, warn and debug are now a bit more pastel              // and easier to read on the default (black) terminal background but you               // now have the option to set the color of each via an environment variables: @@ 
-261,6 +264,7 @@ namespace {  			}              else              { +                LL_PROFILE_ZONE_NAMED("fprintf");                   fprintf(stderr, "%s\n", message.c_str());              }  		} @@ -270,6 +274,7 @@ namespace {          LL_FORCE_INLINE void writeANSI(const std::string& ansi_code, const std::string& message)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING              static std::string s_ansi_bold = createBoldANSI();  // bold text              static std::string s_ansi_reset = createResetANSI();  // reset  			// ANSI color code escape sequence, message, and reset in one fprintf call @@ -306,6 +311,7 @@ namespace {  		virtual void recordMessage(LLError::ELevel level,  								   const std::string& message) override  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING  			mBuffer->addLine(message);  		} @@ -332,6 +338,7 @@ namespace {  		virtual void recordMessage(LLError::ELevel level,  								   const std::string& message) override  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING  			debugger_print(message);  		}  	}; @@ -1210,6 +1217,7 @@ namespace  	void writeToRecorders(const LLError::CallSite& site, const std::string& message)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING  		LLError::ELevel level = site.mLevel;  		SettingsConfigPtr s = Globals::getInstance()->getSettingsConfig(); @@ -1344,6 +1352,7 @@ namespace LLError  	bool Log::shouldLog(CallSite& site)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING  		LLMutexTrylock lock(getMutex<LOG_MUTEX>(), 5);  		if (!lock.isLocked())  		{ @@ -1388,6 +1397,7 @@ namespace LLError  	void Log::flush(const std::ostringstream& out, const CallSite& site)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING  		LLMutexTrylock lock(getMutex<LOG_MUTEX>(),5);  		if (!lock.isLocked())  		{ diff --git a/indra/llcommon/llerror.h b/indra/llcommon/llerror.h index d439136ca8..d06c0e2132 100644 --- a/indra/llcommon/llerror.h +++ b/indra/llcommon/llerror.h @@ -35,7 +35,9 @@  
#include "stdtypes.h" +#include "llprofiler.h"  #include "llpreprocessor.h" +  #include <boost/static_assert.hpp>  const int LL_ERR_NOERR = 0; @@ -348,7 +350,8 @@ typedef LLError::NoClassInfo _LL_CLASS_TO_LOG;  // if (condition) LL_INFOS() << "True" << LL_ENDL; else LL_INFOS()() << "False" << LL_ENDL;  #define lllog(level, once, ...)                                         \ -	do {                                                                \ +    do {                                                                \ +        LL_PROFILE_ZONE_NAMED("lllog");                                 \  		const char* tags[] = {"", ##__VA_ARGS__};                       \  		static LLError::CallSite _site(lllog_site_args_(level, once, tags)); \  		lllog_test_() diff --git a/indra/llcommon/llerrorcontrol.h b/indra/llcommon/llerrorcontrol.h index e87bb7bf35..57f10b7895 100644 --- a/indra/llcommon/llerrorcontrol.h +++ b/indra/llcommon/llerrorcontrol.h @@ -190,6 +190,7 @@ namespace LLError          {}          void recordMessage(LLError::ELevel level, const std::string& message) override          { +            LL_PROFILE_ZONE_SCOPED              mCallable(level, message);          }      private: diff --git a/indra/llcommon/lleventfilter.h b/indra/llcommon/lleventfilter.h index 48c2570732..7613850fb2 100644 --- a/indra/llcommon/lleventfilter.h +++ b/indra/llcommon/lleventfilter.h @@ -429,6 +429,8 @@ public:      // path, then stores it to mTarget.      virtual bool post(const LLSD& event)      { +        LL_PROFILE_ZONE_SCOPED +          // Extract the element specified by 'mPath' from 'event'. 
To perform a          // generic type-appropriate store through mTarget, construct an          // LLSDParam<T> and store that, thus engaging LLSDParam's custom diff --git a/indra/llcommon/llexception.cpp b/indra/llcommon/llexception.cpp index b584b0ff8b..46560b5e4c 100644 --- a/indra/llcommon/llexception.cpp +++ b/indra/llcommon/llexception.cpp @@ -97,6 +97,11 @@ static const U32 STATUS_MSC_EXCEPTION = 0xE06D7363; // compiler specific  U32 msc_exception_filter(U32 code, struct _EXCEPTION_POINTERS *exception_infop)  { +    const auto stack = to_string(boost::stacktrace::stacktrace()); +    LL_WARNS() << "SEH Exception handled (that probably shouldn't be): Code " << code  +        << "\n Stack trace: \n"  +        << stack << LL_ENDL; +      if (code == STATUS_MSC_EXCEPTION)      {          // C++ exception, go on diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp index 5b6a7b82f8..2612d0f07c 100644 --- a/indra/llcommon/llfasttimer.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -191,29 +191,30 @@ TimeBlockTreeNode& BlockTimerStatHandle::getTreeNode() const  } +  void BlockTimer::bootstrapTimerTree()  { -	for (auto& base : BlockTimerStatHandle::instance_snapshot()) -	{ -		// because of indirect derivation from LLInstanceTracker, have to downcast -		BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(base); -		if (&timer == &BlockTimer::getRootTimeBlock()) continue; - -		// bootstrap tree construction by attaching to last timer to be on stack -		// when this timer was called -		if (timer.getParent() == &BlockTimer::getRootTimeBlock()) -		{ -			TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator(); - -			if (accumulator.mLastCaller) -			{ -				timer.setParent(accumulator.mLastCaller); -				accumulator.mParent = accumulator.mLastCaller; -			} -			// no need to push up tree on first use, flag can be set spuriously -			accumulator.mMoveUpTree = false; -		} -	} +    for (auto& base : BlockTimerStatHandle::instance_snapshot()) 
+    { +        // because of indirect derivation from LLInstanceTracker, have to downcast +        BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(base); +        if (&timer == &BlockTimer::getRootTimeBlock()) continue; + +        // bootstrap tree construction by attaching to last timer to be on stack +        // when this timer was called +        if (timer.getParent() == &BlockTimer::getRootTimeBlock()) +        { +            TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator(); + +            if (accumulator.mLastCaller) +            { +                timer.setParent(accumulator.mLastCaller); +                accumulator.mParent = accumulator.mLastCaller; +            } +            // no need to push up tree on first use, flag can be set spuriously +            accumulator.mMoveUpTree = false; +        } +    }  }  // bump timers up tree if they have been flagged as being in the wrong place @@ -221,6 +222,7 @@ void BlockTimer::bootstrapTimerTree()  // this preserves partial order derived from current frame's observations  void BlockTimer::incrementalUpdateTimerTree()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	for(block_timer_tree_df_post_iterator_t it = begin_block_timer_tree_df_post(BlockTimer::getRootTimeBlock());  		it != end_block_timer_tree_df_post();  		++it) @@ -260,7 +262,8 @@ void BlockTimer::incrementalUpdateTimerTree()  void BlockTimer::updateTimes() -	{ +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	// walk up stack of active timers and accumulate current time while leaving timing structures active  	BlockTimerStackRecord* stack_record	= LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();  	if (!stack_record) return; @@ -271,7 +274,7 @@ void BlockTimer::updateTimes()  	while(cur_timer   		&& cur_timer->mParentTimerData.mActiveTimer != cur_timer) // root defined by parent pointing to self -		{ +	{  		U64 cumulative_time_delta = cur_time - cur_timer->mStartTime;  		cur_timer->mStartTime = 
cur_time; diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index dfc63d08a2..9bd93d7240 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -38,7 +38,10 @@  #define LL_FAST_TIMER_ON 1  #define LL_FASTTIMER_USE_RDTSC 1 +// NOTE: Also see llprofiler.h +#if !defined(LL_PROFILER_CONFIGURATION)  #define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__); +#endif // LL_PROFILER_CONFIGURATION  namespace LLTrace  { diff --git a/indra/llcommon/llframetimer.cpp b/indra/llcommon/llframetimer.cpp index 1e9920746b..c54029e8b4 100644 --- a/indra/llcommon/llframetimer.cpp +++ b/indra/llcommon/llframetimer.cpp @@ -29,6 +29,11 @@  #include "llframetimer.h" +// We don't bother building a stand alone lib; we just need to include the one source file for Tracy support +#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER +	#include "TracyClient.cpp" +#endif // LL_PROFILER_CONFIGURATION +  // Static members  //LLTimer	LLFrameTimer::sInternalTimer;  U64 LLFrameTimer::sStartTotalTime = totalTime(); diff --git a/indra/llcommon/llinstancetracker.h b/indra/llcommon/llinstancetracker.h index 402333cca7..02535a59e7 100644 --- a/indra/llcommon/llinstancetracker.h +++ b/indra/llcommon/llinstancetracker.h @@ -83,13 +83,34 @@ class LLInstanceTracker      typedef llthread::LockStatic<StaticData> LockStatic;  public: +    using ptr_t  = std::shared_ptr<T>; +    using weak_t = std::weak_ptr<T>; + +    /** +     * Storing a dumb T* somewhere external is a bad idea, since +     * LLInstanceTracker subclasses are explicitly destroyed rather than +     * managed by smart pointers. It's legal to declare stack instances of an +     * LLInstanceTracker subclass. 
But it's reasonable to store a +     * std::weak_ptr<T>, which will become invalid when the T instance is +     * destroyed. +     */ +    weak_t getWeak() +    { +        return mSelf; +    } + +    static S32 instanceCount()  +    {  +        return LockStatic()->mMap.size();  +    } +          // snapshot of std::pair<const KEY, std::shared_ptr<T>> pairs      class snapshot      {          // It's very important that what we store in this snapshot are          // weak_ptrs, NOT shared_ptrs. That's how we discover whether any          // instance has been deleted during the lifespan of a snapshot. -        typedef std::vector<std::pair<const KEY, std::weak_ptr<T>>> VectorType; +        typedef std::vector<std::pair<const KEY, weak_t>> VectorType;          // Dereferencing our iterator produces a std::shared_ptr for each          // instance that still exists. Since we store weak_ptrs, that involves          // two chained transformations: @@ -98,7 +119,7 @@ public:          // It is very important that we filter lazily, that is, during          // traversal. Any one of our stored weak_ptrs might expire during          // traversal. -        typedef std::pair<const KEY, std::shared_ptr<T>> strong_pair; +        typedef std::pair<const KEY, ptr_t> strong_pair;          // Note for future reference: nat has not yet had any luck (up to          // Boost 1.67) trying to use boost::transform_iterator with a hand-          // coded functor, only with actual functions. In my experience, an @@ -202,17 +223,12 @@ public:          iterator end()   { return iterator(snapshot::end(),   key_getter); }      }; -    static T* getInstance(const KEY& k) +    static ptr_t getInstance(const KEY& k)      {          LockStatic lock;          const InstanceMap& map(lock->mMap);          typename InstanceMap::const_iterator found = map.find(k); -        return (found == map.end()) ? 
NULL : found->second.get(); -    } - -    static S32 instanceCount()  -    {  -        return LockStatic()->mMap.size();  +        return (found == map.end()) ? NULL : found->second;      }  protected: @@ -222,7 +238,9 @@ protected:          // shared_ptr, so give it a no-op deleter. We store shared_ptrs in our          // InstanceMap specifically so snapshot can store weak_ptrs so we can          // detect deletions during traversals. -        std::shared_ptr<T> ptr(static_cast<T*>(this), [](T*){}); +        ptr_t ptr(static_cast<T*>(this), [](T*){}); +        // save corresponding weak_ptr for future reference +        mSelf = ptr;          LockStatic lock;          add_(lock, key, ptr);      } @@ -257,7 +275,7 @@ private:      static std::string report(const char* key) { return report(std::string(key)); }      // caller must instantiate LockStatic -    void add_(LockStatic& lock, const KEY& key, const std::shared_ptr<T>& ptr)  +    void add_(LockStatic& lock, const KEY& key, const ptr_t& ptr)       {           mInstanceKey = key;           InstanceMap& map = lock->mMap; @@ -281,7 +299,7 @@ private:              break;          }      } -    std::shared_ptr<T> remove_(LockStatic& lock) +    ptr_t remove_(LockStatic& lock)      {          InstanceMap& map = lock->mMap;          typename InstanceMap::iterator iter = map.find(mInstanceKey); @@ -295,6 +313,9 @@ private:      }  private: +    // Storing a weak_ptr to self is a bit like deriving from +    // std::enable_shared_from_this(), except more explicit. 
+    weak_t mSelf;      KEY mInstanceKey;  }; @@ -326,6 +347,9 @@ class LLInstanceTracker<T, void, KEY_COLLISION_BEHAVIOR>      typedef llthread::LockStatic<StaticData> LockStatic;  public: +    using ptr_t  = std::shared_ptr<T>; +    using weak_t = std::weak_ptr<T>; +      /**       * Storing a dumb T* somewhere external is a bad idea, since       * LLInstanceTracker subclasses are explicitly destroyed rather than @@ -334,12 +358,15 @@ public:       * std::weak_ptr<T>, which will become invalid when the T instance is       * destroyed.       */ -    std::weak_ptr<T> getWeak() +    weak_t getWeak()      {          return mSelf;      } -    static S32 instanceCount() { return LockStatic()->mSet.size(); } +    static S32 instanceCount() +    { +        return LockStatic()->mSet.size(); +    }      // snapshot of std::shared_ptr<T> pointers      class snapshot @@ -347,7 +374,7 @@ public:          // It's very important that what we store in this snapshot are          // weak_ptrs, NOT shared_ptrs. That's how we discover whether any          // instance has been deleted during the lifespan of a snapshot. -        typedef std::vector<std::weak_ptr<T>> VectorType; +        typedef std::vector<weak_t> VectorType;          // Dereferencing our iterator produces a std::shared_ptr for each          // instance that still exists. Since we store weak_ptrs, that involves          // two chained transformations: @@ -453,7 +480,7 @@ protected:  private:      // Storing a weak_ptr to self is a bit like deriving from      // std::enable_shared_from_this(), except more explicit. 
-    std::weak_ptr<T> mSelf; +    weak_t mSelf;  };  #endif diff --git a/indra/llcommon/llleaplistener.cpp b/indra/llcommon/llleaplistener.cpp index 3e6ce9092c..11bfec1b31 100644 --- a/indra/llcommon/llleaplistener.cpp +++ b/indra/llcommon/llleaplistener.cpp @@ -220,7 +220,7 @@ void LLLeapListener::getAPI(const LLSD& request) const  {      Response reply(LLSD(), request); -    LLEventAPI* found = LLEventAPI::getInstance(request["api"]); +    auto found = LLEventAPI::getInstance(request["api"]);      if (found)      {          reply["name"] = found->getName(); diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp index ea84e4c1ea..849867586a 100644 --- a/indra/llcommon/llmemory.cpp +++ b/indra/llcommon/llmemory.cpp @@ -82,6 +82,7 @@ void LLMemory::initMaxHeapSizeGB(F32Gigabytes max_heap_size)  //static   void LLMemory::updateMemoryInfo()   { +	LL_PROFILE_ZONE_SCOPED  #if LL_WINDOWS  	PROCESS_MEMORY_COUNTERS counters; @@ -145,6 +146,7 @@ void* LLMemory::tryToAlloc(void* address, U32 size)  //static   void LLMemory::logMemoryInfo(BOOL update)  { +	LL_PROFILE_ZONE_SCOPED  	if(update)  	{  		updateMemoryInfo() ; diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 24f86cc11e..ac6c969d70 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -101,6 +101,29 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)  #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) +#define LL_ALIGN_NEW                        \ +public:                                     \ +    void* operator new(size_t size)         \ +    {                                       \ +        return ll_aligned_malloc_16(size);  \ +    }                                       \ +                                            \ +    void operator delete(void* ptr)         \ +    {                                       \ +        ll_aligned_free_16(ptr);            \ +    }                                       \ +                 
                           \ +    void* operator new[](size_t size)       \ +    {                                       \ +        return ll_aligned_malloc_16(size);  \ +    }                                       \ +                                            \ +    void operator delete[](void* ptr)       \ +    {                                       \ +        ll_aligned_free_16(ptr);            \ +    } + +  //------------------------------------------------------------------------------------------------  //------------------------------------------------------------------------------------------------  	// for enable buffer overrun detection predefine LL_DEBUG_BUFFER_OVERRUN in current library @@ -113,8 +136,9 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)  #else  	inline void* ll_aligned_malloc_fallback( size_t size, int align )  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;  	#if defined(LL_WINDOWS) -		return _aligned_malloc(size, align); +        void* ret = _aligned_malloc(size, align);  	#else          char* aligned = NULL;  		void* mem = malloc( size + (align - 1) + sizeof(void*) ); @@ -125,12 +149,16 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)              ((void**)aligned)[-1] = mem;          } -		return aligned; +		void* ret = aligned;  	#endif +        LL_PROFILE_ALLOC(ret, size); +        return ret;  	}  	inline void ll_aligned_free_fallback( void* ptr )  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +        LL_PROFILE_FREE(ptr);  	#if defined(LL_WINDOWS)  		_aligned_free(ptr);  	#else @@ -146,21 +174,24 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)  inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().  
{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;  #if defined(LL_WINDOWS) -	return _aligned_malloc(size, 16); +	void* ret = _aligned_malloc(size, 16);  #elif defined(LL_DARWIN) -	return malloc(size); // default osx malloc is 16 byte aligned. +	void* ret = malloc(size); // default osx malloc is 16 byte aligned.  #else -	void *rtn; -	if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size))) -		return rtn; -	else // bad alignment requested, or out of memory -		return NULL; +	void *ret; +    if (0 != posix_memalign(&ret, 16, size)) +        return nullptr;  #endif +    LL_PROFILE_ALLOC(ret, size); +    return ret;  }  inline void ll_aligned_free_16(void *p)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +    LL_PROFILE_FREE(p);  #if defined(LL_WINDOWS)  	_aligned_free(p);  #elif defined(LL_DARWIN) @@ -172,10 +203,12 @@ inline void ll_aligned_free_16(void *p)  inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16().  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +    LL_PROFILE_FREE(ptr);  #if defined(LL_WINDOWS) -	return _aligned_realloc(ptr, size, 16); +	void* ret = _aligned_realloc(ptr, size, 16);  #elif defined(LL_DARWIN) -	return realloc(ptr,size); // default osx malloc is 16 byte aligned. +	void* ret = realloc(ptr,size); // default osx malloc is 16 byte aligned.  #else  	//FIXME: memcpy is SLOW  	void* ret = ll_aligned_malloc_16(size); @@ -188,27 +221,31 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r  		}  		ll_aligned_free_16(ptr);  	} -	return ret;  #endif +    LL_PROFILE_ALLOC(ret, size); +    return ret;  }  inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().  
{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;  #if defined(LL_WINDOWS) -	return _aligned_malloc(size, 32); +	void* ret = _aligned_malloc(size, 32);  #elif defined(LL_DARWIN) -	return ll_aligned_malloc_fallback( size, 32 ); +	void* ret = ll_aligned_malloc_fallback( size, 32 );  #else -	void *rtn; -	if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size))) -		return rtn; -	else // bad alignment requested, or out of memory -		return NULL; +	void *ret; +    if (0 != posix_memalign(&ret, 32, size)) +        return nullptr;  #endif +    LL_PROFILE_ALLOC(ret, size); +    return ret;  }  inline void ll_aligned_free_32(void *p)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +    LL_PROFILE_FREE(p);  #if defined(LL_WINDOWS)  	_aligned_free(p);  #elif defined(LL_DARWIN) @@ -222,29 +259,35 @@ inline void ll_aligned_free_32(void *p)  template<size_t ALIGNMENT>  LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY; +    void* ret;  	if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)  	{ -		return malloc(size); +		ret = malloc(size); +        LL_PROFILE_ALLOC(ret, size);  	}  	else if (ALIGNMENT == 16)  	{ -		return ll_aligned_malloc_16(size); +		ret = ll_aligned_malloc_16(size);  	}  	else if (ALIGNMENT == 32)  	{ -		return ll_aligned_malloc_32(size); +		ret = ll_aligned_malloc_32(size);  	}  	else  	{ -		return ll_aligned_malloc_fallback(size, ALIGNMENT); +		ret = ll_aligned_malloc_fallback(size, ALIGNMENT);  	} +    return ret;  }  template<size_t ALIGNMENT>  LL_FORCE_INLINE void ll_aligned_free(void* ptr)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;  	if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)  	{ +        LL_PROFILE_FREE(ptr);  		free(ptr);  	}  	else if (ALIGNMENT == 16) @@ -266,6 +309,7 @@ LL_FORCE_INLINE void ll_aligned_free(void* ptr)  //  inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;  	assert(src != NULL);  	assert(dst != 
NULL);  	assert(bytes > 0); diff --git a/indra/llcommon/llmutex.cpp b/indra/llcommon/llmutex.cpp index 4d73c04d07..0273dd5970 100644 --- a/indra/llcommon/llmutex.cpp +++ b/indra/llcommon/llmutex.cpp @@ -44,6 +44,7 @@ LLMutex::~LLMutex()  void LLMutex::lock()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	if(isSelfLocked())  	{ //redundant lock  		mCount++; @@ -65,6 +66,7 @@ void LLMutex::lock()  void LLMutex::unlock()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	if (mCount > 0)  	{ //not the root unlock  		mCount--; @@ -85,6 +87,7 @@ void LLMutex::unlock()  bool LLMutex::isLocked()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	if (!mMutex.try_lock())  	{  		return true; @@ -108,6 +111,7 @@ LLThread::id_t LLMutex::lockingThread() const  bool LLMutex::trylock()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	if(isSelfLocked())  	{ //redundant lock  		mCount++; @@ -146,17 +150,20 @@ LLCondition::~LLCondition()  void LLCondition::wait()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	std::unique_lock< std::mutex > lock(mMutex);  	mCond.wait(lock);  }  void LLCondition::signal()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	mCond.notify_one();  }  void LLCondition::broadcast()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	mCond.notify_all();  } @@ -166,6 +173,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex)      : mMutex(mutex),      mLocked(false)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      if (mMutex)          mLocked = mMutex->trylock();  } @@ -174,6 +182,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex, U32 aTries, U32 delay_ms)      : mMutex(mutex),      mLocked(false)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      if (!mMutex)          return; @@ -188,6 +197,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex, U32 aTries, U32 delay_ms)  LLMutexTrylock::~LLMutexTrylock()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      if (mMutex && mLocked)          mMutex->unlock();  } @@ -199,6 +209,7 @@ LLMutexTrylock::~LLMutexTrylock()  //  
LLScopedLock::LLScopedLock(std::mutex* mutex) : mMutex(mutex)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	if(mutex)  	{  		mutex->lock(); @@ -217,6 +228,7 @@ LLScopedLock::~LLScopedLock()  void LLScopedLock::unlock()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD  	if(mLocked)  	{  		mMutex->unlock(); diff --git a/indra/llcommon/llpreprocessor.h b/indra/llcommon/llpreprocessor.h index b17a8e761a..dc586b0008 100644 --- a/indra/llcommon/llpreprocessor.h +++ b/indra/llcommon/llpreprocessor.h @@ -171,7 +171,9 @@  #define LL_DLLIMPORT  #endif // LL_WINDOWS -#if ! defined(LL_WINDOWS) +#if __clang__ || ! defined(LL_WINDOWS) +// Only on Windows, and only with the Microsoft compiler (vs. clang) is +// wchar_t potentially not a distinct type.  #define LL_WCHAR_T_NATIVE 1  #else  // LL_WINDOWS  // https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros diff --git a/indra/llcommon/llprocessor.cpp b/indra/llcommon/llprocessor.cpp index 5d16a4b74d..818df07bb2 100644 --- a/indra/llcommon/llprocessor.cpp +++ b/indra/llcommon/llprocessor.cpp @@ -44,20 +44,6 @@  #include "llsd.h" -#if LL_MSVC && _M_X64 -#      define LL_X86_64 1 -#      define LL_X86 1 -#elif LL_MSVC && _M_IX86 -#      define LL_X86 1 -#elif LL_GNUC && ( defined(__amd64__) || defined(__x86_64__) ) -#      define LL_X86_64 1 -#      define LL_X86 1 -#elif LL_GNUC && ( defined(__i386__) ) -#      define LL_X86 1 -#elif LL_GNUC && ( defined(__powerpc__) || defined(__ppc__) ) -#      define LL_PPC 1 -#endif -  class LLProcessorInfoImpl; // foward declaration for the mImpl;  namespace  diff --git a/indra/llcommon/llprocessor.h b/indra/llcommon/llprocessor.h index 90e5bc59ee..b77eb22c3a 100644 --- a/indra/llcommon/llprocessor.h +++ b/indra/llcommon/llprocessor.h @@ -29,6 +29,20 @@  #define LLPROCESSOR_H  #include "llunits.h" +#if LL_MSVC && _M_X64 +#      define LL_X86_64 1 +#      define LL_X86 1 +#elif LL_MSVC && _M_IX86 +#      define LL_X86 1 +#elif LL_GNUC && ( defined(__amd64__) || 
defined(__x86_64__) ) +#      define LL_X86_64 1 +#      define LL_X86 1 +#elif LL_GNUC && ( defined(__i386__) ) +#      define LL_X86 1 +#elif LL_GNUC && ( defined(__powerpc__) || defined(__ppc__) ) +#      define LL_PPC 1 +#endif +  class LLProcessorInfoImpl;  class LL_COMMON_API LLProcessorInfo diff --git a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h new file mode 100644 index 0000000000..f9d7ae7ce4 --- /dev/null +++ b/indra/llcommon/llprofiler.h @@ -0,0 +1,151 @@ +/** + * @file llprofiler.h + * @brief Wrapper for Tracy and/or other profilers + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2021, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef LL_PROFILER_H +#define LL_PROFILER_H + +// If you use the default macros LL_PROFILE_ZONE_SCOPED and LL_PROFILE_ZONE_NAMED to profile code ... +// +//     void foo() +//     { +//         LL_PROFILE_ZONE_SCOPED; +//         : +// +//         { +//             LL_PROFILE_ZONE_NAMED("widget bar"); +//             : +//         } +//         { +//             LL_PROFILE_ZONE_NAMED("widget qux"); +//             : +//         } +//     } +// +// ... 
please be aware that ALL these will show up in a Tracy capture which can quickly exhaust memory. +// Instead, use LL_PROFILE_ZONE_SCOPED_CATEGORY_* and LL_PROFILE_ZONE_NAMED_CATEGORY_* to profile code ... +// +//     void foo() +//     { +//         LL_PROFILE_ZONE_SCOPED_CATEGORY_UI; +//         : +// +//         { +//             LL_PROFILE_ZONE_NAMED_CATEGORY_UI("widget bar"); +//             : +//         } +//         { +//             LL_PROFILE_ZONE_NAMED_CATEGORY_UI("widget qux"); +//             : +//         } +//     } +// +// ... as these can be selectively turned on/off.  This will minimize memory usage and visual clutter in a Tracy capture. +// See llprofilercategories.h for more details on profiling categories. + +#define LL_PROFILER_CONFIG_NONE             0  // No profiling +#define LL_PROFILER_CONFIG_FAST_TIMER       1  // Profiling on: Only Fast Timers +#define LL_PROFILER_CONFIG_TRACY            2  // Profiling on: Only Tracy +#define LL_PROFILER_CONFIG_TRACY_FAST_TIMER 3  // Profiling on: Fast Timers + Tracy + +#ifndef LL_PROFILER_CONFIGURATION +#define LL_PROFILER_CONFIGURATION           LL_PROFILER_CONFIG_FAST_TIMER +#endif + +extern thread_local bool gProfilerEnabled; + +#if defined(LL_PROFILER_CONFIGURATION) && (LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE) +    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER +        #define TRACY_ENABLE         1 +// Normally these would be enabled but we want to be able to build any viewer with Tracy enabled and run the Tracy server on another machine +// They must be undefined in order to work across multiple machines +//      #define TRACY_NO_BROADCAST   1 +//      #define TRACY_ONLY_LOCALHOST 1 +        #define TRACY_ONLY_IPV4      1 +        #include "Tracy.hpp" + +        // Mutually exclusive with detailed memory tracing +        #define LL_PROFILER_ENABLE_TRACY_OPENGL 0 +    #endif + +    #if 
LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY +        #define LL_PROFILER_FRAME_END                   FrameMark +        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name );    gProfilerEnabled = true; +        #define LL_RECORD_BLOCK_TIME(name)              ZoneScoped // Want descriptive names; was: ZoneNamedN( ___tracy_scoped_zone, #name, true ); +        #define LL_PROFILE_ZONE_NAMED(name)             ZoneNamedN( ___tracy_scoped_zone, name, true ); +        #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB +        #define LL_PROFILE_ZONE_SCOPED                  ZoneScoped + +        #define LL_PROFILE_ZONE_NUM( val )              ZoneValue( val ) +        #define LL_PROFILE_ZONE_TEXT( text, size )      ZoneText( text, size ) + +        #define LL_PROFILE_ZONE_ERR(name)               LL_PROFILE_ZONE_NAMED_COLOR( name, 0XFF0000  )  // RGB red +        #define LL_PROFILE_ZONE_INFO(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0X00FFFF  )  // RGB cyan +        #define LL_PROFILE_ZONE_WARN(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0x0FFFF00 )  // RGB yellow +        #define LL_PROFILE_ALLOC(ptr, size)             TracyAlloc(ptr, size) +        #define LL_PROFILE_FREE(ptr)                    TracyFree(ptr) +    #endif +    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_FAST_TIMER +        #define LL_PROFILER_FRAME_END +        #define LL_PROFILER_SET_THREAD_NAME( name )      (void)(name) +        #define LL_RECORD_BLOCK_TIME(name)                                                                  const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__); +        #define LL_PROFILE_ZONE_NAMED(name)             // LL_PROFILE_ZONE_NAMED is a no-op when Tracy is disabled +        #define LL_PROFILE_ZONE_SCOPED                  // LL_PROFILE_ZONE_SCOPED is a 
no-op when Tracy is disabled +        #define LL_PROFILE_ZONE_COLOR(name,color)       // LL_RECORD_BLOCK_TIME(name) + +        #define LL_PROFILE_ZONE_NUM( val )              (void)( val );                // Not supported +        #define LL_PROFILE_ZONE_TEXT( text, size )      (void)( text ); void( size ); // Not supported + +        #define LL_PROFILE_ZONE_ERR(name)               (void)(name); // Not supported +        #define LL_PROFILE_ZONE_INFO(name)              (void)(name); // Not supported +        #define LL_PROFILE_ZONE_WARN(name)              (void)(name); // Not supported +        #define LL_PROFILE_ALLOC(ptr, size)             (void)(ptr); (void)(size); +        #define LL_PROFILE_FREE(ptr)                    (void)(ptr); +    #endif +    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER +        #define LL_PROFILER_FRAME_END                   FrameMark +        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name );    gProfilerEnabled = true; +        #define LL_RECORD_BLOCK_TIME(name)              ZoneNamedN(___tracy_scoped_zone, #name, true);   const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__); +        #define LL_PROFILE_ZONE_NAMED(name)             ZoneNamedN( ___tracy_scoped_zone, #name, true ); +        #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB +        #define LL_PROFILE_ZONE_SCOPED                  ZoneScoped + +        #define LL_PROFILE_ZONE_NUM( val )              ZoneValue( val ) +        #define LL_PROFILE_ZONE_TEXT( text, size )      ZoneText( text, size ) + +        #define LL_PROFILE_ZONE_ERR(name)               LL_PROFILE_ZONE_NAMED_COLOR( name, 0XFF0000  )  // RGB red +        #define LL_PROFILE_ZONE_INFO(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0X00FFFF  )  // RGB cyan +        #define 
LL_PROFILE_ZONE_WARN(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0x0FFFF00 )  // RGB yellow +        #define LL_PROFILE_ALLOC(ptr, size)             TracyAlloc(ptr, size) +        #define LL_PROFILE_FREE(ptr)                    TracyFree(ptr) +    #endif +#else +    #define LL_PROFILER_FRAME_END +    #define LL_PROFILER_SET_THREAD_NAME( name ) (void)(name) +#endif // LL_PROFILER + +#include "llprofilercategories.h" + +#endif // LL_PROFILER_H diff --git a/indra/llcommon/llprofilercategories.h b/indra/llcommon/llprofilercategories.h new file mode 100644 index 0000000000..8db29468cc --- /dev/null +++ b/indra/llcommon/llprofilercategories.h @@ -0,0 +1,280 @@ +/** + * @file llprofilercategories.h + * @brief Profiling categories to minimize Tracy memory usage when viewing captures. + * + * $LicenseInfo:firstyear=2022&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2022, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA + * $/LicenseInfo$ + */ + +#ifndef LL_PROFILER_CATEGORIES_H +#define LL_PROFILER_CATEGORIES_H + +// A Tracy capture can quickly consume memory.  Use these defines to selectively turn on/off Tracy profiling for these categories. 
+// The biggest memory usage ones are: +// +//    LL_PROFILER_CATEGORY_ENABLE_DRAWPOOL +//    LL_PROFILER_CATEGORY_ENABLE_LLSD +//    LL_PROFILER_CATEGORY_ENABLE_MEMORY +//    LL_PROFILER_CATEGORY_ENABLE_SHADER +// +// NOTE: You can still manually use: +//     LL_PROFILE_ZONE_SCOPED; +//     LL_PROFILE_ZONE_NAMED("name"); +// but just be aware that those will ALWAYS show up in a Tracy capture +//  a) using more memory, and +//  b) adding visual clutter. +#define LL_PROFILER_CATEGORY_ENABLE_APP         1 +#define LL_PROFILER_CATEGORY_ENABLE_AVATAR      1 +#define LL_PROFILER_CATEGORY_ENABLE_DISPLAY     1 +#define LL_PROFILER_CATEGORY_ENABLE_DRAWABLE    1 +#define LL_PROFILER_CATEGORY_ENABLE_DRAWPOOL    1 +#define LL_PROFILER_CATEGORY_ENABLE_ENVIRONMENT 1 +#define LL_PROFILER_CATEGORY_ENABLE_FACE        1 +#define LL_PROFILER_CATEGORY_ENABLE_LLSD        1 +#define LL_PROFILER_CATEGORY_ENABLE_LOGGING     1 +#define LL_PROFILER_CATEGORY_ENABLE_MATERIAL    1 +#define LL_PROFILER_CATEGORY_ENABLE_MEDIA       1 +#define LL_PROFILER_CATEGORY_ENABLE_MEMORY      1 +#define LL_PROFILER_CATEGORY_ENABLE_NETWORK     1 +#define LL_PROFILER_CATEGORY_ENABLE_OCTREE      1 +#define LL_PROFILER_CATEGORY_ENABLE_PIPELINE    1 +#define LL_PROFILER_CATEGORY_ENABLE_SHADER      1 +#define LL_PROFILER_CATEGORY_ENABLE_SPATIAL     1 +#define LL_PROFILER_CATEGORY_ENABLE_STATS       1 +#define LL_PROFILER_CATEGORY_ENABLE_STRING      1 +#define LL_PROFILER_CATEGORY_ENABLE_TEXTURE     1 +#define LL_PROFILER_CATEGORY_ENABLE_THREAD      1 +#define LL_PROFILER_CATEGORY_ENABLE_UI          1 +#define LL_PROFILER_CATEGORY_ENABLE_VIEWER      1 +#define LL_PROFILER_CATEGORY_ENABLE_VERTEX      1 +#define LL_PROFILER_CATEGORY_ENABLE_VOLUME      1 +#define LL_PROFILER_CATEGORY_ENABLE_WIN32       1 + +#if LL_PROFILER_CATEGORY_ENABLE_APP +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_APP  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_APP LL_PROFILE_ZONE_SCOPED +#else +    #define 
LL_PROFILE_ZONE_NAMED_CATEGORY_APP(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_APP +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_AVATAR +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_AVATAR  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_AVATAR(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_DISPLAY +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_DRAWABLE +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWABLE  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWABLE LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWABLE(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWABLE +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_DRAWPOOL +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_ENVIRONMENT +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_ENVIRONMENT  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_ENVIRONMENT LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_ENVIRONMENT(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_ENVIRONMENT +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_FACE +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_FACE  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_FACE LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_FACE(name) +    #define 
LL_PROFILE_ZONE_SCOPED_CATEGORY_FACE +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_LLSD +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_LLSD  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_LLSD(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_LOGGING +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_LOGGING  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_LOGGING(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_MATERIAL +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MATERIAL  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MATERIAL(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_MEDIA +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MEDIA  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MEDIA LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MEDIA(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MEDIA +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_MEMORY +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MEMORY  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MEMORY(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_NETWORK +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_NETWORK  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_NETWORK LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_NETWORK(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_NETWORK +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_OCTREE +    #define 
LL_PROFILE_ZONE_NAMED_CATEGORY_OCTREE  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_OCTREE LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_OCTREE(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_OCTREE +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_PIPELINE +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_PIPELINE  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_PIPELINE(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_SHADER +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_SHADER  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_SHADER(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_SPATIAL +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_SPATIAL  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_SPATIAL LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_SPATIAL(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_SPATIAL +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_STATS +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_STATS  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_STATS(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_STRING +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_STRING  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_STRING(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_TEXTURE +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE  LL_PROFILE_ZONE_NAMED +    #define 
LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_THREAD +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_THREAD  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_THREAD(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_UI +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_UI  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_UI LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_UI(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_UI +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_VERTEX +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_VIEWER +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VIEWER  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VIEWER LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VIEWER(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VIEWER +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_VOLUME +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME(name) +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME +#endif + +#if LL_PROFILER_CATEGORY_ENABLE_WIN32 +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_WIN32  LL_PROFILE_ZONE_NAMED +    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_WIN32 LL_PROFILE_ZONE_SCOPED +#else +    #define LL_PROFILE_ZONE_NAMED_CATEGORY_WIN32(name) +    
#define LL_PROFILE_ZONE_SCOPED_CATEGORY_WIN32 +#endif + +#endif // LL_PROFILER_CATEGORIES_H + diff --git a/indra/llcommon/llrefcount.cpp b/indra/llcommon/llrefcount.cpp index 29a5ca6f24..5cbd346411 100644 --- a/indra/llcommon/llrefcount.cpp +++ b/indra/llcommon/llrefcount.cpp @@ -29,6 +29,9 @@  #include "llerror.h" +// maximum reference count before sounding memory leak alarm +const S32 gMaxRefCount = 65536; +  LLRefCount::LLRefCount(const LLRefCount& other)  :	mRef(0)  { @@ -47,7 +50,7 @@ LLRefCount::LLRefCount() :  LLRefCount::~LLRefCount()  {  -	if (mRef != 0) +	if (mRef != LL_REFCOUNT_FREE && mRef != 0)  	{  		LL_ERRS() << "deleting non-zero reference" << LL_ENDL;  	} diff --git a/indra/llcommon/llrefcount.h b/indra/llcommon/llrefcount.h index 7e4af6ea66..2080da1565 100644 --- a/indra/llcommon/llrefcount.h +++ b/indra/llcommon/llrefcount.h @@ -37,6 +37,10 @@ class LLMutex;  // see llthread.h for LLThreadSafeRefCount  //---------------------------------------------------------------------------- +//nonsense but recognizable value for freed LLRefCount (aids in debugging) +#define LL_REFCOUNT_FREE 1234567890 +extern const S32 gMaxRefCount; +  class LL_COMMON_API LLRefCount  {  protected: @@ -47,17 +51,25 @@ protected:  public:  	LLRefCount(); +    inline void validateRefCount() const +    { +        llassert(mRef > 0); // ref count below 0, likely corrupted +        llassert(mRef < gMaxRefCount); // ref count excessive, likely memory leak +    } +  	inline void ref() const  	{   		mRef++;  +        validateRefCount();  	}   	inline S32 unref() const  	{ -		llassert(mRef >= 1); +        validateRefCount();  		if (0 == --mRef)  		{ -			delete this;  +            mRef = LL_REFCOUNT_FREE; // set to nonsense yet recognizable value to aid in debugging +			delete this;  			return 0;  		}  		return mRef; diff --git a/indra/llcommon/llsd.cpp b/indra/llcommon/llsd.cpp index 57b746889d..807b3d13f8 100644 --- a/indra/llcommon/llsd.cpp +++ b/indra/llcommon/llsd.cpp @@ -400,6 
+400,7 @@ namespace  	ImplMap& ImplMap::makeMap(LLSD::Impl*& var)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;  		if (shared())  		{  			ImplMap* i = new ImplMap(mData); @@ -414,18 +415,21 @@ namespace  	bool ImplMap::has(const LLSD::String& k) const  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;  		DataMap::const_iterator i = mData.find(k);  		return i != mData.end();  	}  	LLSD ImplMap::get(const LLSD::String& k) const  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;  		DataMap::const_iterator i = mData.find(k);  		return (i != mData.end()) ? i->second : LLSD();  	}  	LLSD ImplMap::getKeys() const  	{  +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;  		LLSD keys = LLSD::emptyArray();  		DataMap::const_iterator iter = mData.begin();  		while (iter != mData.end()) @@ -438,11 +442,13 @@ namespace  	void ImplMap::insert(const LLSD::String& k, const LLSD& v)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;  		mData.insert(DataMap::value_type(k, v));  	}  	void ImplMap::erase(const LLSD::String& k)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;  		mData.erase(k);  	} @@ -684,6 +690,7 @@ const LLSD::Impl& LLSD::Impl::safe(const Impl* impl)  ImplMap& LLSD::Impl::makeMap(Impl*& var)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;  	ImplMap* im = new ImplMap;  	reset(var, im);  	return *im; @@ -887,11 +894,16 @@ LLSD& LLSD::with(const String& k, const LLSD& v)  										}  void LLSD::erase(const String& k)		{ makeMap(impl).erase(k); } -LLSD&		LLSD::operator[](const String& k) -										{ return makeMap(impl).ref(k); } +LLSD& LLSD::operator[](const String& k) +{  +    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD; +    return makeMap(impl).ref(k);  +}  const LLSD& LLSD::operator[](const String& k) const -										{ return safe(impl).ref(k); } - +{  +    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD; +    return safe(impl).ref(k);  +}  LLSD LLSD::emptyArray()  { @@ -914,10 +926,16 @@ LLSD& LLSD::with(Integer i, const LLSD& v)  LLSD& LLSD::append(const LLSD& v)		{ return 
makeArray(impl).append(v); }  void LLSD::erase(Integer i)				{ makeArray(impl).erase(i); } -LLSD&		LLSD::operator[](Integer i) -										{ return makeArray(impl).ref(i); } +LLSD& LLSD::operator[](Integer i) +{  +    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD; +    return makeArray(impl).ref(i);  +}  const LLSD& LLSD::operator[](Integer i) const -										{ return safe(impl).ref(i); } +{  +    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD; +    return safe(impl).ref(i); +}  static const char *llsd_dump(const LLSD &llsd, bool useXMLFormat)  { diff --git a/indra/llcommon/llsd.h b/indra/llcommon/llsd.h index 5b6d5545af..24cb9bbce1 100644 --- a/indra/llcommon/llsd.h +++ b/indra/llcommon/llsd.h @@ -290,9 +290,17 @@ public:  		LLSD& with(const String&, const LLSD&);  		LLSD& operator[](const String&); -		LLSD& operator[](const char* c)			{ return (*this)[String(c)]; } +		LLSD& operator[](const char* c) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD; +            return (*this)[String(c)]; +        }  		const LLSD& operator[](const String&) const; -		const LLSD& operator[](const char* c) const	{ return (*this)[String(c)]; } +		const LLSD& operator[](const char* c) const	 +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD; +            return (*this)[String(c)]; +        }  	//@}  	/** @name Array Values */ diff --git a/indra/llcommon/llsdparam.cpp b/indra/llcommon/llsdparam.cpp index 2e7b46f885..af4ccf25fd 100644 --- a/indra/llcommon/llsdparam.cpp +++ b/indra/llcommon/llsdparam.cpp @@ -37,8 +37,6 @@ static 	LLInitParam::Parser::parser_write_func_map_t sWriteFuncs;  static 	LLInitParam::Parser::parser_inspect_func_map_t sInspectFuncs;  static const LLSD NO_VALUE_MARKER; -LLTrace::BlockTimerStatHandle FTM_SD_PARAM_ADAPTOR("LLSD to LLInitParam conversion"); -  //  // LLParamSDParser  // diff --git a/indra/llcommon/llsdparam.h b/indra/llcommon/llsdparam.h index 93910b70ae..82a623a8a0 100644 --- a/indra/llcommon/llsdparam.h +++ b/indra/llcommon/llsdparam.h @@ -110,7 
+110,6 @@ private:  }; -extern LL_COMMON_API LLTrace::BlockTimerStatHandle FTM_SD_PARAM_ADAPTOR;  template<typename T>  class LLSDParamAdapter : public T  { @@ -118,7 +117,7 @@ public:  	LLSDParamAdapter() {}  	LLSDParamAdapter(const LLSD& sd)  	{ -		LL_RECORD_BLOCK_TIME(FTM_SD_PARAM_ADAPTOR); +        LL_PROFILE_ZONE_SCOPED;  		LLParamSDParser parser;  		// don't spam for implicit parsing of LLSD, as we want to allow arbitrary freeform data and ignore most of it  		bool parse_silently = true; diff --git a/indra/llcommon/llsdutil.cpp b/indra/llcommon/llsdutil.cpp index fc10fcece3..8e90d1e8b8 100644 --- a/indra/llcommon/llsdutil.cpp +++ b/indra/llcommon/llsdutil.cpp @@ -215,6 +215,8 @@ BOOL compare_llsd_with_template(  	const LLSD& template_llsd,  	LLSD& resultant_llsd)  { +    LL_PROFILE_ZONE_SCOPED +  	if (  		llsd_to_test.isUndefined() &&  		template_llsd.isDefined() ) @@ -336,6 +338,8 @@ bool filter_llsd_with_template(  	const LLSD & template_llsd,  	LLSD & resultant_llsd)  { +    LL_PROFILE_ZONE_SCOPED +  	if (llsd_to_test.isUndefined() && template_llsd.isDefined())  	{  		resultant_llsd = template_llsd; @@ -530,6 +534,8 @@ class TypeLookup  public:      TypeLookup()      { +        LL_PROFILE_ZONE_SCOPED +          for (const Data *di(boost::begin(typedata)), *dend(boost::end(typedata)); di != dend; ++di)          {              mMap[di->type] = di->name; @@ -538,6 +544,8 @@ public:      std::string lookup(LLSD::Type type) const      { +        LL_PROFILE_ZONE_SCOPED +          MapType::const_iterator found = mMap.find(type);          if (found != mMap.end())          { @@ -588,6 +596,8 @@ static std::string match_types(LLSD::Type expect, // prototype.type()                                 LLSD::Type actual,        // type we're checking                                 const std::string& pfx)   // as for llsd_matches  { +    LL_PROFILE_ZONE_SCOPED +      // Trivial case: if the actual type is exactly what we expect, we're good.      
if (actual == expect)          return ""; @@ -625,6 +635,8 @@ static std::string match_types(LLSD::Type expect, // prototype.type()  // see docstring in .h file  std::string llsd_matches(const LLSD& prototype, const LLSD& data, const std::string& pfx)  { +    LL_PROFILE_ZONE_SCOPED +      // An undefined prototype means that any data is valid.      // An undefined slot in an array or map prototype means that any data      // may fill that slot. @@ -757,6 +769,8 @@ std::string llsd_matches(const LLSD& prototype, const LLSD& data, const std::str  bool llsd_equals(const LLSD& lhs, const LLSD& rhs, int bits)  { +    LL_PROFILE_ZONE_SCOPED +      // We're comparing strict equality of LLSD representation rather than      // performing any conversions. So if the types aren't equal, the LLSD      // values aren't equal. @@ -865,6 +879,8 @@ namespace llsd  LLSD& drill_ref(LLSD& blob, const LLSD& rawPath)  { +    LL_PROFILE_ZONE_SCOPED +      // Treat rawPath uniformly as an array. If it's not already an array,      // store it as the only entry in one. (But let's say Undefined means an      // empty array.) @@ -890,6 +906,8 @@ LLSD& drill_ref(LLSD& blob, const LLSD& rawPath)      // path entry that's bad.      for (LLSD::Integer i = 0; i < path.size(); ++i)      { +        LL_PROFILE_ZONE_NUM( i ) +          const LLSD& key{path[i]};          if (key.isString())          { @@ -918,6 +936,8 @@ LLSD& drill_ref(LLSD& blob, const LLSD& rawPath)  LLSD drill(const LLSD& blob, const LLSD& path)  { +    LL_PROFILE_ZONE_SCOPED +      // drill_ref() does exactly what we want. Temporarily cast away      // const-ness and use that.      return drill_ref(const_cast<LLSD&>(blob), path); @@ -930,6 +950,8 @@ LLSD drill(const LLSD& blob, const LLSD& path)  // filter may be include to exclude/include keys in a map.   
LLSD llsd_clone(LLSD value, LLSD filter)  { +    LL_PROFILE_ZONE_SCOPED +      LLSD clone;      bool has_filter(filter.isMap()); diff --git a/indra/llcommon/llsingleton.h b/indra/llcommon/llsingleton.h index 7c81d65a8b..51ef514cf7 100644 --- a/indra/llcommon/llsingleton.h +++ b/indra/llcommon/llsingleton.h @@ -455,6 +455,7 @@ public:      static DERIVED_TYPE* getInstance()      { +        LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;          // We know the viewer has LLSingleton dependency circularities. If you          // feel strongly motivated to eliminate them, cheers and good luck.          // (At that point we could consider a much simpler locking mechanism.) @@ -838,4 +839,36 @@ private:                                                                \      /* LLSINGLETON() is carefully implemented to permit exactly this */ \      LLSINGLETON_C11(DERIVED_CLASS) {} +// Relatively unsafe singleton implementation that is much faster +// and simpler than LLSingleton, but has no dependency tracking +// or inherent thread safety and requires manual invocation of  +// createInstance before first use. +template<class T> +class LLSimpleton +{ +public: +    template <typename... ARGS> +    static void createInstance(ARGS&&... 
args) +    { +        llassert(sInstance == nullptr); +        sInstance = new T(std::forward<ARGS>(args)...); +    } + +    static inline T* getInstance() { return sInstance; } +    static inline T& instance() { return *getInstance(); } +    static inline bool instanceExists() { return sInstance != nullptr; } + +    static void deleteSingleton() +    { +        delete sInstance; +        sInstance = nullptr; +    } + +private: +    static T* sInstance; +}; + +template <class T> +T* LLSimpleton<T>::sInstance{ nullptr }; +  #endif diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 0290eea143..7f501f2e77 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -37,9 +37,6 @@  #include <winnls.h> // for WideCharToMultiByte  #endif -LLTrace::BlockTimerStatHandle FT_STRING_FORMAT("String Format"); - -  std::string ll_safe_string(const char* in)  {  	if(in) return std::string(in); @@ -215,7 +212,7 @@ S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)  	return inchars - base;  } -llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len) +llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)  {  	llutf16string out; @@ -237,27 +234,19 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)  	return out;  } -llutf16string wstring_to_utf16str(const LLWString &utf32str) -{ -	const S32 len = (S32)utf32str.length(); -	return wstring_to_utf16str(utf32str, len); -} - -llutf16string utf8str_to_utf16str ( const std::string& utf8str ) +llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )  { -	LLWString wstr = utf8str_to_wstring ( utf8str ); +	LLWString wstr = utf8str_to_wstring ( utf8str, len );  	return wstring_to_utf16str ( wstr );  } - -LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len) +LLWString utf16str_to_wstring(const U16* utf16str, size_t len)  {  	LLWString wout; -	if((len <= 0) || utf16str.empty()) return wout; +	if (len == 0) return 
wout;  	S32 i = 0; -	// craziness to make gcc happy (llutf16string.c_str() is tweaked on linux): -	const U16* chars16 = &(*(utf16str.begin())); +	const U16* chars16 = utf16str;  	while (i < len)  	{  		llwchar cur_char; @@ -267,12 +256,6 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)  	return wout;  } -LLWString utf16str_to_wstring(const llutf16string &utf16str) -{ -	const S32 len = (S32)utf16str.length(); -	return utf16str_to_wstring(utf16str, len); -} -  // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.  S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)  { @@ -392,8 +375,7 @@ S32 wstring_utf8_length(const LLWString& wstr)  	return len;  } - -LLWString utf8str_to_wstring(const std::string& utf8str, S32 len) +LLWString utf8str_to_wstring(const char* utf8str, size_t len)  {  	LLWString wout; @@ -481,13 +463,7 @@ LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)  	return wout;  } -LLWString utf8str_to_wstring(const std::string& utf8str) -{ -	const S32 len = (S32)utf8str.length(); -	return utf8str_to_wstring(utf8str, len); -} - -std::string wstring_to_utf8str(const LLWString& utf32str, S32 len) +std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)  {  	std::string out; @@ -503,20 +479,9 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)  	return out;  } -std::string wstring_to_utf8str(const LLWString& utf32str) -{ -	const S32 len = (S32)utf32str.length(); -	return wstring_to_utf8str(utf32str, len); -} - -std::string utf16str_to_utf8str(const llutf16string& utf16str) -{ -	return wstring_to_utf8str(utf16str_to_wstring(utf16str)); -} - -std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len) +std::string utf16str_to_utf8str(const U16* utf16str, size_t len)  { -	return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len); +	return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));  }  std::string 
utf8str_trim(const std::string& utf8str) @@ -657,17 +622,16 @@ std::string utf8str_removeCRLF(const std::string& utf8str)  }  #if LL_WINDOWS -std::string ll_convert_wide_to_string(const wchar_t* in) +unsigned int ll_wstring_default_code_page()  { -	return ll_convert_wide_to_string(in, CP_UTF8); +    return CP_UTF8;  } -std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) +std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page)  {  	std::string out;  	if(in)  	{ -		int len_in = wcslen(in);  		int len_out = WideCharToMultiByte(  			code_page,  			0, @@ -699,12 +663,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)  	return out;  } -std::wstring ll_convert_string_to_wide(const std::string& in) -{ -	return ll_convert_string_to_wide(in, CP_UTF8); -} - -std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page) +std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page)  {  	// From review:  	// We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, @@ -716,10 +675,10 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_  	// reserve an output buffer that will be destroyed on exit, with a place  	// to put NULL terminator -	std::vector<wchar_t> w_out(in.length() + 1); +	std::vector<wchar_t> w_out(len + 1);  	memset(&w_out[0], 0, w_out.size()); -	int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), +	int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len,  												  &w_out[0], w_out.size() - 1);  	//looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858. 
@@ -729,30 +688,32 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_  	return {&w_out[0]};  } -LLWString ll_convert_wide_to_wstring(const std::wstring& in) +LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len)  { -    // This function, like its converse, is a placeholder, encapsulating a -    // guilty little hack: the only "official" way nat has found to convert -    // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is -    // by using iconv, which we've avoided so far. It kinda sorta works to -    // just copy individual characters... -    // The point is that if/when we DO introduce some more official way to -    // perform such conversions, we should only have to call it here. -    return { in.begin(), in.end() }; +    // Whether or not std::wstring and llutf16string are distinct types, they +    // both hold UTF-16LE characters. (See header file comments.) Pretend this +    // wchar_t* sequence is really a U16* sequence and use the conversion we +    // define above. +    return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len);  } -std::wstring ll_convert_wstring_to_wide(const LLWString& in) +std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len)  { -    // See comments in ll_convert_wide_to_wstring() -    return { in.begin(), in.end() }; +    // first, convert to llutf16string, for which we have a real implementation +    auto utf16str{ wstring_to_utf16str(in, len) }; +    // then, because each U16 char must be UTF-16LE encoded, pretend the U16* +    // string pointer is a wchar_t* and instantiate a std::wstring of the same +    // length. 
+    return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() };  }  std::string ll_convert_string_to_utf8_string(const std::string& in)  { -	auto w_mesg = ll_convert_string_to_wide(in, CP_ACP); -	std::string out_utf8(ll_convert_wide_to_string(w_mesg.c_str(), CP_UTF8)); - -	return out_utf8; +	// If you pass code_page, you must also pass length, otherwise the code +	// page parameter will be mistaken for length. +	auto w_mesg = ll_convert_string_to_wide(in, in.length(), CP_ACP); +	// CP_UTF8 is default -- see ll_wstring_default_code_page() above. +	return ll_convert_wide_to_string(w_mesg);  }  namespace @@ -1356,7 +1317,7 @@ bool LLStringUtil::formatDatetime(std::string& replacement, std::string token,  template<>   S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)  { -	LL_RECORD_BLOCK_TIME(FT_STRING_FORMAT); +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;  	S32 res = 0;  	std::string output; @@ -1429,7 +1390,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)  template<>   S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)  { -	LL_RECORD_BLOCK_TIME(FT_STRING_FORMAT); +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;  	S32 res = 0;  	if (!substitutions.isMap())  diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 4263122f36..d94f549480 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -27,9 +27,11 @@  #ifndef LL_LLSTRING_H  #define LL_LLSTRING_H +#include <boost/call_traits.hpp>  #include <boost/optional/optional.hpp>  #include <string>  #include <cstdio> +#include <cwchar>                   // std::wcslen()  //#include <locale>  #include <iomanip>  #include <algorithm> @@ -527,14 +529,71 @@ struct ll_convert_impl<T, T>      T operator()(const T& in) const { return in; }  }; +// simple construction from char* +template<typename T> +struct ll_convert_impl<T, const typename T::value_type*> +{ +    T operator()(const typename T::value_type* 
in) const { return { in }; } +}; +  // specialize ll_convert_impl<TO, FROM> to return EXPR  #define ll_convert_alias(TO, FROM, EXPR)                    \  template<>                                                  \  struct ll_convert_impl<TO, FROM>                            \  {                                                           \ -    TO operator()(const FROM& in) const { return EXPR; }    \ +    /* param_type optimally passes both char* and string */ \ +    TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \ +} + +// If all we're doing is copying characters, pass this to ll_convert_alias as +// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl +// specialization above, it implies TO{ in.begin(), in.end() }. +#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } + +// Generic name for strlen() / wcslen() - the default implementation should +// (!) work with U16 and llwchar, but we don't intend to engage it. +template <typename CHARTYPE> +size_t ll_convert_length(const CHARTYPE* zstr) +{ +    const CHARTYPE* zp; +    // classic C string scan +    for (zp = zstr; *zp; ++zp) +        ; +    return (zp - zstr);  } +// specialize where we have a library function; may use intrinsic operations +template <> +inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); } +template <> +inline size_t ll_convert_length<char>   (const char*    zstr) { return std::strlen(zstr); } + +// ll_convert_forms() is short for a bunch of boilerplate. It defines +// longname(const char*, len), longname(const char*), longname(const string&) +// and longname(const string&, len) so calls written pre-ll_convert() will +// work. Most of these overloads will be unified once we turn on C++17 and can +// use std::string_view. +// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*) +// and ll_convert<OUTSTR>(const string&) will work. 
+#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname)           \ +LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \ +inline auto longname(const INSTR& in, size_t len)                       \ +{                                                                       \ +    return longname(in.c_str(), len);                                   \ +}                                                                       \ +inline auto longname(const INSTR::value_type* in)                       \ +{                                                                       \ +    return longname(in, ll_convert_length(in));                         \ +}                                                                       \ +inline auto longname(const INSTR& in)                                   \ +{                                                                       \ +    return longname(in.c_str(), in.length());                           \ +}                                                                       \ +/* string param */                                                      \ +aliasmacro(OUTSTR, INSTR, longname(in));                                \ +/* char* param */                                                       \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) +  // Make the incoming string a utf8 string. Replaces any unknown glyph  // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest  // of the data may not be recovered. @@ -571,63 +630,47 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);  // LL_WCHAR_T_NATIVE.  typedef std::basic_string<U16> llutf16string; -#if ! defined(LL_WCHAR_T_NATIVE) -// wchar_t is identical to U16, and std::wstring is identical to llutf16string. -// Defining an ll_convert alias involving llutf16string would collide with the -// comparable preferred alias involving std::wstring. 
(In this scenario, if -// you pass llutf16string, it will engage the std::wstring specialization.) -#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing -#else  // defined(LL_WCHAR_T_NATIVE) -// wchar_t is a distinct native type, so llutf16string is also a distinct -// type, and there IS a point to converting separately to/from llutf16string. -// (But why? Windows APIs are still defined in terms of wchar_t, and -// in this scenario llutf16string won't work for them!) -#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +// Considering wchar_t, llwchar and U16, there are three relevant cases: +#if LLWCHAR_IS_WCHAR_T         // every which way but Windows +// llwchar is identical to wchar_t, LLWString is identical to std::wstring. +// U16 is distinct, llutf16string is distinct (though pretty useless). +// Given conversions to/from LLWString and to/from llutf16string, conversions +// involving std::wstring would collide. +#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving llutf16string without collisions +#define  ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#elif defined(LL_WCHAR_T_NATIVE)    // Windows, either clang or MS /Zc:wchar_t +// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types. +// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string +// can all be defined. +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#define  ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#else  // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t- +// wchar_t is identical to U16, std::wstring is identical to llutf16string. +// Given conversions to/from LLWString and to/from std::wstring, conversions +// involving llutf16string would collide. 
+#define  ll_convert_u16_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving std::wstring without collisions +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#endif + +ll_convert_forms(ll_convert_u16_alias, LLWString,     llutf16string, utf16str_to_wstring); +ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString,     wstring_to_utf16str); +ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string,   utf8str_to_utf16str); +ll_convert_forms(ll_convert_alias,     LLWString,     std::string,   utf8str_to_wstring); -#if LL_WINDOWS -// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact, -// wchar_t is native. Everywhere but Windows, we use it for llwchar (see -// stdtypes.h). That makes LLWString identical to std::wstring, so these -// aliases for std::wstring would collide with those for LLWString. Only -// define on Windows, where converting between std::wstring and llutf16string -// means copying chars. 
-ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end())); -ll_convert_alias(std::wstring, llutf16string,  std::wstring(in.begin(), in.end())); -#endif // LL_WINDOWS -#endif // defined(LL_WCHAR_T_NATIVE) - -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len); -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str); -ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in)); - -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len); -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str); -ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in)); - -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len); -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str ); -ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in)); - -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len); -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);  // Same function, better name. 
JC  inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); } -// best name of all -ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in)); -//  LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str); -ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in)); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str); -ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in)); +ll_convert_forms(ll_convert_alias,     std::string, LLWString,     wstring_to_utf8str); +ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str); -#if LL_WINDOWS +// an older alias for utf16str_to_utf8str(llutf16string)  inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} -#endif  // Length of this UTF32 string in bytes when transformed to UTF8  LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr);  @@ -701,42 +744,48 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);  //@{  /** - * @brief Convert a wide string to std::string + * @brief Convert a wide string to/from std::string + * Convert a Windows wide string to/from our LLWString   *   * This replaces the unsafe W2A macro from ATL.   
*/ -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page); -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8 -inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page) -{ -    return ll_convert_wide_to_string(in.c_str(), code_page); -} -inline std::string ll_convert_wide_to_string(const std::wstring& in) -{ -    return ll_convert_wide_to_string(in.c_str()); -} -ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); - -/** - * Converts a string to wide string. - */ -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in, -                                                     unsigned int code_page); -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in); -                                                     // default CP_UTF8 -ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); - -/** - * Convert a Windows wide string to our LLWString - */ -LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in); -ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); - -/** - * Convert LLWString to Windows wide string - */ -LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in); -ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); +// Avoid requiring this header to #include the Windows header file declaring +// our actual default code_page by delegating this function to our .cpp file. +LL_COMMON_API unsigned int ll_wstring_default_code_page(); + +// This is like ll_convert_forms(), with the added complexity of a code page +// parameter that may or may not be passed. 
+#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname)    \ +/* declare the only nontrivial implementation (in .cpp file) */     \ +LL_COMMON_API OUTSTR longname(                                      \ +    const INSTR::value_type* in,                                    \ +    size_t len,                                                     \ +    unsigned int code_page=ll_wstring_default_code_page());         \ +/* if passed only a char pointer, scan for nul terminator */        \ +inline auto longname(const INSTR::value_type* in)                   \ +{                                                                   \ +    return longname(in, ll_convert_length(in));                     \ +}                                                                   \ +/* if passed string and length, extract its char pointer */         \ +inline auto longname(                                               \ +    const INSTR& in,                                                \ +    size_t len,                                                     \ +    unsigned int code_page=ll_wstring_default_code_page())          \ +{                                                                   \ +    return longname(in.c_str(), len, code_page);                    \ +}                                                                   \ +/* if passed only a string object, no scan, pass known length */    \ +inline auto longname(const INSTR& in)                               \ +{                                                                   \ +    return longname(in.c_str(), in.length());                       \ +}                                                                   \ +aliasmacro(OUTSTR, INSTR, longname(in));                            \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + +ll_convert_cp_forms(ll_convert_wstr_alias, std::string,  std::wstring, ll_convert_wide_to_string); +ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, 
std::string,  ll_convert_string_to_wide); +   ll_convert_forms(ll_convert_wstr_alias, LLWString,    std::wstring, ll_convert_wide_to_wstring); +   ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString,    ll_convert_wstring_to_wide);  /**   * Converts incoming string into utf8 string @@ -1937,4 +1986,14 @@ void LLStringUtilBase<T>::truncate(string_type& string, size_type count)  	string.resize(count < cur_size ? count : cur_size);  } +// The good thing about *declaration* macros, vs. usage macros, is that now +// we're done with them: we don't need them to bleed into the consuming source +// file. +#undef ll_convert_alias +#undef ll_convert_u16_alias +#undef ll_convert_wstr_alias +#undef LL_CONVERT_COPY_CHARS +#undef ll_convert_forms +#undef ll_convert_cp_forms +  #endif  // LL_STRING_H diff --git a/indra/llcommon/llsys.cpp b/indra/llcommon/llsys.cpp index cdc1d83b59..f717b2cf34 100644 --- a/indra/llcommon/llsys.cpp +++ b/indra/llcommon/llsys.cpp @@ -843,6 +843,7 @@ LLSD LLMemoryInfo::getStatsMap() const  LLMemoryInfo& LLMemoryInfo::refresh()  { +	LL_PROFILE_ZONE_SCOPED  	mStatsMap = loadStatsMap();  	LL_DEBUGS("LLMemoryInfo") << "Populated mStatsMap:\n"; @@ -852,11 +853,9 @@ LLMemoryInfo& LLMemoryInfo::refresh()  	return *this;  } -static LLTrace::BlockTimerStatHandle FTM_MEMINFO_LOAD_STATS("MemInfo Load Stats"); -  LLSD LLMemoryInfo::loadStatsMap()  { -	LL_RECORD_BLOCK_TIME(FTM_MEMINFO_LOAD_STATS); +    LL_PROFILE_ZONE_SCOPED;  	// This implementation is derived from stream() code (as of 2011-06-29).  	
Stats stats; diff --git a/indra/llcommon/llthread.cpp b/indra/llcommon/llthread.cpp index 6d531d842d..a807acc56e 100644 --- a/indra/llcommon/llthread.cpp +++ b/indra/llcommon/llthread.cpp @@ -135,6 +135,8 @@ void LLThread::threadRun()      set_thread_name(-1, mName.c_str());  #endif +    LL_PROFILER_SET_THREAD_NAME( mName.c_str() ); +      // this is the first point at which we're actually running in the new thread      mID = currentID(); @@ -331,6 +333,7 @@ bool LLThread::runCondition(void)  // Stop thread execution if requested until unpaused.  void LLThread::checkPause()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      mDataLock->lock();      // This is in a while loop because the pthread API allows for spurious wakeups. @@ -362,17 +365,20 @@ void LLThread::setQuitting()  // static  LLThread::id_t LLThread::currentID()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      return std::this_thread::get_id();  }  // static  void LLThread::yield()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      std::this_thread::yield();  }  void LLThread::wake()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      mDataLock->lock();      if(!shouldSleep())      { @@ -383,6 +389,7 @@ void LLThread::wake()  void LLThread::wakeLocked()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      if(!shouldSleep())      {          mRunCondition->signal(); @@ -391,11 +398,13 @@ void LLThread::wakeLocked()  void LLThread::lockData()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      mDataLock->lock();  }  void LLThread::unlockData()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD      mDataLock->unlock();  } diff --git a/indra/llcommon/llthreadsafequeue.h b/indra/llcommon/llthreadsafequeue.h index 26e0d71d31..68d79cdd12 100644 --- a/indra/llcommon/llthreadsafequeue.h +++ b/indra/llcommon/llthreadsafequeue.h @@ -1,6 +1,6 @@  /**    * @file llthreadsafequeue.h - * @brief Base classes for thread, mutex and condition handling. 
+ * @brief Queue protected with mutexes for cross-thread use   *   * $LicenseInfo:firstyear=2004&license=viewerlgpl$   * Second Life Viewer Source Code @@ -27,16 +27,19 @@  #ifndef LL_LLTHREADSAFEQUEUE_H  #define LL_LLTHREADSAFEQUEUE_H -#include "llexception.h" -#include <deque> -#include <string> -#include <chrono> -#include "mutex.h"  #include "llcoros.h"  #include LLCOROS_MUTEX_HEADER  #include <boost/fiber/timed_mutex.hpp>  #include LLCOROS_CONDVAR_HEADER +#include "llexception.h" +#include "mutex.h" +#include <chrono> +#include <queue> +#include <string> +/***************************************************************************** +*   LLThreadSafeQueue +*****************************************************************************/  //  // A general queue exception.  // @@ -66,70 +69,116 @@ public:  	}  }; -// -// Implements a thread safe FIFO. -// -template<typename ElementT> +/** + * Implements a thread safe FIFO. + */ +// Let the default std::queue default to underlying std::deque. Override if +// desired. +template<typename ElementT, typename QueueT=std::queue<ElementT>>  class LLThreadSafeQueue  {  public:  	typedef ElementT value_type; -	 -	// If the pool is set to NULL one will be allocated and managed by this -	// queue. + +	// Limiting the number of pending items prevents unbounded growth of the +	// underlying queue.  	LLThreadSafeQueue(U32 capacity = 1024); -	 -	// Add an element to the front of queue (will block if the queue has -	// reached capacity). +	virtual ~LLThreadSafeQueue() {} + +	// Add an element to the queue (will block if the queue has reached +	// capacity).  	//  	// This call will raise an interrupt error if the queue is closed while  	// the caller is blocked. -	void pushFront(ElementT const & element); -	 -	// Try to add an element to the front of queue without blocking. 
Returns +	template <typename T> +	void push(T&& element); +	// legacy name +	void pushFront(ElementT const & element) { return push(element); } + +	// Add an element to the queue (will block if the queue has reached +	// capacity). Return false if the queue is closed before push is possible. +	template <typename T> +	bool pushIfOpen(T&& element); + +	// Try to add an element to the queue without blocking. Returns  	// true only if the element was actually added. -	bool tryPushFront(ElementT const & element); +	template <typename T> +	bool tryPush(T&& element); +	// legacy name +	bool tryPushFront(ElementT const & element) { return tryPush(element); } -	// Try to add an element to the front of queue, blocking if full but with -	// timeout. Returns true if the element was added. +	// Try to add an element to the queue, blocking if full but with timeout +	// after specified duration. Returns true if the element was added.  	// There are potentially two different timeouts involved: how long to try  	// to lock the mutex, versus how long to wait for the queue to stop being  	// full. Careful settings for each timeout might be orders of magnitude  	// apart. However, this method conflates them. +	template <typename Rep, typename Period, typename T> +	bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout, +					T&& element); +	// legacy name  	template <typename Rep, typename Period>  	bool tryPushFrontFor(const std::chrono::duration<Rep, Period>& timeout, -						 ElementT const & element); +						 ElementT const & element) { return tryPushFor(timeout, element); } + +	// Try to add an element to the queue, blocking if full but with +	// timeout at specified time_point. Returns true if the element was added. 
+	template <typename Clock, typename Duration, typename T> +	bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until, +					  T&& element); +	// no legacy name because this is a newer method -	// Pop the element at the end of the queue (will block if the queue is +	// Pop the element at the head of the queue (will block if the queue is  	// empty).  	//  	// This call will raise an interrupt error if the queue is closed while  	// the caller is blocked. -	ElementT popBack(void); -	 -	// Pop an element from the end of the queue if there is one available. +	ElementT pop(void); +	// legacy name +	ElementT popBack(void) { return pop(); } + +	// Pop an element from the head of the queue if there is one available.  	// Returns true only if an element was popped. -	bool tryPopBack(ElementT & element); -	 +	bool tryPop(ElementT & element); +	// legacy name +	bool tryPopBack(ElementT & element) { return tryPop(element); } + +	// Pop the element at the head of the queue, blocking if empty, with +	// timeout after specified duration. Returns true if an element was popped. +	template <typename Rep, typename Period> +	bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, ElementT& element); +	// no legacy name because this is a newer method + +	// Pop the element at the head of the queue, blocking if empty, with +	// timeout at specified time_point. Returns true if an element was popped. +	template <typename Clock, typename Duration> +	bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until, +					 ElementT& element); +	// no legacy name because this is a newer method +  	// Returns the size of the queue.  	size_t size(); +    //Returns the capacity of the queue. 
+    U32 capacity() { return mCapacity; } +  	// closes the queue: -	// - every subsequent pushFront() call will throw LLThreadSafeQueueInterrupt -	// - every subsequent tryPushFront() call will return false -	// - popBack() calls will return normally until the queue is drained, then -	//   every subsequent popBack() will throw LLThreadSafeQueueInterrupt -	// - tryPopBack() calls will return normally until the queue is drained, -	//   then every subsequent tryPopBack() call will return false +	// - every subsequent push() call will throw LLThreadSafeQueueInterrupt +	// - every subsequent tryPush() call will return false +	// - pop() calls will return normally until the queue is drained, then +	//   every subsequent pop() will throw LLThreadSafeQueueInterrupt +	// - tryPop() calls will return normally until the queue is drained, +	//   then every subsequent tryPop() call will return false  	void close(); -	// detect closed state +	// producer end: are we prevented from pushing any additional items?  	bool isClosed(); -	// inverse of isClosed() -	explicit operator bool(); +	// consumer end: are we done, is the queue entirely drained? 
+	bool done(); -private: -	std::deque< ElementT > mStorage; +protected: +	typedef QueueT queue_type; +	QueueT mStorage;  	U32 mCapacity;  	bool mClosed; @@ -137,37 +186,154 @@ private:  	typedef std::unique_lock<decltype(mLock)> lock_t;  	boost::fibers::condition_variable_any mCapacityCond;  	boost::fibers::condition_variable_any mEmptyCond; -}; -// LLThreadSafeQueue -//----------------------------------------------------------------------------- +	enum pop_result { EMPTY, DONE, WAITING, POPPED }; +	// implementation logic, suitable for passing to tryLockUntil() +	template <typename Clock, typename Duration> +	pop_result tryPopUntil_(lock_t& lock, +							const std::chrono::time_point<Clock, Duration>& until, +							ElementT& element); +	// if we're able to lock immediately, do so and run the passed callable, +	// which must accept lock_t& and return bool +	template <typename CALLABLE> +	bool tryLock(CALLABLE&& callable); +	// if we're able to lock before the passed time_point, do so and run the +	// passed callable, which must accept lock_t& and return bool +	template <typename Clock, typename Duration, typename CALLABLE> +	bool tryLockUntil(const std::chrono::time_point<Clock, Duration>& until, +					  CALLABLE&& callable); +	// while lock is locked, really push the passed element, if we can +	template <typename T> +	bool push_(lock_t& lock, T&& element); +	// while lock is locked, really pop the head element, if we can +	pop_result pop_(lock_t& lock, ElementT& element); +	// Is the current head element ready to pop? We say yes; subclass can +	// override as needed. 
+	virtual bool canPop(const ElementT& head) const { return true; } +}; -template<typename ElementT> -LLThreadSafeQueue<ElementT>::LLThreadSafeQueue(U32 capacity) : +/***************************************************************************** +*   PriorityQueueAdapter +*****************************************************************************/ +namespace LL +{ +    /** +     * std::priority_queue's API is almost like std::queue, intentionally of +     * course, but you must access the element about to pop() as top() rather +     * than as front(). Make an adapter for use with LLThreadSafeQueue. +     */ +    template <typename T, typename Container=std::vector<T>, +              typename Compare=std::less<typename Container::value_type>> +    class PriorityQueueAdapter +    { +    public: +        // publish all the same types +        typedef std::priority_queue<T, Container, Compare> queue_type; +        typedef typename queue_type::container_type  container_type; +        typedef typename queue_type::value_compare   value_compare; +        typedef typename queue_type::value_type      value_type; +        typedef typename queue_type::size_type       size_type; +        typedef typename queue_type::reference       reference; +        typedef typename queue_type::const_reference const_reference; + +        // Although std::queue defines both const and non-const front() +        // methods, std::priority_queue defines only const top(). +        const_reference front() const { return mQ.top(); } +        // std::priority_queue has no equivalent to back(), so it's good that +        // LLThreadSafeQueue doesn't use it. + +        // All the rest of these merely forward to the corresponding +        // queue_type methods. 
+        bool empty() const                 { return mQ.empty(); } +        size_type size() const             { return mQ.size(); } +        void push(const value_type& value) { mQ.push(value); } +        void push(value_type&& value)      { mQ.push(std::move(value)); } +        template <typename... Args> +        void emplace(Args&&... args)       { mQ.emplace(std::forward<Args>(args)...); } +        void pop()                         { mQ.pop(); } + +    private: +        queue_type mQ; +    }; +} // namespace LL + + +/***************************************************************************** +*   LLThreadSafeQueue implementation +*****************************************************************************/ +template<typename ElementT, typename QueueT> +LLThreadSafeQueue<ElementT, QueueT>::LLThreadSafeQueue(U32 capacity) :      mCapacity(capacity),      mClosed(false)  {  } -template<typename ElementT> -void LLThreadSafeQueue<ElementT>::pushFront(ElementT const & element) +// if we're able to lock immediately, do so and run the passed callable, which +// must accept lock_t& and return bool +template <typename ElementT, typename QueueT> +template <typename CALLABLE> +bool LLThreadSafeQueue<ElementT, QueueT>::tryLock(CALLABLE&& callable) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    lock_t lock1(mLock, std::defer_lock); +    if (!lock1.try_lock()) +        return false; + +    return std::forward<CALLABLE>(callable)(lock1); +} + + +// if we're able to lock before the passed time_point, do so and run the +// passed callable, which must accept lock_t& and return bool +template <typename ElementT, typename QueueT> +template <typename Clock, typename Duration, typename CALLABLE> +bool LLThreadSafeQueue<ElementT, QueueT>::tryLockUntil( +    const std::chrono::time_point<Clock, Duration>& until, +    CALLABLE&& callable)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    lock_t lock1(mLock, std::defer_lock); +    if (!lock1.try_lock_until(until)) +        
return false; + +    return std::forward<CALLABLE>(callable)(lock1); +} + + +// while lock is locked, really push the passed element, if we can +template <typename ElementT, typename QueueT> +template <typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::push_(lock_t& lock, T&& element) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    if (mStorage.size() >= mCapacity) +        return false; + +    mStorage.push(std::forward<T>(element)); +    lock.unlock(); +    // now that we've pushed, if somebody's been waiting to pop, signal them +    mEmptyCond.notify_one(); +    return true; +} + + +template <typename ElementT, typename QueueT> +template <typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::pushIfOpen(T&& element) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;      lock_t lock1(mLock);      while (true)      { +        // On the producer side, it doesn't matter whether the queue has been +        // drained or not: the moment either end calls close(), further push() +        // operations will fail.          if (mClosed) -        { -            LLTHROW(LLThreadSafeQueueInterrupt()); -        } +            return false; -        if (mStorage.size() < mCapacity) -        { -            mStorage.push_front(element); -            lock1.unlock(); -            mEmptyCond.notify_one(); -            return; -        } +        if (push_(lock1, std::forward<T>(element))) +            return true;          // Storage Full. Wait for signal.          
mCapacityCond.wait(lock1); @@ -175,142 +341,250 @@ void LLThreadSafeQueue<ElementT>::pushFront(ElementT const & element)  } -template <typename ElementT> -template <typename Rep, typename Period> -bool LLThreadSafeQueue<ElementT>::tryPushFrontFor(const std::chrono::duration<Rep, Period>& timeout, -                                                  ElementT const & element) +template <typename ElementT, typename QueueT> +template<typename T> +void LLThreadSafeQueue<ElementT, QueueT>::push(T&& element)  { -    // Convert duration to time_point: passing the same timeout duration to -    // each of multiple calls is wrong. -    auto endpoint = std::chrono::steady_clock::now() + timeout; +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    if (! pushIfOpen(std::forward<T>(element))) +    { +        LLTHROW(LLThreadSafeQueueInterrupt()); +    } +} -    lock_t lock1(mLock, std::defer_lock); -    if (!lock1.try_lock_until(endpoint)) -        return false; -    while (true) -    { -        if (mClosed) +template<typename ElementT, typename QueueT> +template<typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPush(T&& element) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    return tryLock( +        [this, element=std::move(element)](lock_t& lock)          { -            return false; -        } +            if (mClosed) +                return false; +            return push_(lock, std::move(element)); +        }); +} -        if (mStorage.size() < mCapacity) -        { -            mStorage.push_front(element); -            lock1.unlock(); -            mEmptyCond.notify_one(); -            return true; -        } -        // Storage Full. Wait for signal. -        if (LLCoros::cv_status::timeout == mCapacityCond.wait_until(lock1, endpoint)) -        { -            // timed out -- formally we might recheck both conditions above -            return false; -        } -        // If we didn't time out, we were notified for some reason. Loop back -        // to check. 
-    } +template <typename ElementT, typename QueueT> +template <typename Rep, typename Period, typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPushFor( +    const std::chrono::duration<Rep, Period>& timeout, +    T&& element) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    // Convert duration to time_point: passing the same timeout duration to +    // each of multiple calls is wrong. +    return tryPushUntil(std::chrono::steady_clock::now() + timeout, +                        std::forward<T>(element));  } -template<typename ElementT> -bool LLThreadSafeQueue<ElementT>::tryPushFront(ElementT const & element) +template <typename ElementT, typename QueueT> +template <typename Clock, typename Duration, typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPushUntil( +    const std::chrono::time_point<Clock, Duration>& until, +    T&& element)  { -    lock_t lock1(mLock, std::defer_lock); -    if (!lock1.try_lock()) -        return false; +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    return tryLockUntil( +        until, +        [this, until, element=std::move(element)](lock_t& lock) +        { +            while (true) +            { +                if (mClosed) +                { +                    return false; +                } + +                if (push_(lock, std::move(element))) +                    return true; + +                // Storage Full. Wait for signal. +                if (LLCoros::cv_status::timeout == mCapacityCond.wait_until(lock, until)) +                { +                    // timed out -- formally we might recheck both conditions above +                    return false; +                } +                // If we didn't time out, we were notified for some reason. Loop back +                // to check. 
+            } +        }); +} -    if (mClosed) -        return false; -    if (mStorage.size() >= mCapacity) -        return false; +// while lock is locked, really pop the head element, if we can +template <typename ElementT, typename QueueT> +typename LLThreadSafeQueue<ElementT, QueueT>::pop_result +LLThreadSafeQueue<ElementT, QueueT>::pop_(lock_t& lock, ElementT& element) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    // If mStorage is empty, there's no head element. +    if (mStorage.empty()) +        return mClosed? DONE : EMPTY; -    mStorage.push_front(element); -    lock1.unlock(); -    mEmptyCond.notify_one(); -    return true; +    // If there's a head element, pass it to canPop() to see if it's ready to pop.  +    if (! canPop(mStorage.front())) +        return WAITING; + +    // std::queue::front() is the element about to pop() +    element = mStorage.front(); +    mStorage.pop(); +    lock.unlock(); +    // now that we've popped, if somebody's been waiting to push, signal them +    mCapacityCond.notify_one(); +    return POPPED;  } -template<typename ElementT> -ElementT LLThreadSafeQueue<ElementT>::popBack(void) +template<typename ElementT, typename QueueT> +ElementT LLThreadSafeQueue<ElementT, QueueT>::pop(void)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;      lock_t lock1(mLock); +    ElementT value;      while (true)      { -        if (!mStorage.empty()) -        { -            ElementT value = mStorage.back(); -            mStorage.pop_back(); -            lock1.unlock(); -            mCapacityCond.notify_one(); -            return value; -        } - -        if (mClosed) +        // On the consumer side, we always try to pop before checking mClosed +        // so we can finish draining the queue. +        pop_result popped = pop_(lock1, value); +        if (popped == POPPED) +            return std::move(value); + +        // Once the queue is DONE, there will never be any more coming. 
+        if (popped == DONE)          {              LLTHROW(LLThreadSafeQueueInterrupt());          } -        // Storage empty. Wait for signal. +        // If we didn't pop because WAITING, i.e. canPop() returned false, +        // then even if the producer end has been closed, there's still at +        // least one item to drain: wait for it. Or we might be EMPTY, with +        // the queue still open. Either way, wait for signal.          mEmptyCond.wait(lock1);      }  } -template<typename ElementT> -bool LLThreadSafeQueue<ElementT>::tryPopBack(ElementT & element) +template<typename ElementT, typename QueueT> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPop(ElementT & element)  { -    lock_t lock1(mLock, std::defer_lock); -    if (!lock1.try_lock()) -        return false; +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    return tryLock( +        [this, &element](lock_t& lock) +        { +            // conflate EMPTY, DONE, WAITING: tryPop() behavior when the queue +            // is closed is implemented by simple inability to push any new +            // elements +            return pop_(lock, element) == POPPED; +        }); +} -    // no need to check mClosed: tryPopBack() behavior when the queue is -    // closed is implemented by simple inability to push any new elements -    if (mStorage.empty()) -        return false; -    element = mStorage.back(); -    mStorage.pop_back(); -    lock1.unlock(); -    mCapacityCond.notify_one(); -    return true; +template <typename ElementT, typename QueueT> +template <typename Rep, typename Period> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPopFor( +    const std::chrono::duration<Rep, Period>& timeout, +    ElementT& element) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    // Convert duration to time_point: passing the same timeout duration to +    // each of multiple calls is wrong. 
+    return tryPopUntil(std::chrono::steady_clock::now() + timeout, element);  } -template<typename ElementT> -size_t LLThreadSafeQueue<ElementT>::size(void) +template <typename ElementT, typename QueueT> +template <typename Clock, typename Duration> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil( +    const std::chrono::time_point<Clock, Duration>& until, +    ElementT& element)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    return tryLockUntil( +        until, +        [this, until, &element](lock_t& lock) +        { +            // conflate EMPTY, DONE, WAITING +            return tryPopUntil_(lock, until, element) == POPPED; +        }); +} + + +// body of tryPopUntil(), called once we have the lock +template <typename ElementT, typename QueueT> +template <typename Clock, typename Duration> +typename LLThreadSafeQueue<ElementT, QueueT>::pop_result +LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil_( +    lock_t& lock, +    const std::chrono::time_point<Clock, Duration>& until, +    ElementT& element) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    while (true) +    { +        pop_result popped = pop_(lock, element); +        if (popped == POPPED || popped == DONE) +        { +            // If we succeeded, great! If we've drained the last item, so be +            // it. Either way, break the loop and tell caller. +            return popped; +        } + +        // EMPTY or WAITING: wait for signal. +        if (LLCoros::cv_status::timeout == mEmptyCond.wait_until(lock, until)) +        { +            // timed out -- formally we might recheck +            // as it is, break loop +            return popped; +        } +        // If we didn't time out, we were notified for some reason. Loop back +        // to check. 
+    } +} + + +template<typename ElementT, typename QueueT> +size_t LLThreadSafeQueue<ElementT, QueueT>::size(void) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;      lock_t lock(mLock);      return mStorage.size();  } -template<typename ElementT> -void LLThreadSafeQueue<ElementT>::close() + +template<typename ElementT, typename QueueT> +void LLThreadSafeQueue<ElementT, QueueT>::close()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;      lock_t lock(mLock);      mClosed = true;      lock.unlock(); -    // wake up any blocked popBack() calls +    // wake up any blocked pop() calls      mEmptyCond.notify_all(); -    // wake up any blocked pushFront() calls +    // wake up any blocked push() calls      mCapacityCond.notify_all();  } -template<typename ElementT> -bool LLThreadSafeQueue<ElementT>::isClosed() + +template<typename ElementT, typename QueueT> +bool LLThreadSafeQueue<ElementT, QueueT>::isClosed()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;      lock_t lock(mLock); -    return mClosed && mStorage.size() == 0; +    return mClosed;  } -template<typename ElementT> -LLThreadSafeQueue<ElementT>::operator bool() + +template<typename ElementT, typename QueueT> +bool LLThreadSafeQueue<ElementT, QueueT>::done()  { -    return ! 
isClosed(); +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    lock_t lock(mLock); +    return mClosed && mStorage.empty();  }  #endif diff --git a/indra/llcommon/lltrace.cpp b/indra/llcommon/lltrace.cpp index 54079a4689..f59b207ded 100644 --- a/indra/llcommon/lltrace.cpp +++ b/indra/llcommon/lltrace.cpp @@ -61,6 +61,7 @@ TimeBlockTreeNode::TimeBlockTreeNode()  void TimeBlockTreeNode::setParent( BlockTimerStatHandle* parent )  { +    LL_PROFILE_ZONE_SCOPED;  	llassert_always(parent != mBlock);  	llassert_always(parent != NULL); diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h index 0d0cd6f581..fcd8753f75 100644 --- a/indra/llcommon/lltrace.h +++ b/indra/llcommon/lltrace.h @@ -227,6 +227,7 @@ public:  	void setName(const char* name)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  		mName = name;  		setKey(name);  	} @@ -234,12 +235,14 @@ public:  	/*virtual*/ const char* getUnitLabel() const { return "KB"; }  	StatType<MemAccumulator::AllocationFacet>& allocations()  -	{  +	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  		return static_cast<StatType<MemAccumulator::AllocationFacet>&>(*(StatType<MemAccumulator>*)this);  	}  	StatType<MemAccumulator::DeallocationFacet>& deallocations()  -	{  +	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  		return static_cast<StatType<MemAccumulator::DeallocationFacet>&>(*(StatType<MemAccumulator>*)this);  	}  }; @@ -261,6 +264,7 @@ struct MeasureMem<T, typename T::mem_trackable_tag_t, IS_BYTES>  {  	static size_t measureFootprint(const T& value)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  		return sizeof(T) + value.getMemFootprint();  	}  }; @@ -270,6 +274,7 @@ struct MeasureMem<T, IS_MEM_TRACKABLE, typename T::is_unit_t>  {  	static size_t measureFootprint(const T& value)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  		return U32Bytes(value).value();  	}  }; @@ -279,6 +284,7 @@ struct MeasureMem<T*, IS_MEM_TRACKABLE, IS_BYTES>  {  	static size_t measureFootprint(const T* value)  	{ +      
  LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  		if (!value)  		{  			return 0; @@ -323,6 +329,7 @@ struct MeasureMem<std::basic_string<T>, IS_MEM_TRACKABLE, IS_BYTES>  {  	static size_t measureFootprint(const std::basic_string<T>& value)  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  		return value.capacity() * sizeof(T);  	}  }; @@ -331,6 +338,7 @@ struct MeasureMem<std::basic_string<T>, IS_MEM_TRACKABLE, IS_BYTES>  template<typename T>  inline void claim_alloc(MemStatHandle& measurement, const T& value)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  #if LL_TRACE_ENABLED  	S32 size = MeasureMem<T>::measureFootprint(value);  	if(size == 0) return; @@ -343,6 +351,7 @@ inline void claim_alloc(MemStatHandle& measurement, const T& value)  template<typename T>  inline void disclaim_alloc(MemStatHandle& measurement, const T& value)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  #if LL_TRACE_ENABLED  	S32 size = MeasureMem<T>::measureFootprint(value);  	if(size == 0) return; @@ -352,141 +361,6 @@ inline void disclaim_alloc(MemStatHandle& measurement, const T& value)  #endif  } -template<typename DERIVED, size_t ALIGNMENT = LL_DEFAULT_HEAP_ALIGN> -class MemTrackableNonVirtual -{ -public: -	typedef void mem_trackable_tag_t; - -	MemTrackableNonVirtual(const char* name) -#if LL_TRACE_ENABLED -	:	mMemFootprint(0) -#endif -	{ -#if LL_TRACE_ENABLED -		static bool name_initialized = false; -		if (!name_initialized) -		{ -			name_initialized = true; -			sMemStat.setName(name); -		} -#endif -	} - -#if LL_TRACE_ENABLED -	~MemTrackableNonVirtual() -	{ -		disclaimMem(mMemFootprint); -	} - -	static MemStatHandle& getMemStatHandle() -	{ -		return sMemStat; -	} - -	S32 getMemFootprint() const	{ return mMemFootprint; } -#endif - -	void* operator new(size_t size)  -	{ -#if LL_TRACE_ENABLED -		claim_alloc(sMemStat, size); -#endif -		return ll_aligned_malloc<ALIGNMENT>(size); -	} - -	template<int CUSTOM_ALIGNMENT> -	static void* aligned_new(size_t size) -	{ -#if LL_TRACE_ENABLED -		
claim_alloc(sMemStat, size); -#endif -		return ll_aligned_malloc<CUSTOM_ALIGNMENT>(size); -	} - -	void operator delete(void* ptr, size_t size) -	{ -#if LL_TRACE_ENABLED -		disclaim_alloc(sMemStat, size); -#endif -		ll_aligned_free<ALIGNMENT>(ptr); -	} - -	template<int CUSTOM_ALIGNMENT> -	static void aligned_delete(void* ptr, size_t size) -	{ -#if LL_TRACE_ENABLED -		disclaim_alloc(sMemStat, size); -#endif -		ll_aligned_free<CUSTOM_ALIGNMENT>(ptr); -	} - -	void* operator new [](size_t size) -	{ -#if LL_TRACE_ENABLED -		claim_alloc(sMemStat, size); -#endif -		return ll_aligned_malloc<ALIGNMENT>(size); -	} - -	void operator delete[](void* ptr, size_t size) -	{ -#if LL_TRACE_ENABLED -		disclaim_alloc(sMemStat, size); -#endif -		ll_aligned_free<ALIGNMENT>(ptr); -	} - -	// claim memory associated with other objects/data as our own, adding to our calculated footprint -	template<typename CLAIM_T> -	void claimMem(const CLAIM_T& value) const -	{ -#if LL_TRACE_ENABLED -		S32 size = MeasureMem<CLAIM_T>::measureFootprint(value); -		claim_alloc(sMemStat, size); -		mMemFootprint += size; -#endif -	} - -	// remove memory we had claimed from our calculated footprint -	template<typename CLAIM_T> -	void disclaimMem(const CLAIM_T& value) const -	{ -#if LL_TRACE_ENABLED -		S32 size = MeasureMem<CLAIM_T>::measureFootprint(value); -		disclaim_alloc(sMemStat, size); -		mMemFootprint -= size; -#endif -	} - -private: -#if LL_TRACE_ENABLED -	// use signed values so that we can temporarily go negative -	// and reconcile in destructor -	// NB: this assumes that no single class is responsible for > 2GB of allocations -	mutable S32 mMemFootprint; -	 -	static	MemStatHandle	sMemStat; -#endif - -}; - -#if LL_TRACE_ENABLED -template<typename DERIVED, size_t ALIGNMENT> -MemStatHandle MemTrackableNonVirtual<DERIVED, ALIGNMENT>::sMemStat(typeid(MemTrackableNonVirtual<DERIVED, ALIGNMENT>).name()); -#endif - -template<typename DERIVED, size_t ALIGNMENT = LL_DEFAULT_HEAP_ALIGN> -class MemTrackable : 
public MemTrackableNonVirtual<DERIVED, ALIGNMENT> -{ -public: -	MemTrackable(const char* name) -	:	MemTrackableNonVirtual<DERIVED, ALIGNMENT>(name) -	{} - -	virtual ~MemTrackable() -	{} -};  }  #endif // LL_LLTRACE_H diff --git a/indra/llcommon/lltraceaccumulators.cpp b/indra/llcommon/lltraceaccumulators.cpp index b1c23c6fb7..34299f5a29 100644 --- a/indra/llcommon/lltraceaccumulators.cpp +++ b/indra/llcommon/lltraceaccumulators.cpp @@ -41,6 +41,7 @@ extern MemStatHandle gTraceMemStat;  AccumulatorBufferGroup::AccumulatorBufferGroup()   { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	claim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator));  	claim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator));  	claim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator)); @@ -55,6 +56,7 @@ AccumulatorBufferGroup::AccumulatorBufferGroup(const AccumulatorBufferGroup& oth  	mStackTimers(other.mStackTimers),  	mMemStats(other.mMemStats)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	claim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator));  	claim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator));  	claim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator)); @@ -64,6 +66,7 @@ AccumulatorBufferGroup::AccumulatorBufferGroup(const AccumulatorBufferGroup& oth  AccumulatorBufferGroup::~AccumulatorBufferGroup()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	disclaim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator));  	disclaim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator));  	disclaim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator)); @@ -73,6 +76,7 @@ AccumulatorBufferGroup::~AccumulatorBufferGroup()  void AccumulatorBufferGroup::handOffTo(AccumulatorBufferGroup& other)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	other.mCounts.reset(&mCounts);  	other.mSamples.reset(&mSamples);  	other.mEvents.reset(&mEvents); @@ -82,6 
+86,7 @@ void AccumulatorBufferGroup::handOffTo(AccumulatorBufferGroup& other)  void AccumulatorBufferGroup::makeCurrent()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mCounts.makeCurrent();  	mSamples.makeCurrent();  	mEvents.makeCurrent(); @@ -104,6 +109,7 @@ void AccumulatorBufferGroup::makeCurrent()  //static  void AccumulatorBufferGroup::clearCurrent()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	AccumulatorBuffer<CountAccumulator>::clearCurrent();	  	AccumulatorBuffer<SampleAccumulator>::clearCurrent();  	AccumulatorBuffer<EventAccumulator>::clearCurrent(); @@ -118,6 +124,7 @@ bool AccumulatorBufferGroup::isCurrent() const  void AccumulatorBufferGroup::append( const AccumulatorBufferGroup& other )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mCounts.addSamples(other.mCounts, SEQUENTIAL);  	mSamples.addSamples(other.mSamples, SEQUENTIAL);  	mEvents.addSamples(other.mEvents, SEQUENTIAL); @@ -127,6 +134,7 @@ void AccumulatorBufferGroup::append( const AccumulatorBufferGroup& other )  void AccumulatorBufferGroup::merge( const AccumulatorBufferGroup& other)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mCounts.addSamples(other.mCounts, NON_SEQUENTIAL);  	mSamples.addSamples(other.mSamples, NON_SEQUENTIAL);  	mEvents.addSamples(other.mEvents, NON_SEQUENTIAL); @@ -137,6 +145,7 @@ void AccumulatorBufferGroup::merge( const AccumulatorBufferGroup& other)  void AccumulatorBufferGroup::reset(AccumulatorBufferGroup* other)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mCounts.reset(other ? &other->mCounts : NULL);  	mSamples.reset(other ? &other->mSamples : NULL);  	mEvents.reset(other ? 
&other->mEvents : NULL); @@ -146,6 +155,7 @@ void AccumulatorBufferGroup::reset(AccumulatorBufferGroup* other)  void AccumulatorBufferGroup::sync()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	if (isCurrent())  	{  		F64SecondsImplicit time_stamp = LLTimer::getTotalSeconds(); @@ -190,7 +200,7 @@ F64 SampleAccumulator::mergeSumsOfSquares(const SampleAccumulator& a, const Samp  void SampleAccumulator::addSamples( const SampleAccumulator& other, EBufferAppendType append_type )  { -	if (append_type == NON_SEQUENTIAL) +    if (append_type == NON_SEQUENTIAL)  	{  		return;  	} @@ -289,7 +299,7 @@ void EventAccumulator::addSamples( const EventAccumulator& other, EBufferAppendT  void EventAccumulator::reset( const EventAccumulator* other )  { -	mNumSamples = 0; +    mNumSamples = 0;  	mSum = 0;  	mMin = F32(NaN);  	mMax = F32(NaN); diff --git a/indra/llcommon/lltraceaccumulators.h b/indra/llcommon/lltraceaccumulators.h index 8eb5338a2a..7267a44300 100644 --- a/indra/llcommon/lltraceaccumulators.h +++ b/indra/llcommon/lltraceaccumulators.h @@ -66,6 +66,7 @@ namespace LLTrace  			: mStorageSize(0),  			mStorage(NULL)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			const AccumulatorBuffer& other = *getDefaultBuffer();  			resize(sNextStorageSlot);  			for (S32 i = 0; i < sNextStorageSlot; i++) @@ -76,6 +77,7 @@ namespace LLTrace  		~AccumulatorBuffer()  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			if (isCurrent())  			{  				LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL); @@ -98,6 +100,7 @@ namespace LLTrace  			: mStorageSize(0),  			mStorage(NULL)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			resize(sNextStorageSlot);  			for (S32 i = 0; i < sNextStorageSlot; i++)  			{ @@ -107,6 +110,7 @@ namespace LLTrace  		void addSamples(const AccumulatorBuffer<ACCUMULATOR>& other, EBufferAppendType append_type)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			llassert(mStorageSize >= sNextStorageSlot && 
other.mStorageSize >= sNextStorageSlot);  			for (size_t i = 0; i < sNextStorageSlot; i++)  			{ @@ -116,6 +120,7 @@ namespace LLTrace  		void copyFrom(const AccumulatorBuffer<ACCUMULATOR>& other)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			llassert(mStorageSize >= sNextStorageSlot && other.mStorageSize >= sNextStorageSlot);  			for (size_t i = 0; i < sNextStorageSlot; i++)  			{ @@ -125,6 +130,7 @@ namespace LLTrace  		void reset(const AccumulatorBuffer<ACCUMULATOR>* other = NULL)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			llassert(mStorageSize >= sNextStorageSlot);  			for (size_t i = 0; i < sNextStorageSlot; i++)  			{ @@ -134,6 +140,7 @@ namespace LLTrace  		void sync(F64SecondsImplicit time_stamp)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			llassert(mStorageSize >= sNextStorageSlot);  			for (size_t i = 0; i < sNextStorageSlot; i++)  			{ @@ -153,12 +160,13 @@ namespace LLTrace  		static void clearCurrent()  		{ -			LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL); +            LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL);  		}  		// NOTE: this is not thread-safe.  
We assume that slots are reserved in the main thread before any child threads are spawned  		size_t reserveSlot()  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			size_t next_slot = sNextStorageSlot++;  			if (next_slot >= mStorageSize)  			{ @@ -172,6 +180,7 @@ namespace LLTrace  		void resize(size_t new_size)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			if (new_size <= mStorageSize) return;  			ACCUMULATOR* old_storage = mStorage; @@ -212,6 +221,7 @@ namespace LLTrace  		static self_t* getDefaultBuffer()  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			static bool sInitialized = false;  			if (!sInitialized)  			{ @@ -326,6 +336,7 @@ namespace LLTrace  		void sample(F64 value)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			F64SecondsImplicit time_stamp = LLTimer::getTotalSeconds();  			// store effect of last value @@ -444,9 +455,9 @@ namespace LLTrace  		S32	mNumSamples;  	}; -	class TimeBlockAccumulator +	class alignas(32) TimeBlockAccumulator  	{ -	public: +    public:  		typedef F64Seconds value_t;  		static F64Seconds getDefaultValue() { return F64Seconds(0); } @@ -539,6 +550,7 @@ namespace LLTrace  		void addSamples(const MemAccumulator& other, EBufferAppendType append_type)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			mAllocations.addSamples(other.mAllocations, append_type);  			mDeallocations.addSamples(other.mDeallocations, append_type); @@ -557,6 +569,7 @@ namespace LLTrace  		void reset(const MemAccumulator* other)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			mSize.reset(other ? &other->mSize : NULL);  			mAllocations.reset(other ? &other->mAllocations : NULL);  			mDeallocations.reset(other ? 
&other->mDeallocations : NULL); diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp index 3094b627a2..1613af1dcf 100644 --- a/indra/llcommon/lltracerecording.cpp +++ b/indra/llcommon/lltracerecording.cpp @@ -50,6 +50,7 @@ Recording::Recording(EPlayState state)  :	mElapsedSeconds(0),  	mActiveBuffers(NULL)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	claim_alloc(gTraceMemStat, this);  	mBuffers = new AccumulatorBufferGroup();  	claim_alloc(gTraceMemStat, mBuffers); @@ -59,12 +60,14 @@ Recording::Recording(EPlayState state)  Recording::Recording( const Recording& other )  :	mActiveBuffers(NULL)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	claim_alloc(gTraceMemStat, this);  	*this = other;  }  Recording& Recording::operator = (const Recording& other)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	// this will allow us to seamlessly start without affecting any data we've acquired from other  	setPlayState(PAUSED); @@ -85,6 +88,7 @@ Recording& Recording::operator = (const Recording& other)  Recording::~Recording()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	disclaim_alloc(gTraceMemStat, this);  	disclaim_alloc(gTraceMemStat, mBuffers); @@ -103,6 +107,7 @@ void Recording::update()  #if LL_TRACE_ENABLED  	if (isStarted())  	{ +        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  		mElapsedSeconds += mSamplingTimer.getElapsedTimeF64();  		// must have  @@ -123,6 +128,7 @@ void Recording::update()  void Recording::handleReset()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  #if LL_TRACE_ENABLED  	mBuffers.write()->reset(); @@ -133,6 +139,7 @@ void Recording::handleReset()  void Recording::handleStart()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  #if LL_TRACE_ENABLED  	mSamplingTimer.reset();  	mBuffers.setStayUnique(true); @@ -144,6 +151,7 @@ void Recording::handleStart()  void Recording::handleStop()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  #if LL_TRACE_ENABLED  	mElapsedSeconds += mSamplingTimer.getElapsedTimeF64(); 
 	// must have thread recorder running on this thread @@ -273,7 +281,7 @@ F64Kilobytes Recording::getMean(const StatType<MemAccumulator>& stat)  F64Kilobytes Recording::getMax(const StatType<MemAccumulator>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return F64Bytes(llmax(accumulator.mSize.getMax(), active_accumulator && active_accumulator->mSize.hasValue() ? active_accumulator->mSize.getMax() : F32_MIN)); @@ -281,7 +289,7 @@ F64Kilobytes Recording::getMax(const StatType<MemAccumulator>& stat)  F64Kilobytes Recording::getStandardDeviation(const StatType<MemAccumulator>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	if (active_accumulator && active_accumulator->hasValue()) @@ -297,7 +305,7 @@ F64Kilobytes Recording::getStandardDeviation(const StatType<MemAccumulator>& sta  F64Kilobytes Recording::getLastValue(const StatType<MemAccumulator>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return F64Bytes(active_accumulator ? active_accumulator->mSize.getLastValue() : accumulator.mSize.getLastValue()); @@ -305,7 +313,7 @@ F64Kilobytes Recording::getLastValue(const StatType<MemAccumulator>& stat)  bool Recording::hasValue(const StatType<MemAccumulator::AllocationFacet>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? 
&mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return accumulator.mAllocations.hasValue() || (active_accumulator ? active_accumulator->mAllocations.hasValue() : false); @@ -313,7 +321,7 @@ bool Recording::hasValue(const StatType<MemAccumulator::AllocationFacet>& stat)  F64Kilobytes Recording::getSum(const StatType<MemAccumulator::AllocationFacet>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return F64Bytes(accumulator.mAllocations.getSum() + (active_accumulator ? active_accumulator->mAllocations.getSum() : 0)); @@ -321,7 +329,7 @@ F64Kilobytes Recording::getSum(const StatType<MemAccumulator::AllocationFacet>&  F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::AllocationFacet>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return F64Bytes((accumulator.mAllocations.getSum() + (active_accumulator ? active_accumulator->mAllocations.getSum() : 0)) / mElapsedSeconds.value()); @@ -329,7 +337,7 @@ F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::AllocationFacet  S32 Recording::getSampleCount(const StatType<MemAccumulator::AllocationFacet>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return accumulator.mAllocations.getSampleCount() + (active_accumulator ? 
active_accumulator->mAllocations.getSampleCount() : 0); @@ -337,7 +345,7 @@ S32 Recording::getSampleCount(const StatType<MemAccumulator::AllocationFacet>& s  bool Recording::hasValue(const StatType<MemAccumulator::DeallocationFacet>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return accumulator.mDeallocations.hasValue() || (active_accumulator ? active_accumulator->mDeallocations.hasValue() : false); @@ -346,7 +354,7 @@ bool Recording::hasValue(const StatType<MemAccumulator::DeallocationFacet>& stat  F64Kilobytes Recording::getSum(const StatType<MemAccumulator::DeallocationFacet>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return F64Bytes(accumulator.mDeallocations.getSum() + (active_accumulator ? active_accumulator->mDeallocations.getSum() : 0)); @@ -354,7 +362,7 @@ F64Kilobytes Recording::getSum(const StatType<MemAccumulator::DeallocationFacet>  F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::DeallocationFacet>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return F64Bytes((accumulator.mDeallocations.getSum() + (active_accumulator ? 
active_accumulator->mDeallocations.getSum() : 0)) / mElapsedSeconds.value()); @@ -362,7 +370,7 @@ F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::DeallocationFac  S32 Recording::getSampleCount(const StatType<MemAccumulator::DeallocationFacet>& stat)  { -	update(); +    update();  	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];  	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;  	return accumulator.mDeallocations.getSampleCount() + (active_accumulator ? active_accumulator->mDeallocations.getSampleCount() : 0); @@ -370,7 +378,7 @@ S32 Recording::getSampleCount(const StatType<MemAccumulator::DeallocationFacet>&  bool Recording::hasValue(const StatType<CountAccumulator>& stat)  { -	update(); +    update();  	const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()];  	const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL;  	return accumulator.hasValue() || (active_accumulator ? active_accumulator->hasValue() : false); @@ -378,7 +386,7 @@ bool Recording::hasValue(const StatType<CountAccumulator>& stat)  F64 Recording::getSum(const StatType<CountAccumulator>& stat)  { -	update(); +    update();  	const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()];  	const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL;  	return accumulator.getSum() + (active_accumulator ? active_accumulator->getSum() : 0); @@ -386,7 +394,7 @@ F64 Recording::getSum(const StatType<CountAccumulator>& stat)  F64 Recording::getPerSec( const StatType<CountAccumulator>& stat )  { -	update(); +    update();  	const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()];  	const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL;  	F64 sum = accumulator.getSum() + (active_accumulator ? 
active_accumulator->getSum() : 0); @@ -395,7 +403,7 @@ F64 Recording::getPerSec( const StatType<CountAccumulator>& stat )  S32 Recording::getSampleCount( const StatType<CountAccumulator>& stat )  { -	update(); +    update();  	const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()];  	const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL;  	return accumulator.getSampleCount() + (active_accumulator ? active_accumulator->getSampleCount() : 0); @@ -403,7 +411,7 @@ S32 Recording::getSampleCount( const StatType<CountAccumulator>& stat )  bool Recording::hasValue(const StatType<SampleAccumulator>& stat)  { -	update(); +    update();  	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];  	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;  	return accumulator.hasValue() || (active_accumulator && active_accumulator->hasValue()); @@ -411,7 +419,7 @@ bool Recording::hasValue(const StatType<SampleAccumulator>& stat)  F64 Recording::getMin( const StatType<SampleAccumulator>& stat )  { -	update(); +    update();  	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];  	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;  	return llmin(accumulator.getMin(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMin() : F32_MAX); @@ -419,7 +427,7 @@ F64 Recording::getMin( const StatType<SampleAccumulator>& stat )  F64 Recording::getMax( const StatType<SampleAccumulator>& stat )  { -	update(); +    update();  	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];  	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;  	return llmax(accumulator.getMax(), active_accumulator && active_accumulator->hasValue() ? 
active_accumulator->getMax() : F32_MIN); @@ -427,7 +435,7 @@ F64 Recording::getMax( const StatType<SampleAccumulator>& stat )  F64 Recording::getMean( const StatType<SampleAccumulator>& stat )  { -	update(); +    update();  	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];  	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;  	if (active_accumulator && active_accumulator->hasValue()) @@ -448,7 +456,7 @@ F64 Recording::getMean( const StatType<SampleAccumulator>& stat )  F64 Recording::getStandardDeviation( const StatType<SampleAccumulator>& stat )  { -	update(); +    update();  	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];  	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL; @@ -465,7 +473,7 @@ F64 Recording::getStandardDeviation( const StatType<SampleAccumulator>& stat )  F64 Recording::getLastValue( const StatType<SampleAccumulator>& stat )  { -	update(); +    update();  	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];  	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;  	return (active_accumulator && active_accumulator->hasValue() ? active_accumulator->getLastValue() : accumulator.getLastValue()); @@ -473,7 +481,7 @@ F64 Recording::getLastValue( const StatType<SampleAccumulator>& stat )  S32 Recording::getSampleCount( const StatType<SampleAccumulator>& stat )  { -	update(); +    update();  	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];  	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;  	return accumulator.getSampleCount() + (active_accumulator && active_accumulator->hasValue() ? 
active_accumulator->getSampleCount() : 0); @@ -481,7 +489,7 @@ S32 Recording::getSampleCount( const StatType<SampleAccumulator>& stat )  bool Recording::hasValue(const StatType<EventAccumulator>& stat)  { -	update(); +    update();  	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];  	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;  	return accumulator.hasValue() || (active_accumulator && active_accumulator->hasValue()); @@ -489,7 +497,7 @@ bool Recording::hasValue(const StatType<EventAccumulator>& stat)  F64 Recording::getSum( const StatType<EventAccumulator>& stat)  { -	update(); +    update();  	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];  	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;  	return (F64)(accumulator.getSum() + (active_accumulator && active_accumulator->hasValue() ? active_accumulator->getSum() : 0)); @@ -497,7 +505,7 @@ F64 Recording::getSum( const StatType<EventAccumulator>& stat)  F64 Recording::getMin( const StatType<EventAccumulator>& stat )  { -	update(); +    update();  	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];  	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;  	return llmin(accumulator.getMin(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMin() : F32_MAX); @@ -505,7 +513,7 @@ F64 Recording::getMin( const StatType<EventAccumulator>& stat )  F64 Recording::getMax( const StatType<EventAccumulator>& stat )  { -	update(); +    update();  	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];  	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;  	return llmax(accumulator.getMax(), active_accumulator && active_accumulator->hasValue() ? 
active_accumulator->getMax() : F32_MIN); @@ -513,7 +521,7 @@ F64 Recording::getMax( const StatType<EventAccumulator>& stat )  F64 Recording::getMean( const StatType<EventAccumulator>& stat )  { -	update(); +    update();  	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];  	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;  	if (active_accumulator && active_accumulator->hasValue()) @@ -534,7 +542,7 @@ F64 Recording::getMean( const StatType<EventAccumulator>& stat )  F64 Recording::getStandardDeviation( const StatType<EventAccumulator>& stat )  { -	update(); +    update();  	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];  	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL; @@ -551,7 +559,7 @@ F64 Recording::getStandardDeviation( const StatType<EventAccumulator>& stat )  F64 Recording::getLastValue( const StatType<EventAccumulator>& stat )  { -	update(); +    update();  	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];  	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;  	return active_accumulator ? active_accumulator->getLastValue() : accumulator.getLastValue(); @@ -559,7 +567,7 @@ F64 Recording::getLastValue( const StatType<EventAccumulator>& stat )  S32 Recording::getSampleCount( const StatType<EventAccumulator>& stat )  { -	update(); +    update();  	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];  	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;  	return accumulator.getSampleCount() + (active_accumulator ? active_accumulator->getSampleCount() : 0); @@ -575,17 +583,20 @@ PeriodicRecording::PeriodicRecording( S32 num_periods, EPlayState state)  	mNumRecordedPeriods(0),  	mRecordingPeriods(num_periods ? 
num_periods : 1)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	setPlayState(state);  	claim_alloc(gTraceMemStat, this);  }  PeriodicRecording::~PeriodicRecording()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	disclaim_alloc(gTraceMemStat, this);  }  void PeriodicRecording::nextPeriod()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	if (mAutoResize)  	{  		mRecordingPeriods.push_back(Recording()); @@ -600,6 +611,7 @@ void PeriodicRecording::nextPeriod()  void PeriodicRecording::appendRecording(Recording& recording)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	getCurRecording().appendRecording(recording);  	nextPeriod();  } @@ -607,6 +619,7 @@ void PeriodicRecording::appendRecording(Recording& recording)  void PeriodicRecording::appendPeriodicRecording( PeriodicRecording& other )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	if (other.mRecordingPeriods.empty()) return;  	getCurRecording().update(); @@ -680,6 +693,7 @@ void PeriodicRecording::appendPeriodicRecording( PeriodicRecording& other )  F64Seconds PeriodicRecording::getDuration() const  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	F64Seconds duration;  	S32 num_periods = mRecordingPeriods.size();  	for (S32 i = 1; i <= num_periods; i++) @@ -693,6 +707,7 @@ F64Seconds PeriodicRecording::getDuration() const  LLTrace::Recording PeriodicRecording::snapshotCurRecording() const  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	Recording recording_copy(getCurRecording());  	recording_copy.stop();  	return recording_copy; @@ -735,16 +750,19 @@ const Recording& PeriodicRecording::getPrevRecording( S32 offset ) const  void PeriodicRecording::handleStart()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	getCurRecording().start();  }  void PeriodicRecording::handleStop()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	getCurRecording().pause();  }  void PeriodicRecording::handleReset()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	getCurRecording().stop();  	if (mAutoResize) @@ -768,11 +786,13 @@ 
void PeriodicRecording::handleReset()  void PeriodicRecording::handleSplitTo(PeriodicRecording& other)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	getCurRecording().splitTo(other.getCurRecording());  }  F64 PeriodicRecording::getPeriodMin( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	bool has_value = false; @@ -794,6 +814,7 @@ F64 PeriodicRecording::getPeriodMin( const StatType<EventAccumulator>& stat, S32  F64 PeriodicRecording::getPeriodMax( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	bool has_value = false; @@ -816,6 +837,7 @@ F64 PeriodicRecording::getPeriodMax( const StatType<EventAccumulator>& stat, S32  // calculates means using aggregates per period  F64 PeriodicRecording::getPeriodMean( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	F64 mean = 0; @@ -836,9 +858,9 @@ F64 PeriodicRecording::getPeriodMean( const StatType<EventAccumulator>& stat, S3  			: NaN;  } -  F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	F64 period_mean = getPeriodMean(stat, num_periods); @@ -863,6 +885,7 @@ F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<EventAccumulat  F64 PeriodicRecording::getPeriodMin( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	bool has_value = false; @@ -884,6 +907,7 @@ F64 PeriodicRecording::getPeriodMin( const 
StatType<SampleAccumulator>& stat, S3  F64 PeriodicRecording::getPeriodMax(const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	bool has_value = false; @@ -906,6 +930,7 @@ F64 PeriodicRecording::getPeriodMax(const StatType<SampleAccumulator>& stat, S32  F64 PeriodicRecording::getPeriodMean( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	S32 valid_period_count = 0; @@ -926,8 +951,35 @@ F64 PeriodicRecording::getPeriodMean( const StatType<SampleAccumulator>& stat, S  			: NaN;  } +F64 PeriodicRecording::getPeriodMedian( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS; +	num_periods = llmin(num_periods, getNumRecordedPeriods()); + +	std::vector<F64> buf; +	for (S32 i = 1; i <= num_periods; i++) +	{ +		Recording& recording = getPrevRecording(i); +		if (recording.getDuration() > (F32Seconds)0.f) +		{ +			if (recording.hasValue(stat)) +			{ +				buf.push_back(recording.getMean(stat)); +			} +		} +	} +	if (buf.size()==0) +	{ +		return 0.0f; +	} +	std::sort(buf.begin(), buf.end()); + +	return F64((buf.size() % 2 == 0) ? 
(buf[buf.size() / 2 - 1] + buf[buf.size() / 2]) / 2 : buf[buf.size() / 2]); +} +  F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	F64 period_mean = getPeriodMean(stat, num_periods); @@ -953,6 +1005,7 @@ F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<SampleAccumula  F64Kilobytes PeriodicRecording::getPeriodMin( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	F64Kilobytes min_val(std::numeric_limits<F64>::max()); @@ -972,6 +1025,7 @@ F64Kilobytes PeriodicRecording::getPeriodMin(const MemStatHandle& stat, S32 num_  F64Kilobytes PeriodicRecording::getPeriodMax(const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	F64Kilobytes max_val(0.0); @@ -991,6 +1045,7 @@ F64Kilobytes PeriodicRecording::getPeriodMax(const MemStatHandle& stat, S32 num_  F64Kilobytes PeriodicRecording::getPeriodMean( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	F64Kilobytes mean(0); @@ -1011,6 +1066,7 @@ F64Kilobytes PeriodicRecording::getPeriodMean(const MemStatHandle& stat, S32 num  F64Kilobytes PeriodicRecording::getPeriodStandardDeviation( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	num_periods = llmin(num_periods, getNumRecordedPeriods());  	F64Kilobytes period_mean = getPeriodMean(stat, num_periods); @@ -1044,6 +1100,7 @@ F64Kilobytes PeriodicRecording::getPeriodStandardDeviation(const MemStatHandle&  void 
ExtendableRecording::extend()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	// push the data back to accepted recording  	mAcceptedRecording.appendRecording(mPotentialRecording);  	// flush data, so we can start from scratch @@ -1052,22 +1109,26 @@ void ExtendableRecording::extend()  void ExtendableRecording::handleStart()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mPotentialRecording.start();  }  void ExtendableRecording::handleStop()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mPotentialRecording.pause();  }  void ExtendableRecording::handleReset()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mAcceptedRecording.reset();  	mPotentialRecording.reset();  }  void ExtendableRecording::handleSplitTo(ExtendableRecording& other)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mPotentialRecording.splitTo(other.mPotentialRecording);  } @@ -1084,6 +1145,7 @@ ExtendablePeriodicRecording::ExtendablePeriodicRecording()  void ExtendablePeriodicRecording::extend()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	// push the data back to accepted recording  	mAcceptedRecording.appendPeriodicRecording(mPotentialRecording);  	// flush data, so we can start from scratch @@ -1093,22 +1155,26 @@ void ExtendablePeriodicRecording::extend()  void ExtendablePeriodicRecording::handleStart()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mPotentialRecording.start();  }  void ExtendablePeriodicRecording::handleStop()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mPotentialRecording.pause();  }  void ExtendablePeriodicRecording::handleReset()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mAcceptedRecording.reset();  	mPotentialRecording.reset();  }  void ExtendablePeriodicRecording::handleSplitTo(ExtendablePeriodicRecording& other)  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	mPotentialRecording.splitTo(other.mPotentialRecording);  } @@ -1123,6 +1189,7 @@ PeriodicRecording& get_frame_recording()  void LLStopWatchControlsMixinCommon::start()  { +    
LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	switch (mPlayState)  	{  	case STOPPED: @@ -1144,6 +1211,7 @@ void LLStopWatchControlsMixinCommon::start()  void LLStopWatchControlsMixinCommon::stop()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	switch (mPlayState)  	{  	case STOPPED: @@ -1163,6 +1231,7 @@ void LLStopWatchControlsMixinCommon::stop()  void LLStopWatchControlsMixinCommon::pause()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	switch (mPlayState)  	{  	case STOPPED: @@ -1182,6 +1251,7 @@ void LLStopWatchControlsMixinCommon::pause()  void LLStopWatchControlsMixinCommon::unpause()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	switch (mPlayState)  	{  	case STOPPED: @@ -1201,6 +1271,7 @@ void LLStopWatchControlsMixinCommon::unpause()  void LLStopWatchControlsMixinCommon::resume()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	switch (mPlayState)  	{  	case STOPPED: @@ -1221,6 +1292,7 @@ void LLStopWatchControlsMixinCommon::resume()  void LLStopWatchControlsMixinCommon::restart()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	switch (mPlayState)  	{  	case STOPPED: @@ -1244,11 +1316,13 @@ void LLStopWatchControlsMixinCommon::restart()  void LLStopWatchControlsMixinCommon::reset()  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	handleReset();  }  void LLStopWatchControlsMixinCommon::setPlayState( EPlayState state )  { +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	switch(state)  	{  	case STOPPED: diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h index d0b4a842a6..556b7470cf 100644 --- a/indra/llcommon/lltracerecording.h +++ b/indra/llcommon/lltracerecording.h @@ -355,6 +355,7 @@ namespace LLTrace  		template <typename T>  		S32 getSampleCount(const StatType<T>& stat, S32 num_periods = S32_MAX)          { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			num_periods = llmin(num_periods, getNumRecordedPeriods());              S32 num_samples = 0; @@ -374,6 +375,7 @@ namespace LLTrace  		template <typename T>  		
typename T::value_t getPeriodMin(const StatType<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			num_periods = llmin(num_periods, getNumRecordedPeriods());  			bool has_value = false; @@ -396,6 +398,7 @@ namespace LLTrace  		template<typename T>  		T getPeriodMin(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return T(getPeriodMin(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));  		} @@ -403,6 +406,7 @@ namespace LLTrace  		template<typename T>  		T getPeriodMin(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return T(getPeriodMin(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods));  		} @@ -410,6 +414,7 @@ namespace LLTrace  		template<typename T>  		T getPeriodMin(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return T(getPeriodMin(static_cast<const StatType<EventAccumulator>&>(stat), num_periods));  		} @@ -419,6 +424,7 @@ namespace LLTrace  		template <typename T>  		typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMinPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			num_periods = llmin(num_periods, getNumRecordedPeriods());  			typename RelatedTypes<typename T::value_t>::fractional_t min_val(std::numeric_limits<F64>::max()); @@ -433,6 +439,7 @@ namespace LLTrace  		template<typename T>  		typename RelatedTypes<T>::fractional_t getPeriodMinPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return typename RelatedTypes<T>::fractional_t(getPeriodMinPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));  		} @@ -444,6 +451,7 @@ namespace LLTrace  		template <typename T>  		typename 
T::value_t getPeriodMax(const StatType<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			num_periods = llmin(num_periods, getNumRecordedPeriods());  			bool has_value = false; @@ -466,6 +474,7 @@ namespace LLTrace  		template<typename T>  		T getPeriodMax(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return T(getPeriodMax(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));  		} @@ -473,6 +482,7 @@ namespace LLTrace  		template<typename T>  		T getPeriodMax(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return T(getPeriodMax(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods));  		} @@ -480,6 +490,7 @@ namespace LLTrace  		template<typename T>  		T getPeriodMax(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return T(getPeriodMax(static_cast<const StatType<EventAccumulator>&>(stat), num_periods));  		} @@ -489,6 +500,7 @@ namespace LLTrace  		template <typename T>  		typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMaxPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			num_periods = llmin(num_periods, getNumRecordedPeriods());  			F64 max_val = std::numeric_limits<F64>::min(); @@ -503,6 +515,7 @@ namespace LLTrace  		template<typename T>  		typename RelatedTypes<T>::fractional_t getPeriodMaxPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return typename RelatedTypes<T>::fractional_t(getPeriodMaxPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));  		} @@ -514,6 +527,7 @@ namespace LLTrace  		template <typename T>  		typename RelatedTypes<typename T::value_t>::fractional_t 
getPeriodMean(const StatType<T >& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			num_periods = llmin(num_periods, getNumRecordedPeriods());  			typename RelatedTypes<typename T::value_t>::fractional_t mean(0); @@ -534,12 +548,14 @@ namespace LLTrace  		template<typename T>  		typename RelatedTypes<T>::fractional_t getPeriodMean(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));  		}  		F64 getPeriodMean(const StatType<SampleAccumulator>& stat, S32 num_periods = S32_MAX);  		template<typename T>   		typename RelatedTypes<T>::fractional_t getPeriodMean(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods));  		} @@ -547,6 +563,7 @@ namespace LLTrace  		template<typename T>  		typename RelatedTypes<T>::fractional_t getPeriodMean(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<EventAccumulator>&>(stat), num_periods));  		} @@ -556,6 +573,7 @@ namespace LLTrace  		template <typename T>  		typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMeanPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			num_periods = llmin(num_periods, getNumRecordedPeriods());  			typename RelatedTypes<typename T::value_t>::fractional_t mean = 0; @@ -577,9 +595,39 @@ namespace LLTrace  		template<typename T>  		typename RelatedTypes<T>::fractional_t getPeriodMeanPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) 
 		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return typename RelatedTypes<T>::fractional_t(getPeriodMeanPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));  		} +        F64 getPeriodMedian( const StatType<SampleAccumulator>& stat, S32 num_periods = S32_MAX); + +        template <typename T> +        typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMedianPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS; +            num_periods = llmin(num_periods, getNumRecordedPeriods()); + +            std::vector <typename RelatedTypes<typename T::value_t>::fractional_t> buf; +            for (S32 i = 1; i <= num_periods; i++) +            { +                Recording& recording = getPrevRecording(i); +                if (recording.getDuration() > (F32Seconds)0.f) +                { +                    buf.push_back(recording.getPerSec(stat)); +                } +            } +            std::sort(buf.begin(), buf.end()); + +            return typename RelatedTypes<T>::fractional_t((buf.size() % 2 == 0) ? 
(buf[buf.size() / 2 - 1] + buf[buf.size() / 2]) / 2 : buf[buf.size() / 2]); +        } + +        template<typename T> +        typename RelatedTypes<T>::fractional_t getPeriodMedianPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS; +            return typename RelatedTypes<T>::fractional_t(getPeriodMedianPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods)); +        } +  		//  		// PERIODIC STANDARD DEVIATION  		// @@ -589,6 +637,7 @@ namespace LLTrace  		template<typename T>   		typename RelatedTypes<T>::fractional_t getPeriodStandardDeviation(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return typename RelatedTypes<T>::fractional_t(getPeriodStandardDeviation(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods));  		} @@ -596,6 +645,7 @@ namespace LLTrace  		template<typename T>  		typename RelatedTypes<T>::fractional_t getPeriodStandardDeviation(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX)  		{ +            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  			return typename RelatedTypes<T>::fractional_t(getPeriodStandardDeviation(static_cast<const StatType<EventAccumulator>&>(stat), num_periods));  		} diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp index 025dc57044..090d3297a0 100644 --- a/indra/llcommon/lltracethreadrecorder.cpp +++ b/indra/llcommon/lltracethreadrecorder.cpp @@ -274,12 +274,10 @@ void ThreadRecorder::pushToParent()  } -static LLTrace::BlockTimerStatHandle FTM_PULL_TRACE_DATA_FROM_CHILDREN("Pull child thread trace data"); -  void ThreadRecorder::pullFromChildren()  {  #if LL_TRACE_ENABLED -	LL_RECORD_BLOCK_TIME(FTM_PULL_TRACE_DATA_FROM_CHILDREN); +    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;  	if (mActiveRecordings.empty()) return;  	{ LLMutexLock lock(&mChildListMutex); diff --git 
a/indra/llcommon/lluuid.h b/indra/llcommon/lluuid.h index fe7482ba29..86a396ab06 100644 --- a/indra/llcommon/lluuid.h +++ b/indra/llcommon/lluuid.h @@ -184,6 +184,17 @@ struct boost::hash<LLUUID>      }  }; +// Adapt boost hash to std hash +namespace std +{ +    template<> struct hash<LLUUID> +    { +        std::size_t operator()(LLUUID const& s) const noexcept +        { +            return boost::hash<LLUUID>()(s); +        } +    }; +}  #endif diff --git a/indra/llcommon/stdtypes.h b/indra/llcommon/stdtypes.h index 887f6ab733..b07805b628 100644 --- a/indra/llcommon/stdtypes.h +++ b/indra/llcommon/stdtypes.h @@ -42,10 +42,17 @@ typedef unsigned int			U32;  // Windows wchar_t is 16-bit, whichever way /Zc:wchar_t is set. In effect,  // Windows wchar_t is always a typedef, either for unsigned short or __wchar_t.  // (__wchar_t, available either way, is Microsoft's native 2-byte wchar_t type.) +// The version of clang available with VS 2019 also defines wchar_t as __wchar_t +// which is also 16 bits.  // In any case, llwchar should be a UTF-32 type.  typedef U32				llwchar;  #else  typedef wchar_t				llwchar; +// What we'd actually want is a simple module-scope 'if constexpr' to test +// std::is_same<wchar_t, llwchar>::value and use that to define, or not +// define, string conversion specializations. Since we don't have that, we'll +// have to rely on #if instead. Sorry, Dr. Stroustrup. +#define LLWCHAR_IS_WCHAR_T 1  #endif  #if LL_WINDOWS diff --git a/indra/llcommon/stringize.h b/indra/llcommon/stringize.h index 38dd198ad3..12df693910 100644 --- a/indra/llcommon/stringize.h +++ b/indra/llcommon/stringize.h @@ -31,58 +31,109 @@  #include <sstream>  #include <llstring.h> +#include <boost/call_traits.hpp>  /** - * gstringize(item) encapsulates an idiom we use constantly, using - * operator<<(std::ostringstream&, TYPE) followed by std::ostringstream::str() - * or their wstring equivalents - * to render a string expressing some item. 
+ * stream_to(std::ostream&, items, ...) streams each item in the parameter list + * to the passed std::ostream using the insertion operator <<. This can be + * used, for instance, to make a simple print() function, e.g.: + * + * @code + * template <typename... Items> + * void print(Items&&... items) + * { + *     stream_to(std::cout, std::forward<Items>(items)...); + * } + * @endcode   */ -template <typename CHARTYPE, typename T> -std::basic_string<CHARTYPE> gstringize(const T& item) +// recursion tail +template <typename CHARTYPE> +void stream_to(std::basic_ostream<CHARTYPE>& out) {} +// stream one or more items +template <typename CHARTYPE, typename T, typename... Items> +void stream_to(std::basic_ostream<CHARTYPE>& out, T&& item, Items&&... items)  { -    std::basic_ostringstream<CHARTYPE> out; -    out << item; -    return out.str(); +    out << std::forward<T>(item); +    stream_to(out, std::forward<Items>(items)...);  } +// why we use function overloads, not function template specializations: +// http://www.gotw.ca/publications/mill17.htm +  /** - *partial specialization of stringize for handling wstring - *TODO: we should have similar specializations for wchar_t[] but not until it is needed. + * gstringize(item, ...) encapsulates an idiom we use constantly, using + * operator<<(std::ostringstream&, TYPE) followed by std::ostringstream::str() + * or their wstring equivalents to render a string expressing one or more items.   */ -inline std::string stringize(const std::wstring& item) +// two or more args - the case of a single argument is handled separately +template <typename CHARTYPE, typename T0, typename T1, typename... Items> +auto gstringize(T0&& item0, T1&& item1, Items&&... 
items)  { -    return wstring_to_utf8str(item); +    std::basic_ostringstream<CHARTYPE> out; +    stream_to(out, std::forward<T0>(item0), std::forward<T1>(item1), +              std::forward<Items>(items)...); +    return out.str();  } -/** - * Specialization of gstringize for std::string return types - */ -template <typename T> -std::string stringize(const T& item) +// generic single argument: stream to out, as above +template <typename CHARTYPE, typename T> +struct gstringize_impl  { -    return gstringize<char>(item); +    auto operator()(typename boost::call_traits<T>::param_type arg) +    { +        std::basic_ostringstream<CHARTYPE> out; +        out << arg; +        return out.str(); +    } +}; + +// partially specialize for a single STRING argument - +// note that ll_convert<T>(T) already handles the trivial case +template <typename OUTCHAR, typename INCHAR> +struct gstringize_impl<OUTCHAR, std::basic_string<INCHAR>> +{ +    auto operator()(const std::basic_string<INCHAR>& arg) +    { +        return ll_convert<std::basic_string<OUTCHAR>>(arg); +    } +}; + +// partially specialize for a single CHARTYPE* argument - +// since it's not a basic_string and we do want to optimize this common case +template <typename OUTCHAR, typename INCHAR> +struct gstringize_impl<OUTCHAR, INCHAR*> +{ +    auto operator()(const INCHAR* arg) +    { +        return ll_convert<std::basic_string<OUTCHAR>>(arg); +    } +}; + +// gstringize(single argument) +template <typename CHARTYPE, typename T> +auto gstringize(T&& item) +{ +    // use decay<T> so we don't require separate specializations +    // for T, const T, T&, const T& ... +    return gstringize_impl<CHARTYPE, std::decay_t<T>>()(std::forward<T>(item));  }  /** - * Specialization for generating wstring from string. - * Both a convenience function and saves a miniscule amount of overhead. 
+ * Specialization of gstringize for std::string return types   */ -inline std::wstring wstringize(const std::string& item) +template <typename... Items> +auto stringize(Items&&... items)  { -    // utf8str_to_wstring() returns LLWString, which isn't necessarily the -    // same as std::wstring -    LLWString s(utf8str_to_wstring(item)); -    return std::wstring(s.begin(), s.end()); +    return gstringize<char>(std::forward<Items>(items)...);  }  /**   * Specialization of gstringize for std::wstring return types   */ -template <typename T> -std::wstring wstringize(const T& item) +template <typename... Items> +auto wstringize(Items&&... items)  { -    return gstringize<wchar_t>(item); +    return gstringize<wchar_t>(std::forward<Items>(items)...);  }  /** @@ -146,11 +197,9 @@ void destringize_f(std::basic_string<CHARTYPE> const & str, Functor const & f)   * std::istringstream in(str);   * in >> item1 >> item2 >> item3 ... ;   * @endcode - * @NOTE - once we get generic lambdas, we shouldn't need DEWSTRINGIZE() any - * more since DESTRINGIZE() should do the right thing with a std::wstring. But - * until then, the lambda we pass must accept the right std::basic_istream.   */ -#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::istream& in){in >> EXPRESSION;})) -#define DEWSTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::wistream& in){in >> EXPRESSION;})) +#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](auto& in){in >> EXPRESSION;})) +// legacy name, just use DESTRINGIZE() going forward +#define DEWSTRINGIZE(STR, EXPRESSION) DESTRINGIZE(STR, EXPRESSION)  #endif /* ! 
defined(LL_STRINGIZE_H) */ diff --git a/indra/llcommon/tests/llinstancetracker_test.cpp b/indra/llcommon/tests/llinstancetracker_test.cpp index 9b89159625..5daa29adf4 100644 --- a/indra/llcommon/tests/llinstancetracker_test.cpp +++ b/indra/llcommon/tests/llinstancetracker_test.cpp @@ -90,19 +90,19 @@ namespace tut          {              Keyed one("one");              ensure_equals(Keyed::instanceCount(), 1); -            Keyed* found = Keyed::getInstance("one"); -            ensure("couldn't find stack Keyed", found); -            ensure_equals("found wrong Keyed instance", found, &one); +            auto found = Keyed::getInstance("one"); +            ensure("couldn't find stack Keyed", bool(found)); +            ensure_equals("found wrong Keyed instance", found.get(), &one);              {                  boost::scoped_ptr<Keyed> two(new Keyed("two"));                  ensure_equals(Keyed::instanceCount(), 2); -                Keyed* found = Keyed::getInstance("two"); -                ensure("couldn't find heap Keyed", found); -                ensure_equals("found wrong Keyed instance", found, two.get()); +                auto found = Keyed::getInstance("two"); +                ensure("couldn't find heap Keyed", bool(found)); +                ensure_equals("found wrong Keyed instance", found.get(), two.get());              }              ensure_equals(Keyed::instanceCount(), 1);          } -        Keyed* found = Keyed::getInstance("one"); +        auto found = Keyed::getInstance("one");          ensure("Keyed key lives too long", ! found);          ensure_equals(Keyed::instanceCount(), 0);      } diff --git a/indra/llcommon/tests/llprocess_test.cpp b/indra/llcommon/tests/llprocess_test.cpp index e530975e86..999d432079 100644 --- a/indra/llcommon/tests/llprocess_test.cpp +++ b/indra/llcommon/tests/llprocess_test.cpp @@ -356,14 +356,15 @@ namespace tut          // Create a script file in a temporary place.          
NamedTempFile script("py", +			"from __future__ import print_function" EOL              "import sys" EOL              "import time" EOL              EOL              "time.sleep(2)" EOL -            "print('stdout after wait', file=sys.stdout)" EOL +            "print('stdout after wait',file=sys.stdout)" EOL              "sys.stdout.flush()" EOL              "time.sleep(2)" EOL -            "print('stderr after wait', file=sys.stderr)" EOL +            "print('stderr after wait',file=sys.stderr)" EOL              "sys.stderr.flush()" EOL              ); @@ -572,12 +573,12 @@ namespace tut      {          set_test_name("arguments");          PythonProcessLauncher py(get_test_name(), -                                 "from __future__ import with_statement\n" +                                 "from __future__ import with_statement, print_function\n"                                   "import sys\n"                                   // note nonstandard output-file arg!                                   "with open(sys.argv[3], 'w') as f:\n"                                   "    for arg in sys.argv[1:]:\n" -                                 "        print(arg, file=f)\n"); +                                 "        print(arg,file=f)\n");          // We expect that PythonProcessLauncher has already appended          // its own NamedTempFile to mParams.args (sys.argv[0]).          
py.mParams.args.add("first arg");          // sys.argv[1] @@ -861,6 +862,7 @@ namespace tut          set_test_name("'bogus' test");          CaptureLog recorder;          PythonProcessLauncher py(get_test_name(), +                                 "from __future__ import print_function\n"                                   "print('Hello world')\n");          py.mParams.files.add(LLProcess::FileParam("bogus"));          py.mPy = LLProcess::create(py.mParams); @@ -876,6 +878,7 @@ namespace tut          // Replace this test with one or more real 'file' tests when we          // implement 'file' support          PythonProcessLauncher py(get_test_name(), +                                 "from __future__ import print_function\n"                                   "print('Hello world')\n");          py.mParams.files.add(LLProcess::FileParam());          py.mParams.files.add(LLProcess::FileParam("file")); @@ -891,6 +894,7 @@ namespace tut          // implement 'tpipe' support          CaptureLog recorder;          PythonProcessLauncher py(get_test_name(), +                                 "from __future__ import print_function\n"                                   "print('Hello world')\n");          py.mParams.files.add(LLProcess::FileParam());          py.mParams.files.add(LLProcess::FileParam("tpipe")); @@ -908,6 +912,7 @@ namespace tut          // implement 'npipe' support          CaptureLog recorder;          PythonProcessLauncher py(get_test_name(), +                                 "from __future__ import print_function\n"                                   "print('Hello world')\n");          py.mParams.files.add(LLProcess::FileParam());          py.mParams.files.add(LLProcess::FileParam()); @@ -984,7 +989,8 @@ namespace tut      {          set_test_name("get*Pipe() validation");          PythonProcessLauncher py(get_test_name(), -                                 "print('this output is expected)'\n"); +                                 "from __future__ import 
print_function\n" +                                 "print('this output is expected')\n");          py.mParams.files.add(LLProcess::FileParam("pipe")); // pipe for  stdin          py.mParams.files.add(LLProcess::FileParam());       // inherit stdout          py.mParams.files.add(LLProcess::FileParam("pipe")); // pipe for stderr @@ -1004,6 +1010,7 @@ namespace tut      {          set_test_name("talk to stdin/stdout");          PythonProcessLauncher py(get_test_name(), +                                 "from __future__ import print_function\n"                                   "import sys, time\n"                                   "print('ok')\n"                                   "sys.stdout.flush()\n" @@ -1122,6 +1129,7 @@ namespace tut      {          set_test_name("ReadPipe \"eof\" event");          PythonProcessLauncher py(get_test_name(), +                                 "from __future__ import print_function\n"                                   "print('Hello from Python!')\n");          py.mParams.files.add(LLProcess::FileParam()); // stdin          py.mParams.files.add(LLProcess::FileParam("pipe")); // stdout diff --git a/indra/llcommon/tests/threadsafeschedule_test.cpp b/indra/llcommon/tests/threadsafeschedule_test.cpp new file mode 100644 index 0000000000..c421cc7b1c --- /dev/null +++ b/indra/llcommon/tests/threadsafeschedule_test.cpp @@ -0,0 +1,69 @@ +/** + * @file   threadsafeschedule_test.cpp + * @author Nat Goodspeed + * @date   2021-10-04 + * @brief  Test for threadsafeschedule. + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. 
+ * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "threadsafeschedule.h" +// STL headers +// std headers +#include <chrono> +// external library headers +// other Linden headers +#include "../test/lltut.h" + +using namespace std::literals::chrono_literals; // ms suffix +using namespace std::literals::string_literals; // s suffix +using Queue = LL::ThreadSafeSchedule<std::string>; + +/***************************************************************************** +*   TUT +*****************************************************************************/ +namespace tut +{ +    struct threadsafeschedule_data +    { +        Queue queue; +    }; +    typedef test_group<threadsafeschedule_data> threadsafeschedule_group; +    typedef threadsafeschedule_group::object object; +    threadsafeschedule_group threadsafeschedulegrp("threadsafeschedule"); + +    template<> template<> +    void object::test<1>() +    { +        set_test_name("push"); +        // Simply calling push() a few times might result in indeterminate +        // delivery order if the resolution of steady_clock is coarser than +        // the real time required for each push() call. Explicitly increment +        // the timestamp for each one -- but since we're passing explicit +        // timestamps, make the queue reorder them. +        queue.push(Queue::TimeTuple(Queue::Clock::now() + 200ms, "ghi")); +        // Given the various push() overloads, you have to match the type +        // exactly: conversions are ambiguous. +        queue.push("abc"s); +        queue.push(Queue::Clock::now() + 100ms, "def"); +        queue.close(); +        auto entry = queue.pop(); +        ensure_equals("failed to pop first", std::get<0>(entry), "abc"s); +        entry = queue.pop(); +        ensure_equals("failed to pop second", std::get<0>(entry), "def"s); +        ensure("queue not closed", queue.isClosed()); +        ensure("queue prematurely done", ! 
queue.done()); +        std::string s; +        bool popped = queue.tryPopFor(1s, s); +        ensure("failed to pop third", popped); +        ensure_equals("third is wrong", s, "ghi"s); +        popped = queue.tryPop(s); +        ensure("queue not empty", ! popped); +        ensure("queue not done", queue.done()); +    } +} // namespace tut diff --git a/indra/llcommon/tests/tuple_test.cpp b/indra/llcommon/tests/tuple_test.cpp new file mode 100644 index 0000000000..af94e2086c --- /dev/null +++ b/indra/llcommon/tests/tuple_test.cpp @@ -0,0 +1,47 @@ +/** + * @file   tuple_test.cpp + * @author Nat Goodspeed + * @date   2021-10-04 + * @brief  Test for tuple. + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "tuple.h" +// STL headers +// std headers +// external library headers +// other Linden headers +#include "../test/lltut.h" + +/***************************************************************************** +*   TUT +*****************************************************************************/ +namespace tut +{ +    struct tuple_data +    { +    }; +    typedef test_group<tuple_data> tuple_group; +    typedef tuple_group::object object; +    tuple_group tuplegrp("tuple"); + +    template<> template<> +    void object::test<1>() +    { +        set_test_name("tuple"); +        std::tuple<std::string, int> tup{ "abc", 17 }; +        std::tuple<int, std::string, int> ptup{ tuple_cons(34, tup) }; +        std::tuple<std::string, int> tup2; +        int i; +        std::tie(i, tup2) = tuple_split(ptup); +        ensure_equals("tuple_car() fail", i, 34); +        ensure_equals("tuple_cdr() (0) fail", std::get<0>(tup2), "abc"); +        ensure_equals("tuple_cdr() (1) fail", std::get<1>(tup2), 17); +    } +} // namespace tut diff --git a/indra/llcommon/tests/workqueue_test.cpp 
b/indra/llcommon/tests/workqueue_test.cpp new file mode 100644 index 0000000000..1d73f7aa0d --- /dev/null +++ b/indra/llcommon/tests/workqueue_test.cpp @@ -0,0 +1,235 @@ +/** + * @file   workqueue_test.cpp + * @author Nat Goodspeed + * @date   2021-10-07 + * @brief  Test for workqueue. + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "workqueue.h" +// STL headers +// std headers +#include <chrono> +#include <deque> +// external library headers +// other Linden headers +#include "../test/lltut.h" +#include "../test/catch_and_store_what_in.h" +#include "llcond.h" +#include "llcoros.h" +#include "lleventcoro.h" +#include "llstring.h" +#include "stringize.h" + +using namespace LL; +using namespace std::literals::chrono_literals; // ms suffix +using namespace std::literals::string_literals; // s suffix + +/***************************************************************************** +*   TUT +*****************************************************************************/ +namespace tut +{ +    struct workqueue_data +    { +        WorkQueue queue{"queue"}; +    }; +    typedef test_group<workqueue_data> workqueue_group; +    typedef workqueue_group::object object; +    workqueue_group workqueuegrp("workqueue"); + +    template<> template<> +    void object::test<1>() +    { +        set_test_name("name"); +        ensure_equals("didn't capture name", queue.getKey(), "queue"); +        ensure("not findable", WorkQueue::getInstance("queue") == queue.getWeak().lock()); +        WorkQueue q2; +        ensure("has no name", LLStringUtil::startsWith(q2.getKey(), "WorkQueue")); +    } + +    template<> template<> +    void object::test<2>() +    { +        set_test_name("post"); +        bool wasRun{ false }; +        // We only get away with binding a simple bool because we're running +        // the work on the same 
thread. +        queue.post([&wasRun](){ wasRun = true; }); +        queue.close(); +        ensure("ran too soon", ! wasRun); +        queue.runUntilClose(); +        ensure("didn't run", wasRun); +    } + +    template<> template<> +    void object::test<3>() +    { +        set_test_name("postEvery"); +        // record of runs +        using Shared = std::deque<WorkQueue::TimePoint>; +        // This is an example of how to share data between the originator of +        // postEvery(work) and the work item itself, since usually a WorkQueue +        // is used to dispatch work to a different thread. Neither of them +        // should call any of LLCond's wait methods: you don't want to stall +        // either the worker thread or the originating thread (conventionally +        // main). Use LLCond or a subclass even if all you want to do is +        // signal the work item that it can quit; consider LLOneShotCond. +        LLCond<Shared> data; +        auto start = WorkQueue::TimePoint::clock::now(); +        auto interval = 100ms; +        queue.postEvery( +            interval, +            [&data, count = 0] +            () mutable +            { +                // record the timestamp at which this instance is running +                data.update_one( +                    [](Shared& data) +                    { +                        data.push_back(WorkQueue::TimePoint::clock::now()); +                    }); +                // by the 3rd call, return false to stop +                return (++count < 3); +            }); +        // no convenient way to close() our queue while we've got a +        // postEvery() running, so run until we have exhausted the iterations +        // or we time out waiting +        for (auto finish = start + 10*interval; +             WorkQueue::TimePoint::clock::now() < finish && +             data.get([](const Shared& data){ return data.size(); }) < 3; ) +        { +            queue.runPending(); +            
std::this_thread::sleep_for(interval/10); +        } +        // Take a copy of the captured deque. +        Shared result = data.get(); +        ensure_equals("called wrong number of times", result.size(), 3); +        // postEvery() assumes you want the first call to happen right away. +        // Pretend our start time was (interval) earlier than that, to make +        // our too early/too late tests uniform for all entries. +        start -= interval; +        for (size_t i = 0; i < result.size(); ++i) +        { +            auto diff = result[i] - start; +            start += interval; +            try +            { +                ensure(STRINGIZE("call " << i << " too soon"), diff >= interval); +                ensure(STRINGIZE("call " << i << " too late"), diff < interval*1.5); +            } +            catch (const tut::failure&) +            { +                auto interval_ms = interval / 1ms; +                auto diff_ms = diff / 1ms; +                std::cerr << "interval " << interval_ms +                          << "ms; diff " << diff_ms << "ms" << std::endl; +                throw; +            } +        } +    } + +    template<> template<> +    void object::test<4>() +    { +        set_test_name("postTo"); +        WorkQueue main("main"); +        auto qptr = WorkQueue::getInstance("queue"); +        int result = 0; +        main.postTo( +            qptr, +            [](){ return 17; }, +            // Note that a postTo() *callback* can safely bind a reference to +            // a variable on the invoking thread, because the callback is run +            // on the invoking thread. (Of course the bound variable must +            // survive until the callback is called.) 
+            [&result](int i){ result = i; }); +        // this should post the callback to main +        qptr->runOne(); +        // this should run the callback +        main.runOne(); +        ensure_equals("failed to run int callback", result, 17); + +        std::string alpha; +        // postTo() handles arbitrary return types +        main.postTo( +            qptr, +            [](){ return "abc"s; }, +            [&alpha](const std::string& s){ alpha = s; }); +        qptr->runPending(); +        main.runPending(); +        ensure_equals("failed to run string callback", alpha, "abc"); +    } + +    template<> template<> +    void object::test<5>() +    { +        set_test_name("postTo with void return"); +        WorkQueue main("main"); +        auto qptr = WorkQueue::getInstance("queue"); +        std::string observe; +        main.postTo( +            qptr, +            // The ONLY reason we can get away with binding a reference to +            // 'observe' in our work callable is because we're directly +            // calling qptr->runOne() on this same thread. It would be a +            // mistake to do that if some other thread were servicing 'queue'. +            [&observe](){ observe = "queue"; }, +            [&observe](){ observe.append(";main"); }); +        qptr->runOne(); +        main.runOne(); +        ensure_equals("failed to run both lambdas", observe, "queue;main"); +    } + +    template<> template<> +    void object::test<6>() +    { +        set_test_name("waitForResult"); +        std::string stored; +        // Try to call waitForResult() on this thread's main coroutine. It +        // should throw because the main coroutine must service the queue. 
+        auto what{ catch_what<WorkQueue::Error>( +                [this, &stored](){ stored = queue.waitForResult( +                        [](){ return "should throw"; }); }) }; +        ensure("lambda should not have run", stored.empty()); +        ensure_not("waitForResult() should have thrown", what.empty()); +        ensure(STRINGIZE("should mention waitForResult: " << what), +               what.find("waitForResult") != std::string::npos); + +        // Call waitForResult() on a coroutine, with a string result. +        LLCoros::instance().launch( +            "waitForResult string", +            [this, &stored]() +            { stored = queue.waitForResult( +                    [](){ return "string result"; }); }); +        llcoro::suspend(); +        // Nothing will have happened yet because, even if the coroutine did +        // run immediately, all it did was to queue the inner lambda on +        // 'queue'. Service it. +        queue.runOne(); +        llcoro::suspend(); +        ensure_equals("bad waitForResult return", stored, "string result"); + +        // Call waitForResult() on a coroutine, with a void callable. +        stored.clear(); +        bool done = false; +        LLCoros::instance().launch( +            "waitForResult void", +            [this, &stored, &done]() +            { +                queue.waitForResult([&stored](){ stored = "ran"; }); +                done = true; +            }); +        llcoro::suspend(); +        queue.runOne(); +        llcoro::suspend(); +        ensure_equals("didn't run coroutine", stored, "ran"); +        ensure("void waitForResult() didn't return", done); +    } +} // namespace tut diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp new file mode 100644 index 0000000000..ba914035e2 --- /dev/null +++ b/indra/llcommon/threadpool.cpp @@ -0,0 +1,88 @@ +/** + * @file   threadpool.cpp + * @author Nat Goodspeed + * @date   2021-10-21 + * @brief  Implementation for threadpool. 
+ *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "threadpool.h" +// STL headers +// std headers +// external library headers +// other Linden headers +#include "llerror.h" +#include "llevents.h" +#include "stringize.h" + +LL::ThreadPool::ThreadPool(const std::string& name, size_t threads, size_t capacity): +    mQueue(name, capacity), +    mName("ThreadPool:" + name), +    mThreadCount(threads) +{} + +void LL::ThreadPool::start() +{ +    for (size_t i = 0; i < mThreadCount; ++i) +    { +        std::string tname{ stringize(mName, ':', (i+1), '/', mThreadCount) }; +        mThreads.emplace_back(tname, [this, tname]() +            { +                LL_PROFILER_SET_THREAD_NAME(tname.c_str()); +                run(tname); +            }); +    } +    // Listen on "LLApp", and when the app is shutting down, close the queue +    // and join the workers. +    LLEventPumps::instance().obtain("LLApp").listen( +        mName, +        [this](const LLSD& stat) +        { +            std::string status(stat["status"]); +            if (status != "running") +            { +                // viewer is starting shutdown -- proclaim the end is nigh! +                LL_DEBUGS("ThreadPool") << mName << " saw " << status << LL_ENDL; +                close(); +            } +            return false; +        }); +} + +LL::ThreadPool::~ThreadPool() +{ +    close(); +} + +void LL::ThreadPool::close() +{ +    if (! 
mQueue.isClosed()) +    { +        LL_DEBUGS("ThreadPool") << mName << " closing queue and joining threads" << LL_ENDL; +        mQueue.close(); +        for (auto& pair: mThreads) +        { +            LL_DEBUGS("ThreadPool") << mName << " waiting on thread " << pair.first << LL_ENDL; +            pair.second.join(); +        } +        LL_DEBUGS("ThreadPool") << mName << " shutdown complete" << LL_ENDL; +    } +} + +void LL::ThreadPool::run(const std::string& name) +{ +    LL_DEBUGS("ThreadPool") << name << " starting" << LL_ENDL; +    run(); +    LL_DEBUGS("ThreadPool") << name << " stopping" << LL_ENDL; +} + +void LL::ThreadPool::run() +{ +    mQueue.runUntilClose(); +} diff --git a/indra/llcommon/threadpool.h b/indra/llcommon/threadpool.h new file mode 100644 index 0000000000..b79c9b9090 --- /dev/null +++ b/indra/llcommon/threadpool.h @@ -0,0 +1,71 @@ +/** + * @file   threadpool.h + * @author Nat Goodspeed + * @date   2021-10-21 + * @brief  ThreadPool configures a WorkQueue along with a pool of threads to + *         service it. + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_THREADPOOL_H) +#define LL_THREADPOOL_H + +#include "workqueue.h" +#include <string> +#include <thread> +#include <utility>                  // std::pair +#include <vector> + +namespace LL +{ + +    class ThreadPool +    { +    public: +        /** +         * Pass ThreadPool a string name. This can be used to look up the +         * relevant WorkQueue. +         */ +        ThreadPool(const std::string& name, size_t threads=1, size_t capacity=1024); +        virtual ~ThreadPool(); + +        /** +         * Launch the ThreadPool. Until this call, a constructed ThreadPool +         * launches no threads. That permits coders to derive from ThreadPool, +         * or store it as a member of some other class, but refrain from +         * launching it until all other construction is complete. 
+         */ +        void start(); + +        /** +         * ThreadPool listens for application shutdown messages on the "LLApp" +         * LLEventPump. Call close() to shut down this ThreadPool early. +         */ +        void close(); + +        std::string getName() const { return mName; } +        size_t getWidth() const { return mThreads.size(); } +        /// obtain a non-const reference to the WorkQueue to post work to it +        WorkQueue& getQueue() { return mQueue; } + +        /** +         * Override run() if you need special processing. The default run() +         * implementation simply calls WorkQueue::runUntilClose(). +         */ +        virtual void run(); + +    private: +        void run(const std::string& name); + +        WorkQueue mQueue; +        std::string mName; +        size_t mThreadCount; +        std::vector<std::pair<std::string, std::thread>> mThreads; +    }; + +} // namespace LL + +#endif /* ! defined(LL_THREADPOOL_H) */ diff --git a/indra/llcommon/threadsafeschedule.h b/indra/llcommon/threadsafeschedule.h new file mode 100644 index 0000000000..3e0da94c02 --- /dev/null +++ b/indra/llcommon/threadsafeschedule.h @@ -0,0 +1,399 @@ +/** + * @file   threadsafeschedule.h + * @author Nat Goodspeed + * @date   2021-10-02 + * @brief  ThreadSafeSchedule is an ordered queue in which every item has an + *         associated timestamp. + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_THREADSAFESCHEDULE_H) +#define LL_THREADSAFESCHEDULE_H + +#include "chrono.h" +#include "llexception.h" +#include "llthreadsafequeue.h" +#include "tuple.h" +#include <chrono> +#include <tuple>   + +namespace LL +{ +    namespace ThreadSafeSchedulePrivate +    { +        using TimePoint = std::chrono::steady_clock::time_point; +        // Bundle consumer's data with a TimePoint to order items by timestamp. +        template <typename... 
Args> +        using TimestampedTuple = std::tuple<TimePoint, Args...>; + +        // Comparison functor for TimestampedTuples: compares only element 0 +        // (the TimePoint) and deliberately uses > -- see TimedQueue comments +        struct ReverseTupleOrder +        { +            template <typename Tuple> +            bool operator()(const Tuple& left, const Tuple& right) const +            { +                return std::get<0>(left) > std::get<0>(right); +            } +        }; + +        template <typename... Args> +        using TimedQueue = PriorityQueueAdapter< +            TimestampedTuple<Args...>, +            // std::vector is the default storage for std::priority_queue, +            // have to restate to specify comparison template parameter +            std::vector<TimestampedTuple<Args...>>, +            // std::priority_queue uses a counterintuitive comparison +            // behavior: the default std::less comparator is used to present +            // the *highest* value as top(). So to sort by earliest timestamp, +            // we must invert by using >. +            ReverseTupleOrder>; +    } // namespace ThreadSafeSchedulePrivate + +    /** +     * ThreadSafeSchedule is an ordered LLThreadSafeQueue in which every item +     * is given an associated timestamp. That is, TimePoint is implicitly +     * prepended to the std::tuple with the specified types. +     * +     * Items are popped in increasing chronological order. Moreover, any item +     * with a timestamp in the future is held back until +     * std::chrono::steady_clock reaches that timestamp. +     */ +    template <typename... 
Args> +    class ThreadSafeSchedule: +        public LLThreadSafeQueue<ThreadSafeSchedulePrivate::TimestampedTuple<Args...>, +                                 ThreadSafeSchedulePrivate::TimedQueue<Args...>> +    { +    public: +        using DataTuple = std::tuple<Args...>; +        using TimeTuple = ThreadSafeSchedulePrivate::TimestampedTuple<Args...>; + +    private: +        using super = LLThreadSafeQueue<TimeTuple, ThreadSafeSchedulePrivate::TimedQueue<Args...>>; +        using lock_t = typename super::lock_t; +        // VS 2017 needs this due to a bug: +        // https://developercommunity.visualstudio.com/t/cannot-access-protected-enumerator-of-enclosing-cl/203430 +        enum pop_result { EMPTY=super::EMPTY, DONE=super::DONE, WAITING=super::WAITING, POPPED=super::POPPED }; + +    public: +        using Closed = LLThreadSafeQueueInterrupt; +        using TimePoint = ThreadSafeSchedulePrivate::TimePoint; +        using Clock = TimePoint::clock; + +        ThreadSafeSchedule(U32 capacity=1024): +            super(capacity) +        {} + +        /*----------------------------- push() -----------------------------*/ +        /// explicitly pass TimeTuple +        using super::push; + +        /// pass DataTuple with implicit now +        // This could be ambiguous for Args with a single type. Unfortunately +        // we can't enable_if an individual method with a condition based on +        // the *class* template arguments, only on that method's template +        // arguments. We could specialize this class for the single-Args case; +        // we could minimize redundancy by breaking out a common base class... +        void push(const DataTuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            push(tuple_cons(Clock::now(), tuple)); +        } + +        /// individually pass each component of the TimeTuple +        void push(const TimePoint& time, Args&&... 
args) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            push(TimeTuple(time, std::forward<Args>(args)...)); +        } + +        /// individually pass every component except the TimePoint (implies now) +        // This could be ambiguous if the first specified template parameter +        // type is also TimePoint. We could try to disambiguate, but a simpler +        // approach would be for the caller to explicitly construct DataTuple +        // and call that overload. +        void push(Args&&... args) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            push(Clock::now(), std::forward<Args>(args)...); +        } + +        /*--------------------------- tryPush() ----------------------------*/ +        /// explicit TimeTuple +        using super::tryPush; + +        /// DataTuple with implicit now +        bool tryPush(const DataTuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            return tryPush(tuple_cons(Clock::now(), tuple)); +        } + +        /// individually pass components +        bool tryPush(const TimePoint& time, Args&&... args) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            return tryPush(TimeTuple(time, std::forward<Args>(args)...)); +        } + +        /// individually pass components with implicit now +        bool tryPush(Args&&... 
args) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            return tryPush(Clock::now(), std::forward<Args>(args)...); +        } + +        /*-------------------------- tryPushFor() --------------------------*/ +        /// explicit TimeTuple +        using super::tryPushFor; + +        /// DataTuple with implicit now +        template <typename Rep, typename Period> +        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout, +                        const DataTuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            return tryPushFor(timeout, tuple_cons(Clock::now(), tuple)); +        } + +        /// individually pass components +        template <typename Rep, typename Period> +        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout, +                        const TimePoint& time, Args&&... args) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            // pass 'timeout' along, exactly as the DataTuple overload above does +            return tryPushFor(timeout, TimeTuple(time, std::forward<Args>(args)...)); +        } + +        /// individually pass components with implicit now +        template <typename Rep, typename Period> +        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout, +                        Args&&... 
args) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            // pass 'timeout' along so this resolves to the (timeout, time, args...) overload +            return tryPushFor(timeout, Clock::now(), std::forward<Args>(args)...); +        } + +        /*------------------------- tryPushUntil() -------------------------*/ +        // In the overloads below, the template parameter named Clock hides +        // this class's Clock alias, so the item timestamp is taken explicitly +        // from TimePoint::clock rather than from the clock of 'until'. +        /// explicit TimeTuple +        using super::tryPushUntil; + +        /// DataTuple with implicit now +        template <typename Clock, typename Duration> +        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until, +                          const DataTuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            return tryPushUntil(until, tuple_cons(TimePoint::clock::now(), tuple)); +        } + +        /// individually pass components +        template <typename Clock, typename Duration> +        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until, +                          const TimePoint& time, Args&&... args) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            return tryPushUntil(until, TimeTuple(time, std::forward<Args>(args)...)); +        } + +        /// individually pass components with implicit now +        template <typename Clock, typename Duration> +        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until, +                          Args&&... args) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            return tryPushUntil(until, TimePoint::clock::now(), std::forward<Args>(args)...); +        } + +        /*----------------------------- pop() ------------------------------*/ +        // Our consumer may or may not care about the timestamp associated +        // with each popped item, so we allow retrieving either DataTuple or +        // TimeTuple. One potential use would be to observe, and possibly +        // adjust for, the time lag between the item time and the actual +        // current time. 
+ +        /// pop DataTuple by value +        // It would be great to notice when sizeof...(Args) == 1 and directly +        // return the first (only) value, instead of making pop()'s caller +        // call std::get<0>(value). See push(DataTuple) remarks for why we +        // haven't yet jumped through those hoops. +        DataTuple pop() +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            return tuple_cdr(popWithTime()); +        } + +        /// pop TimeTuple by value +        TimeTuple popWithTime() +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            lock_t lock(super::mLock); +            // We can't just sit around waiting forever, given that there may +            // be items in the queue that are not yet ready but will *become* +            // ready in the near future. So in fact, with this class, every +            // pop() becomes a tryPopUntil(), constrained to the timestamp of +            // the head item. It almost doesn't matter what we specify for the +            // caller's time constraint -- all we really care about is the +            // head item's timestamp. Since pop() and popWithTime() are +            // defined to wait until either an item becomes available or the +            // queue is closed, loop until one of those things happens. The +            // constraint we pass just determines how often we'll loop while +            // waiting. +            TimeTuple tt; +            while (true) +            { +                // Pick a point suitably far into the future. 
+                TimePoint until = TimePoint::clock::now() + std::chrono::hours(24); +                pop_result popped = tryPopUntil_(lock, until, tt); +                if (popped == POPPED) +                { +                    // plain return of the local: an explicit std::move() +                    // here would only inhibit copy elision +                    return tt; +                } + +                // DONE: throw, just as super::pop() does +                if (popped == DONE) +                { +                    LLTHROW(LLThreadSafeQueueInterrupt()); +                } +                // WAITING: we've still got items to drain. +                // EMPTY: not closed, so it's worth waiting for more items. +                // Either way, loop back to wait. +            } +        } + +        // We can use tryPop(TimeTuple&) just as it stands; the only behavior +        // difference is in our canPop() override method. +        using super::tryPop; + +        /// tryPop(DataTuple&) +        bool tryPop(DataTuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            TimeTuple tt; +            if (! super::tryPop(tt)) +                return false; +            tuple = tuple_cdr(std::move(tt)); +            return true; +        } + +        /// for when Args has exactly one type +        bool tryPop(typename std::tuple_element<1, TimeTuple>::type& value) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            TimeTuple tt; +            if (! super::tryPop(tt)) +                return false; +            value = std::get<1>(std::move(tt)); +            return true; +        } + +        /// tryPopFor() +        template <typename Rep, typename Period, typename Tuple> +        bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, Tuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            // It's important to use OUR tryPopUntil() implementation, rather +            // than delegating immediately to our base class. 
+            return tryPopUntil(Clock::now() + timeout, tuple); +        } + +        /// tryPopUntil(TimeTuple&) +        template <typename Clock, typename Duration> +        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until, +                         TimeTuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            // super::tryPopUntil() wakes up when an item becomes available or +            // we hit 'until', whichever comes first. Thing is, the current +            // head of the queue could become ready sooner than either of +            // those events, and we need to deliver it as soon as it does. +            // Don't wait past the TimePoint of the head item. +            // Naturally, lock the queue before peeking at mStorage. +            return super::tryLockUntil( +                until, +                [this, until, &tuple](lock_t& lock) +                { +                    // Use our time_point_cast to allow for 'until' that's a +                    // time_point type other than TimePoint. +                    return POPPED == +                        tryPopUntil_(lock, LL::time_point_cast<TimePoint>(until), tuple); +                }); +        } + +        pop_result tryPopUntil_(lock_t& lock, const TimePoint& until, TimeTuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            TimePoint adjusted = until; +            if (! 
super::mStorage.empty()) +            { +                LL_PROFILE_ZONE_NAMED("tpu - adjust"); +                // use whichever is earlier: the head item's timestamp, or +                // the caller's limit +                adjusted = min(std::get<0>(super::mStorage.front()), adjusted); +            } +            // now delegate to base-class tryPopUntil_() +            pop_result popped; +            { +                LL_PROFILE_ZONE_NAMED("tpu - super"); +                while ((popped = pop_result(super::tryPopUntil_(lock, adjusted, tuple))) == WAITING) +                { +                    // If super::tryPopUntil_() returns WAITING, it means there's +                    // a head item, but it's not yet time. But it's worth looping +                    // back to recheck. +                } +            } +            return popped; +        } + +        /// tryPopUntil(DataTuple&) +        template <typename Clock, typename Duration> +        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until, +                         DataTuple& tuple) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            TimeTuple tt; +            if (! tryPopUntil(until, tt)) +                return false; +            tuple = tuple_cdr(std::move(tt)); +            return true; +        } + +        /// for when Args has exactly one type +        template <typename Clock, typename Duration> +        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until, +                         typename std::tuple_element<1, TimeTuple>::type& value) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            TimeTuple tt; +            if (! tryPopUntil(until, tt)) +                return false; +            value = std::get<1>(std::move(tt)); +            return true; +        } + +        /*------------------------------ etc. 
------------------------------*/ +        // We can't hide items that aren't yet ready because we can't traverse +        // the underlying priority_queue: it has no iterators, only top(). So +        // a consumer could observe size() > 0 and yet tryPop() returns false. +        // Shrug, in a multi-consumer scenario that would be expected behavior. +        using super::size; +        // open/closed state +        using super::close; +        using super::isClosed; +        using super::done; + +    private: +        // this method is called by base class pop_() every time we're +        // considering whether to deliver the current head element +        bool canPop(const TimeTuple& head) const override +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            // an item with a future timestamp isn't yet ready to pop +            // (should we add some slop for overhead?) +            return std::get<0>(head) <= Clock::now(); +        } +    }; + +} // namespace LL + +#endif /* ! defined(LL_THREADSAFESCHEDULE_H) */ diff --git a/indra/llcommon/timing.cpp b/indra/llcommon/timing.cpp deleted file mode 100644 index c2dc695ef3..0000000000 --- a/indra/llcommon/timing.cpp +++ /dev/null @@ -1,25 +0,0 @@ -/**  - * @file timing.cpp - * @brief This file will be deprecated in the future. - * - * $LicenseInfo:firstyear=2000&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - *  - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - *  - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU - * Lesser General Public License for more details. 
- *  - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA - *  - * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA - * $/LicenseInfo$ - */ diff --git a/indra/llcommon/tuple.h b/indra/llcommon/tuple.h new file mode 100644 index 0000000000..bfe7e3c2ba --- /dev/null +++ b/indra/llcommon/tuple.h @@ -0,0 +1,84 @@ +/** + * @file   tuple.h + * @author Nat Goodspeed + * @date   2021-10-04 + * @brief  A couple tuple utilities + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_TUPLE_H) +#define LL_TUPLE_H + +#include <tuple> +#include <type_traits>              // std::remove_reference +#include <utility>                  // std::pair + +/** + * tuple_cons() behaves like LISP cons: it uses std::tuple_cat() to prepend a + * new item of arbitrary type to an existing std::tuple. + */ +template <typename First, typename... Rest, typename Tuple_=std::tuple<Rest...>> +auto tuple_cons(First&& first, Tuple_&& rest) +{ +    // All we need to do is make a tuple containing 'first', and let +    // tuple_cat() do the hard part. +    return std::tuple_cat(std::tuple<First>(std::forward<First>(first)), +                          std::forward<Tuple_>(rest)); +} + +/** + * tuple_car() behaves like LISP car: it extracts the first item from a + * std::tuple. + */ +template <typename... Args, typename Tuple_=std::tuple<Args...>> +auto tuple_car(Tuple_&& tuple) +{ +    return std::get<0>(std::forward<Tuple_>(tuple)); +} + +/** + * tuple_cdr() behaves like LISP cdr: it returns a new tuple containing + * everything BUT the first item. + */ +// derived from https://stackoverflow.com/a/24046437 +template <typename Tuple, std::size_t... 
Indices> +auto tuple_cdr_(Tuple&& tuple, const std::index_sequence<Indices...>) +{ +    // Given an index sequence from [0..N-1), extract tuple items [1..N) +    return std::make_tuple(std::get<Indices+1u>(std::forward<Tuple>(tuple))...); +} + +template <typename Tuple> +auto tuple_cdr(Tuple&& tuple) +{ +    return tuple_cdr_( +        std::forward<Tuple>(tuple), +        // Pass helper function an index sequence one item shorter than tuple +        std::make_index_sequence< +            std::tuple_size< +                // tuple_size doesn't like reference types +                typename std::remove_reference<Tuple>::type +            >::value - 1u> +        ()); +} + +/** + * tuple_split(), the opposite of tuple_cons(), has no direct analog in LISP. + * It returns a std::pair of tuple_car(), tuple_cdr(). We could call this + * function tuple_car_cdr(), or tuple_slice() or some such. But tuple_split() + * feels more descriptive. + */ +template <typename... Args, typename Tuple_=std::tuple<Args...>> +auto tuple_split(Tuple_&& tuple) +{ +    // We're not really worried about forwarding multiple times a tuple that +    // might contain move-only items, because the implementation above only +    // applies std::get() exactly once to each item. +    return std::make_pair(tuple_car(std::forward<Tuple_>(tuple)), +                          tuple_cdr(std::forward<Tuple_>(tuple))); +} + +#endif /* ! defined(LL_TUPLE_H) */ diff --git a/indra/llcommon/workqueue.cpp b/indra/llcommon/workqueue.cpp new file mode 100644 index 0000000000..eb06890468 --- /dev/null +++ b/indra/llcommon/workqueue.cpp @@ -0,0 +1,158 @@ +/** + * @file   workqueue.cpp + * @author Nat Goodspeed + * @date   2021-10-06 + * @brief  Implementation for WorkQueue. + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. 
+ * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "workqueue.h" +// STL headers +// std headers +// external library headers +// other Linden headers +#include "llcoros.h" +#include LLCOROS_MUTEX_HEADER +#include "llerror.h" +#include "llexception.h" +#include "stringize.h" + +using Mutex = LLCoros::Mutex; +using Lock  = LLCoros::LockType; + +LL::WorkQueue::WorkQueue(const std::string& name, size_t capacity): +    super(makeName(name)), +    mQueue(capacity) +{ +    // TODO: register for "LLApp" events so we can implicitly close() on +    // viewer shutdown. +} + +void LL::WorkQueue::close() +{ +    mQueue.close(); +} + +size_t LL::WorkQueue::size() +{ +    return mQueue.size(); +} + +bool LL::WorkQueue::isClosed() +{ +    return mQueue.isClosed(); +} + +bool LL::WorkQueue::done() +{ +    return mQueue.done(); +} + +void LL::WorkQueue::runUntilClose() +{ +    try +    { +        for (;;) +        { +            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +            callWork(mQueue.pop()); +        } +    } +    catch (const Queue::Closed&) +    { +    } +} + +bool LL::WorkQueue::runPending() +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    for (Work work; mQueue.tryPop(work); ) +    { +        callWork(work); +    } +    return ! mQueue.done(); +} + +bool LL::WorkQueue::runOne() +{ +    Work work; +    if (mQueue.tryPop(work)) +    { +        callWork(work); +    } +    return ! mQueue.done(); +} + +bool LL::WorkQueue::runUntil(const TimePoint& until) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    // Should we subtract some slop to allow for typical Work execution time? +    // How much slop? +    // runUntil() is simply a time-bounded runPending(). +    for (Work work; TimePoint::clock::now() < until && mQueue.tryPop(work); ) +    { +        callWork(work); +    } +    return ! mQueue.done(); +} + +std::string LL::WorkQueue::makeName(const std::string& name) +{ +    if (! 
name.empty()) +        return name; + +    static U32 discriminator = 0; +    static Mutex mutex; +    U32 num; +    { +        // Protect discriminator from concurrent access by different threads. +        // It can't be thread_local, else two racing threads will come up with +        // the same name. +        Lock lk(mutex); +        num = discriminator++; +    } +    return STRINGIZE("WorkQueue" << num); +} + +void LL::WorkQueue::callWork(const Queue::DataTuple& work) +{ +    // ThreadSafeSchedule::pop() always delivers a tuple, even when +    // there's only one data field per item, as for us. +    callWork(std::get<0>(work)); +} + +void LL::WorkQueue::callWork(const Work& work) +{ +    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD; +    try +    { +        work(); +    } +    catch (...) +    { +        // No matter what goes wrong with any individual work item, the worker +        // thread must go on! Log our own instance name with the exception. +        LOG_UNHANDLED_EXCEPTION(getKey()); +    } +} + +void LL::WorkQueue::error(const std::string& msg) +{ +    LL_ERRS("WorkQueue") << msg << LL_ENDL; +} + +void LL::WorkQueue::checkCoroutine(const std::string& method) +{ +    // By convention, the default coroutine on each thread has an empty name +    // string. See also LLCoros::logname(). +    if (LLCoros::getName().empty()) +    { +        LLTHROW(Error("Do not call " + method + " from a thread's default coroutine")); +    } +} diff --git a/indra/llcommon/workqueue.h b/indra/llcommon/workqueue.h new file mode 100644 index 0000000000..96574a18b9 --- /dev/null +++ b/indra/llcommon/workqueue.h @@ -0,0 +1,574 @@ +/** + * @file   workqueue.h + * @author Nat Goodspeed + * @date   2021-09-30 + * @brief  Queue used for inter-thread work passing. + *  + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! 
defined(LL_WORKQUEUE_H) +#define LL_WORKQUEUE_H + +#include "llcoros.h" +#include "llexception.h" +#include "llinstancetracker.h" +#include "threadsafeschedule.h" +#include <chrono> +#include <exception>                // std::current_exception +#include <functional>               // std::function +#include <string> +#include <type_traits>              // std::decay_t +#include <utility>                  // std::forward, std::move + +namespace LL +{ +    /** +     * A typical WorkQueue has a string name that can be used to find it. +     */ +    class WorkQueue: public LLInstanceTracker<WorkQueue, std::string> +    { +    private: +        using super = LLInstanceTracker<WorkQueue, std::string>; + +    public: +        using Work = std::function<void()>; + +    private: +        using Queue = ThreadSafeSchedule<Work>; +        // helper for postEvery() +        template <typename Rep, typename Period, typename CALLABLE> +        class BackJack; + +    public: +        using TimePoint = Queue::TimePoint; +        using TimedWork = Queue::TimeTuple; +        using Closed    = Queue::Closed; + +        struct Error: public LLException +        { +            Error(const std::string& what): LLException(what) {} +        }; + +        /** +         * You may omit the WorkQueue name, in which case a unique name is +         * synthesized; for practical purposes that makes it anonymous. +         */ +        WorkQueue(const std::string& name = std::string(), size_t capacity=1024); + +        /** +         * Since the point of WorkQueue is to pass work to some other worker +         * thread(s) asynchronously, it's important that the WorkQueue continue +         * to exist until the worker thread(s) have drained it. To communicate +         * that it's time for them to quit, close() the queue. +         */ +        void close(); + +        /** +         * WorkQueue supports multiple producers and multiple consumers. In +         * the general case it's misleading to test size(), since any other +         * thread might change it the nanosecond the lock is released. 
On that +         * basis, some might argue against publishing a size() method at all. +         * +         * But there are two specific cases in which a test based on size() +         * might be reasonable: +         * +         * * If you're the only producer, noticing that size() == 0 is +         *   meaningful. +         * * If you're the only consumer, noticing that size() > 0 is +         *   meaningful. +         */ +        size_t size(); +        /// producer end: are we prevented from pushing any additional items? +        bool isClosed(); +        /// consumer end: are we done, is the queue entirely drained? +        bool done(); + +        /*---------------------- fire and forget API -----------------------*/ + +        /// fire-and-forget, but at a particular (future?) time +        template <typename CALLABLE> +        void post(const TimePoint& time, CALLABLE&& callable) +        { +            // Defer reifying an arbitrary CALLABLE until we hit this or +            // postIfOpen(). All other methods should accept CALLABLEs of +            // arbitrary type to avoid multiple levels of std::function +            // indirection. +            // CALLABLE&& is a forwarding reference: std::forward moves an +            // rvalue but copies an lvalue, instead of leaving the caller's +            // object moved-from. +            mQueue.push(TimedWork(time, std::forward<CALLABLE>(callable))); +        } + +        /// fire-and-forget +        template <typename CALLABLE> +        void post(CALLABLE&& callable) +        { +            // We use TimePoint::clock::now() instead of TimePoint's +            // representation of the epoch because this WorkQueue may contain +            // a mix of past-due TimedWork items and TimedWork items scheduled +            // for the future. Sift this new item into the correct place. 
+            post(TimePoint::clock::now(), std::forward<CALLABLE>(callable)); +        } + +        /** +         * post work for a particular time, unless the queue is closed before +         * we can post +         */ +        template <typename CALLABLE> +        bool postIfOpen(const TimePoint& time, CALLABLE&& callable) +        { +            // Defer reifying an arbitrary CALLABLE until we hit this or +            // post(). All other methods should accept CALLABLEs of arbitrary +            // type to avoid multiple levels of std::function indirection. +            // Forward rather than move, so an lvalue callable is copied and +            // the caller's object is left intact. +            return mQueue.pushIfOpen(TimedWork(time, std::forward<CALLABLE>(callable))); +        } + +        /** +         * post work, unless the queue is closed before we can post +         */ +        template <typename CALLABLE> +        bool postIfOpen(CALLABLE&& callable) +        { +            return postIfOpen(TimePoint::clock::now(), +                              std::forward<CALLABLE>(callable)); +        } + +        /** +         * Post work to be run at a specified time to another WorkQueue, which +         * may or may not still exist and be open. Return true if we were able +         * to post. +         */ +        template <typename CALLABLE> +        static bool postMaybe(weak_t target, const TimePoint& time, CALLABLE&& callable); + +        /** +         * Post work to another WorkQueue, which may or may not still exist +         * and be open. Return true if we were able to post. +         */ +        template <typename CALLABLE> +        static bool postMaybe(weak_t target, CALLABLE&& callable) +        { +            return postMaybe(target, TimePoint::clock::now(), +                             std::forward<CALLABLE>(callable)); +        } + +        /** +         * Launch a callable returning bool that will trigger repeatedly at +         * specified interval, until the callable returns false. +         * +         * If you need to signal that callable from outside, DO NOT bind a +         * reference to a simple bool! That's not thread-safe. 
Instead, bind +         * an LLCond variant, e.g. LLOneShotCond or LLBoolCond. +         */ +        template <typename Rep, typename Period, typename CALLABLE> +        void postEvery(const std::chrono::duration<Rep, Period>& interval, +                       CALLABLE&& callable); + +        /** +         * Like post(callable), but delegates to mQueue.tryPush() and returns +         * its bool result (presumably false when the work could not be +         * queued -- confirm against ThreadSafeSchedule). The callable is +         * forwarded, so an lvalue callable is copied, not moved-from. +         */ +        template <typename CALLABLE> +        bool tryPost(CALLABLE&& callable) +        { +            return mQueue.tryPush(TimedWork(TimePoint::clock::now(), +                                            std::forward<CALLABLE>(callable))); +        } + +        /*------------------------- handshake API --------------------------*/ + +        /** +         * Post work to another WorkQueue to be run at a specified time, +         * requesting a specific callback to be run on this WorkQueue on +         * completion. +         * +         * Returns true if able to post, false if the other WorkQueue is +         * inaccessible. +         */ +        // Apparently some Microsoft header file defines a macro CALLBACK? The +        // natural template argument name CALLBACK produces very weird Visual +        // Studio compile errors that seem utterly unrelated to this source +        // code. +        template <typename CALLABLE, typename FOLLOWUP> +        bool postTo(weak_t target, +                    const TimePoint& time, CALLABLE&& callable, FOLLOWUP&& callback); + +        /** +         * Post work to another WorkQueue, requesting a specific callback to +         * be run on this WorkQueue on completion. +         * +         * Returns true if able to post, false if the other WorkQueue is +         * inaccessible. 
+         */ +        template <typename CALLABLE, typename FOLLOWUP> +        bool postTo(weak_t target, CALLABLE&& callable, FOLLOWUP&& callback) +        { +            // Forward both arguments: lvalues are copied rather than being +            // silently moved-from. +            return postTo(target, TimePoint::clock::now(), +                          std::forward<CALLABLE>(callable), +                          std::forward<FOLLOWUP>(callback)); +        } + +        /** +         * Post work to another WorkQueue to be run at a specified time, +         * blocking the calling coroutine until then, returning the result to +         * caller on completion. +         * +         * In general, we assume that each thread's default coroutine is busy +         * servicing its WorkQueue or whatever. To try to prevent mistakes, we +         * forbid calling waitForResult() from a thread's default coroutine. +         */ +        template <typename CALLABLE> +        auto waitForResult(const TimePoint& time, CALLABLE&& callable); + +        /** +         * Post work to another WorkQueue, blocking the calling coroutine +         * until then, returning the result to caller on completion. +         * +         * In general, we assume that each thread's default coroutine is busy +         * servicing its WorkQueue or whatever. To try to prevent mistakes, we +         * forbid calling waitForResult() from a thread's default coroutine. +         */ +        template <typename CALLABLE> +        auto waitForResult(CALLABLE&& callable) +        { +            return waitForResult(TimePoint::clock::now(), +                                 std::forward<CALLABLE>(callable)); +        } + +        /*--------------------------- worker API ---------------------------*/ + +        /** +         * runUntilClose() pulls TimedWork items off this WorkQueue until the +         * queue is closed, at which point it returns. This would be the +         * typical entry point for a simple worker thread. +         */ +        void runUntilClose(); + +        /** +         * runPending() runs all TimedWork items that are ready to run. 
It +         * returns the negation of done(): false once the queue is done +         * (presumably closed and entirely drained -- see done()), true +         * otherwise. This could be used by a thread whose primary purpose is to +         * serve the queue, but also wants to do other things with its idle time. +         */ +        bool runPending(); + +        /** +         * runOne() runs at most one ready TimedWork item -- zero if none are +         * ready. It returns the negation of done(): false once the queue is +         * done, true otherwise. +         */ +        bool runOne(); + +        /** +         * runFor() runs a subset of ready TimedWork items, until the +         * timeslice has been exceeded. It returns whatever runUntil() returns: +         * false once the queue is done, true otherwise. This could be used by a +         * busy main thread to lend a bounded few CPU cycles to this WorkQueue +         * without risking the WorkQueue blowing out the length of any one +         * frame. +         */ +        template <typename Rep, typename Period> +        bool runFor(const std::chrono::duration<Rep, Period>& timeslice) +        { +            LL_PROFILE_ZONE_SCOPED; +            return runUntil(TimePoint::clock::now() + timeslice); +        } + +        /** +         * runUntil() is just like runFor(), only with a specific end time +         * instead of a timeslice duration. 
+         */ +        bool runUntil(const TimePoint& until); + +    private: +        template <typename CALLABLE, typename FOLLOWUP> +        static auto makeReplyLambda(CALLABLE&& callable, FOLLOWUP&& callback); +        /// general case: arbitrary C++ return type +        template <typename CALLABLE, typename FOLLOWUP, typename RETURNTYPE> +        struct MakeReplyLambda; +        /// specialize for CALLABLE returning void +        template <typename CALLABLE, typename FOLLOWUP> +        struct MakeReplyLambda<CALLABLE, FOLLOWUP, void>; + +        /// general case: arbitrary C++ return type +        template <typename CALLABLE, typename RETURNTYPE> +        struct WaitForResult; +        /// specialize for CALLABLE returning void +        template <typename CALLABLE> +        struct WaitForResult<CALLABLE, void>; + +        static void checkCoroutine(const std::string& method); +        static void error(const std::string& msg); +        static std::string makeName(const std::string& name); +        void callWork(const Queue::DataTuple& work); +        void callWork(const Work& work); +        Queue mQueue; +    }; + +    /** +     * BackJack is, in effect, a hand-rolled lambda, binding a WorkQueue, a +     * CALLABLE that returns bool, a TimePoint and an interval at which to +     * relaunch it. As long as the callable continues returning true, BackJack +     * keeps resubmitting it to the target WorkQueue. +     */ +    // Why is BackJack a class and not a lambda? Because, unlike a lambda, a +    // class method gets its own 'this' pointer -- which we need to resubmit +    // the whole BackJack callable. 
+    template <typename Rep, typename Period, typename CALLABLE> +    class WorkQueue::BackJack +    { +    public: +        // bind the desired data. CALLABLE is the class template parameter, +        // not deduced by this constructor, so 'callable' is not a forwarding +        // reference; it is moved into mCallable. +        BackJack(weak_t target, +                 const TimePoint& start, +                 const std::chrono::duration<Rep, Period>& interval, +                 CALLABLE&& callable): +            mTarget(target), +            mStart(start), +            mInterval(interval), +            mCallable(std::move(callable)) +        {} + +        // Called by target WorkQueue -- note that although WE require a +        // callable returning bool, WorkQueue wants a void callable. We +        // consume the bool. +        void operator()() +        { +            // If mCallable() throws an exception, don't catch it here: if it +            // throws once, it's likely to throw every time, so it's a waste +            // of time to arrange to call it again. +            if (mCallable()) +            { +                // Modify mStart to the new start time we desire. If we simply +                // added mInterval to now, we'd get actual timings of +                // (mInterval + slop), where 'slop' is the latency between the +                // previous mStart and the WorkQueue actually calling us. +                // Instead, add mInterval to mStart so that at least we +                // register our intent to fire at exact mIntervals. +                mStart += mInterval; + +                // We're being called at this moment by the target WorkQueue. +                // Assume it still exists, rather than checking the result of +                // lock(). +                // Resubmit the whole *this callable: that's why we're a class +                // rather than a lambda. Allow moving *this so we can carry a +                // move-only callable; but naturally this statement must be +                // the last time we reference this instance, which may become +                // moved-from. 
+                try +                { +                    mTarget.lock()->post(mStart, std::move(*this)); +                } +                catch (const Closed&) +                { +                    // Once this queue is closed, oh well, just stop +                } +            } +        } + +    private: +        weak_t mTarget; +        TimePoint mStart; +        std::chrono::duration<Rep, Period> mInterval; +        CALLABLE mCallable; +    }; + +    template <typename Rep, typename Period, typename CALLABLE> +    void WorkQueue::postEvery(const std::chrono::duration<Rep, Period>& interval, +                              CALLABLE&& callable) +    { +        if (interval.count() <= 0) +        { +            // It's essential that postEvery() be called with a positive +            // interval, since each call to BackJack posts another instance of +            // itself at (start + interval) and we order by target time. A +            // zero or negative interval would result in that BackJack +            // instance going to the head of the queue every time, immediately +            // ready to run. Effectively that would produce an infinite loop, +            // a denial of service on this WorkQueue. +            error("postEvery(interval) may not be 0"); +        } +        // Instantiate and post a suitable BackJack, binding a weak_ptr to +        // self, the current time, the desired interval and the desired +        // callable. 
+        // Decay CALLABLE so BackJack always stores the callable by value: +        // an lvalue argument is copied into the BackJack instead of +        // instantiating BackJack with a reference type. +        post( +            BackJack<Rep, Period, std::decay_t<CALLABLE>>( +                 getWeak(), TimePoint::clock::now(), interval, +                 std::decay_t<CALLABLE>(std::forward<CALLABLE>(callable)))); +    } + +    /// general case: arbitrary C++ return type +    template <typename CALLABLE, typename FOLLOWUP, typename RETURNTYPE> +    struct WorkQueue::MakeReplyLambda +    { +        auto operator()(CALLABLE&& callable, FOLLOWUP&& callback) +        { +            // Call the callable in any case -- but to minimize +            // copying the result, immediately bind it into the reply +            // lambda. The reply lambda also binds the original +            // callback, so that when we, the originating WorkQueue, +            // finally receive and process the reply lambda, we'll +            // call the bound callback with the bound result -- on the +            // same thread that originally called postTo(). +            // The reply lambda is mutable so that std::move(result) really +            // moves (in a const lambda it would degrade to a copy) and so +            // that a callback with a non-const operator() can be invoked. +            return +                [result = std::forward<CALLABLE>(callable)(), +                 callback = std::forward<FOLLOWUP>(callback)] +                () mutable +                { callback(std::move(result)); }; +        } +    }; + +    /// specialize for CALLABLE returning void +    template <typename CALLABLE, typename FOLLOWUP> +    struct WorkQueue::MakeReplyLambda<CALLABLE, FOLLOWUP, void> +    { +        auto operator()(CALLABLE&& callable, FOLLOWUP&& callback) +        { +            // Call the callable, which produces no result. +            std::forward<CALLABLE>(callable)(); +            // Our completion callback is simply the caller's callback. 
+            return std::forward<FOLLOWUP>(callback); +        } +    }; + +    /// Dispatch to the MakeReplyLambda specialization selected by the type +    /// returned by calling callable. +    template <typename CALLABLE, typename FOLLOWUP> +    auto WorkQueue::makeReplyLambda(CALLABLE&& callable, FOLLOWUP&& callback) +    { +        // Forward, don't move: MakeReplyLambda::operator() takes CALLABLE&& +        // and FOLLOWUP&&, which are lvalue references when we were passed +        // lvalues. +        return MakeReplyLambda<CALLABLE, FOLLOWUP, +                               decltype(std::forward<CALLABLE>(callable)())>() +            (std::forward<CALLABLE>(callable), std::forward<FOLLOWUP>(callback)); +    } + +    template <typename CALLABLE, typename FOLLOWUP> +    bool WorkQueue::postTo(weak_t target, +                           const TimePoint& time, CALLABLE&& callable, FOLLOWUP&& callback) +    { +        LL_PROFILE_ZONE_SCOPED; +        // We're being asked to post to the WorkQueue at target. +        // target is a weak_ptr: have to lock it to check it. +        auto tptr = target.lock(); +        if (! tptr) +            // can't post() if the target WorkQueue has been destroyed +            return false; + +        // Here we believe target WorkQueue still exists. Post to it a +        // lambda that packages our callable, our callback and a weak_ptr +        // to this originating WorkQueue. +        // Use postIfOpen() rather than post(): if the target still exists +        // but has been closed, we return false as documented instead of +        // throwing Closed. +        return tptr->postIfOpen( +            time, +            [reply = super::getWeak(), +             callable = std::forward<CALLABLE>(callable), +             callback = std::forward<FOLLOWUP>(callback)] +            () mutable +            { +                // Use postMaybe() below in case this originating WorkQueue +                // has been closed or destroyed. Remember, the outer lambda is +                // now running on a thread servicing the target WorkQueue, and +                // real time has elapsed since postTo()'s tptr->postIfOpen() call. +                try +                { +                    // Make a reply lambda to repost to THIS WorkQueue. +                    // Delegate to makeReplyLambda() so we can partially +                    // specialize on void return. 
+                    postMaybe(reply, makeReplyLambda(std::move(callable), std::move(callback))); +                } +                catch (...) +                { +                    // Either variant of makeReplyLambda() is responsible for +                    // calling the caller's callable. If that throws, return +                    // the exception to the originating thread. +                    postMaybe( +                        reply, +                        // Bind the current exception to transport back to the +                        // originating WorkQueue. Once there, rethrow it. +                        [exc = std::current_exception()](){ std::rethrow_exception(exc); }); +                } +            }); +    } + +    template <typename CALLABLE> +    bool WorkQueue::postMaybe(weak_t target, const TimePoint& time, CALLABLE&& callable) +    { +        LL_PROFILE_ZONE_SCOPED; +        // target is a weak_ptr: have to lock it to check it +        auto tptr = target.lock(); +        if (tptr) +        { +            try +            { +                tptr->post(time, std::forward<CALLABLE>(callable)); +                // we were able to post() +                return true; +            } +            catch (const Closed&) +            { +                // target WorkQueue still exists, but is Closed +            } +        } +        // either target no longer exists, or its WorkQueue is Closed +        return false; +    } + +    /// general case: arbitrary C++ return type +    template <typename CALLABLE, typename RETURNTYPE> +    struct WorkQueue::WaitForResult +    { +        auto operator()(WorkQueue* self, const TimePoint& time, CALLABLE&& callable) +        { +            LLCoros::Promise<RETURNTYPE> promise; +            self->post( +                time, +                // We dare to bind a reference to Promise because it's +                // specifically designed 
for cross-thread communication. +                // Forward the callable (copying an lvalue rather than moving +                // from it) and make the lambda mutable so a callable with a +                // non-const operator() can be invoked. +                [&promise, callable = std::forward<CALLABLE>(callable)]() mutable +                { +                    try +                    { +                        // call the caller's callable and trigger promise with result +                        promise.set_value(callable()); +                    } +                    catch (...) +                    { +                        promise.set_exception(std::current_exception()); +                    } +                }); +            auto future{ LLCoros::getFuture(promise) }; +            // now, on the calling thread, wait for that result +            LLCoros::TempStatus st("waiting for WorkQueue::waitForResult()"); +            return future.get(); +        } +    }; + +    /// specialize for CALLABLE returning void +    template <typename CALLABLE> +    struct WorkQueue::WaitForResult<CALLABLE, void> +    { +        void operator()(WorkQueue* self, const TimePoint& time, CALLABLE&& callable) +        { +            LLCoros::Promise<void> promise; +            self->post( +                time, +                // &promise is designed for cross-thread access +                // As in the general case: forward the callable and allow a +                // non-const operator(). +                [&promise, callable = std::forward<CALLABLE>(callable)]() mutable +                { +                    try +                    { +                        callable(); +                        promise.set_value(); +                    } +                    catch (...) 
+                    { +                        promise.set_exception(std::current_exception()); +                    } +                }); +            auto future{ LLCoros::getFuture(promise) }; +            // block until set_value() +            LLCoros::TempStatus st("waiting for void WorkQueue::waitForResult()"); +            future.get(); +        } +    }; + +    template <typename CALLABLE> +    auto WorkQueue::waitForResult(const TimePoint& time, CALLABLE&& callable) +    { +        checkCoroutine("waitForResult()"); +        // derive callable's return type so we can specialize for void +        return WaitForResult<CALLABLE, decltype(std::forward<CALLABLE>(callable)())>() +            (this, time, std::forward<CALLABLE>(callable)); +    } + +} // namespace LL + +#endif /* ! defined(LL_WORKQUEUE_H) */  | 
