Diffstat (limited to 'indra/llcommon')
53 files changed, 3234 insertions, 664 deletions
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 766a1849f9..782f656406 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -12,6 +12,7 @@ include(JsonCpp) include(Copy3rdPartyLibs) include(ZLIB) include(URIPARSER) +include(Tracy) include_directories( ${EXPAT_INCLUDE_DIRS} @@ -19,6 +20,7 @@ include_directories( ${JSONCPP_INCLUDE_DIR} ${ZLIB_INCLUDE_DIRS} ${URIPARSER_INCLUDE_DIRS} + ${TRACY_INCLUDE_DIR} ) # add_executable(lltreeiterators lltreeiterators.cpp) @@ -117,14 +119,16 @@ set(llcommon_SOURCE_FILES lluriparser.cpp lluuid.cpp llworkerthread.cpp - timing.cpp u64.cpp + threadpool.cpp + workqueue.cpp StackWalker.cpp ) set(llcommon_HEADER_FILES CMakeLists.txt + chrono.h ctype_workaround.h fix_macros.h indra_constants.h @@ -197,6 +201,7 @@ set(llcommon_HEADER_FILES llmortician.h llnametable.h llpointer.h + llprofiler.h llpounceable.h llpredicate.h llpreprocessor.h @@ -251,8 +256,12 @@ set(llcommon_HEADER_FILES lockstatic.h stdtypes.h stringize.h + threadpool.h + threadsafeschedule.h timer.h + tuple.h u64.h + workqueue.h StackWalker.h ) @@ -299,6 +308,7 @@ target_link_libraries( ${BOOST_SYSTEM_LIBRARY} ${GOOGLE_PERFTOOLS_LIBRARIES} ${URIPARSER_LIBRARIES} + ${TRACY_LIBRARY} ) if (DARWIN) @@ -355,6 +365,9 @@ if (LL_TESTS) LL_ADD_INTEGRATION_TEST(lluri "" "${test_libs}") LL_ADD_INTEGRATION_TEST(llunits "" "${test_libs}") LL_ADD_INTEGRATION_TEST(stringize "" "${test_libs}") + LL_ADD_INTEGRATION_TEST(threadsafeschedule "" "${test_libs}") + LL_ADD_INTEGRATION_TEST(tuple "" "${test_libs}") + LL_ADD_INTEGRATION_TEST(workqueue "" "${test_libs}") ## llexception_test.cpp isn't a regression test, and doesn't need to be run ## every build. It's to help a developer make implementation choices about diff --git a/indra/llcommon/chrono.h b/indra/llcommon/chrono.h new file mode 100644 index 0000000000..806e871892 --- /dev/null +++ b/indra/llcommon/chrono.h @@ -0,0 +1,65 @@ +/** + * @file chrono.h + * @author Nat Goodspeed + * @date 2021-10-05 + * @brief supplement <chrono> with utility functions + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_CHRONO_H) +#define LL_CHRONO_H + +#include <chrono> +#include <type_traits> // std::enable_if + +namespace LL +{ + +// time_point_cast() is derived from https://stackoverflow.com/a/35293183 +// without the iteration: we think errors in the ~1 microsecond range are +// probably acceptable. + +// This variant is for the optimal case when the source and dest use the same +// clock: that case is handled by std::chrono. +template <typename DestTimePoint, typename SrcTimePoint, + typename std::enable_if<std::is_same<typename DestTimePoint::clock, + typename SrcTimePoint::clock>::value, + bool>::type = true> +DestTimePoint time_point_cast(const SrcTimePoint& time) +{ + return std::chrono::time_point_cast<typename DestTimePoint::duration>(time); +} + +// This variant is for when the source and dest use different clocks -- see +// the linked StackOverflow answer, also Howard Hinnant's, for more context. +template <typename DestTimePoint, typename SrcTimePoint, + typename std::enable_if<! std::is_same<typename DestTimePoint::clock, + typename SrcTimePoint::clock>::value, + bool>::type = true> +DestTimePoint time_point_cast(const SrcTimePoint& time) +{ + // The basic idea is that we must adjust the passed time_point by the + // difference between the clocks' epochs. 
But since time_point doesn't + // expose its epoch, we fall back on what each of them thinks is now(). + // However, since we necessarily make sequential calls to those now() + // functions, the answers differ not only by the cycles spent executing + // those calls, but by potential OS interruptions between them. Try to + // reduce that error by capturing the source clock time both before and + // after the dest clock, and splitting the difference. Of course an + // interruption between two of these now() calls without a comparable + // interruption between the other two will skew the result, but better is + // more expensive. + const auto src_before = typename SrcTimePoint::clock::now(); + const auto dest_now = typename DestTimePoint::clock::now(); + const auto src_after = typename SrcTimePoint::clock::now(); + const auto src_diff = src_after - src_before; + const auto src_now = src_before + src_diff / 2; + return dest_now + (time - src_now); +} + +} // namespace LL + +#endif /* ! defined(LL_CHRONO_H) */ diff --git a/indra/llcommon/linden_common.h b/indra/llcommon/linden_common.h index e5a913a6a9..a228fd22be 100644 --- a/indra/llcommon/linden_common.h +++ b/indra/llcommon/linden_common.h @@ -27,6 +27,14 @@ #ifndef LL_LINDEN_COMMON_H #define LL_LINDEN_COMMON_H +#include "llprofiler.h" +#if TRACY_ENABLE && !defined(LL_PROFILER_ENABLE_TRACY_OPENGL) // hooks for memory profiling +void *tracy_aligned_malloc(size_t size, size_t alignment); +void tracy_aligned_free(void *memblock); +#define _aligned_malloc(X, Y) tracy_aligned_malloc((X), (Y)) +#define _aligned_free(X) tracy_aligned_free((X)) +#endif + // *NOTE: Please keep includes here to a minimum! // // Files included here are included in every library .cpp file and diff --git a/indra/llcommon/llcommon.cpp b/indra/llcommon/llcommon.cpp index 96be913d17..25a809dad2 100644 --- a/indra/llcommon/llcommon.cpp +++ b/indra/llcommon/llcommon.cpp @@ -33,6 +33,66 @@ #include "lltracethreadrecorder.h" #include "llcleanup.h" +thread_local bool gProfilerEnabled = false; + +#if (TRACY_ENABLE) +// Override new/delete for tracy memory profiling +void *operator new(size_t size) +{ + void* ptr; + if (gProfilerEnabled) + { + LL_PROFILE_ZONE_SCOPED; + ptr = (malloc)(size); + } + else + { + ptr = (malloc)(size); + } + if (!ptr) + { + throw std::bad_alloc(); + } + TracyAlloc(ptr, size); + return ptr; +} + +void operator delete(void *ptr) noexcept +{ + TracyFree(ptr); + if (gProfilerEnabled) + { + LL_PROFILE_ZONE_SCOPED; + (free)(ptr); + } + else + { + (free)(ptr); + } +} + +// C-style malloc/free can't be so easily overridden, so we define tracy versions and use +// a pre-processor #define in linden_common.h to redirect to them. The parens around the native +// functions below prevents recursive substitution by the preprocessor. +// +// Unaligned mallocs are rare in LL code but hooking them causes problems in 3p lib code (looking at +// you, Havok), so we'll only capture the aligned version. 
+ +void *tracy_aligned_malloc(size_t size, size_t alignment) +{ + auto ptr = ll_aligned_malloc_fallback(size, alignment); + if (ptr) TracyAlloc(ptr, size); + return ptr; +} + +void tracy_aligned_free(void *memblock) +{ + TracyFree(memblock); + ll_aligned_free_fallback(memblock); +} + +#endif + //static BOOL LLCommon::sAprInitialized = FALSE; diff --git a/indra/llcommon/llcond.h b/indra/llcommon/llcond.h index e31b67d893..da6e6affe1 100644 --- a/indra/llcommon/llcond.h +++ b/indra/llcommon/llcond.h @@ -53,6 +53,8 @@ private: LLCoros::Mutex mMutex; // Use LLCoros::ConditionVariable for the same reason. LLCoros::ConditionVariable mCond; + using LockType = LLCoros::LockType; + using cv_status = LLCoros::cv_status; public: /// LLCond can be explicitly initialized with a specific value for mData if @@ -65,10 +67,29 @@ public: LLCond(const LLCond&) = delete; LLCond& operator=(const LLCond&) = delete; - /// get() returns a const reference to the stored DATA. The only way to - /// get a non-const reference -- to modify the stored DATA -- is via - /// update_one() or update_all(). - const value_type& get() const { return mData; } + /** + * get() returns the stored DATA by value -- so to use get(), DATA must + * be copyable. The only way to get a non-const reference -- to modify + * the stored DATA -- is via update_one() or update_all(). + */ + value_type get() + { + LockType lk(mMutex); + return mData; + } + + /** + * get(functor) returns whatever the functor returns. It allows us to peek + * at the stored DATA without copying the whole thing. The functor must + * accept a const reference to DATA. If you want to modify DATA, call + * update_one() or update_all() instead. + */ + template <typename FUNC> + auto get(FUNC&& func) + { + LockType lk(mMutex); + return std::forward<FUNC>(func)(const_data()); + } /** * Pass update_one() an invocable accepting non-const (DATA&). The @@ -80,11 +101,11 @@ public: * update_one() when DATA is a struct or class. */ template <typename MODIFY> - void update_one(MODIFY modify) + void update_one(MODIFY&& modify) { { // scope of lock can/should end before notify_one() - LLCoros::LockType lk(mMutex); - modify(mData); + LockType lk(mMutex); + std::forward<MODIFY>(modify)(mData); } mCond.notify_one(); } @@ -99,11 +120,11 @@ public: * update_all() when DATA is a struct or class. */ template <typename MODIFY> - void update_all(MODIFY modify) + void update_all(MODIFY&& modify) { { // scope of lock can/should end before notify_all() - LLCoros::LockType lk(mMutex); - modify(mData); + LockType lk(mMutex); + std::forward<MODIFY>(modify)(mData); } mCond.notify_all(); } @@ -116,9 +137,9 @@ public: * wait() on the condition_variable. */ template <typename Pred> - void wait(Pred pred) + void wait(Pred&& pred) { - LLCoros::LockType lk(mMutex); + LockType lk(mMutex); // We must iterate explicitly since the predicate accepted by // condition_variable::wait() requires a different signature: // condition_variable::wait() calls its predicate with no arguments. @@ -127,7 +148,7 @@ public: // But what if they instead pass a predicate accepting non-const // (DATA&)? Such a predicate could modify mData, which would be Bad. // Forbid that. - while (! pred(const_cast<const value_type&>(mData))) + while (! std::forward<Pred>(pred)(const_data())) { mCond.wait(lk); } @@ -144,7 +165,7 @@ public: * returning true. 
*/ template <typename Rep, typename Period, typename Pred> - bool wait_for(const std::chrono::duration<Rep, Period>& timeout_duration, Pred pred) + bool wait_for(const std::chrono::duration<Rep, Period>& timeout_duration, Pred&& pred) { // Instead of replicating wait_until() logic, convert duration to // time_point and just call wait_until(). @@ -153,7 +174,8 @@ public: // wrong! We'd keep pushing the timeout time farther and farther into // the future. This way, we establish a definite timeout time and // stick to it. - return wait_until(std::chrono::steady_clock::now() + timeout_duration, pred); + return wait_until(std::chrono::steady_clock::now() + timeout_duration, + std::forward<Pred>(pred)); } /** @@ -163,9 +185,9 @@ public: * generic wait_for() method. */ template <typename Pred> - bool wait_for(F32Milliseconds timeout_duration, Pred pred) + bool wait_for(F32Milliseconds timeout_duration, Pred&& pred) { - return wait_for(convert(timeout_duration), pred); + return wait_for(convert(timeout_duration), std::forward<Pred>(pred)); } protected: @@ -183,6 +205,10 @@ protected: } private: + // It's important to pass a const ref to certain user-specified functors + // that aren't supposed to be able to modify mData. + const value_type& const_data() const { return mData; } + /** * Pass wait_until() a chrono::time_point, indicating the time at which we * should stop waiting, and a predicate accepting (const DATA&), returning @@ -203,21 +229,21 @@ private: * honoring a fixed timeout. */ template <typename Clock, typename Duration, typename Pred> - bool wait_until(const std::chrono::time_point<Clock, Duration>& timeout_time, Pred pred) + bool wait_until(const std::chrono::time_point<Clock, Duration>& timeout_time, Pred&& pred) { - LLCoros::LockType lk(mMutex); + LockType lk(mMutex); // We advise the caller to pass a predicate accepting (const DATA&). // But what if they instead pass a predicate accepting non-const // (DATA&)? Such a predicate could modify mData, which would be Bad. // Forbid that. - while (! pred(const_cast<const value_type&>(mData))) + while (! std::forward<Pred>(pred)(const_data())) { - if (LLCoros::cv_status::timeout == mCond.wait_until(lk, timeout_time)) + if (cv_status::timeout == mCond.wait_until(lk, timeout_time)) { // It's possible that wait_until() timed out AND the predicate // became true more or less simultaneously. Even though // wait_until() timed out, check the predicate one more time. - return pred(const_cast<const value_type&>(mData)); + return std::forward<Pred>(pred)(const_data()); } } return true; diff --git a/indra/llcommon/lldate.cpp b/indra/llcommon/lldate.cpp index 7a2a0869f4..2ddcf40895 100644 --- a/indra/llcommon/lldate.cpp +++ b/indra/llcommon/lldate.cpp @@ -86,11 +86,9 @@ std::string LLDate::asRFC1123() const return toHTTPDateString (std::string ("%A, %d %b %Y %H:%M:%S GMT")); } -LLTrace::BlockTimerStatHandle FT_DATE_FORMAT("Date Format"); - std::string LLDate::toHTTPDateString (std::string fmt) const { - LL_RECORD_BLOCK_TIME(FT_DATE_FORMAT); + LL_PROFILE_ZONE_SCOPED; time_t locSeconds = (time_t) mSecondsSinceEpoch; struct tm * gmt = gmtime (&locSeconds); @@ -99,7 +97,7 @@ std::string LLDate::toHTTPDateString (std::string fmt) const std::string LLDate::toHTTPDateString (tm * gmt, std::string fmt) { - LL_RECORD_BLOCK_TIME(FT_DATE_FORMAT); + LL_PROFILE_ZONE_SCOPED; // avoid calling setlocale() unnecessarily - it's expensive. 
static std::string prev_locale = ""; diff --git a/indra/llcommon/llerror.cpp b/indra/llcommon/llerror.cpp index 3fe8ce5f0e..17a5ec5776 100644 --- a/indra/llcommon/llerror.cpp +++ b/indra/llcommon/llerror.cpp @@ -109,6 +109,7 @@ namespace { virtual void recordMessage(LLError::ELevel level, const std::string& message) override { + LL_PROFILE_ZONE_SCOPED int syslogPriority = LOG_CRIT; switch (level) { case LLError::LEVEL_DEBUG: syslogPriority = LOG_DEBUG; break; @@ -166,6 +167,7 @@ namespace { virtual void recordMessage(LLError::ELevel level, const std::string& message) override { + LL_PROFILE_ZONE_SCOPED if (LLError::getAlwaysFlush()) { mFile << message << std::endl; @@ -194,7 +196,7 @@ namespace { { return LLError::getEnabledLogTypesMask() & 0x04; } - + LL_FORCE_INLINE std::string createBoldANSI() { std::string ansi_code; @@ -220,10 +222,10 @@ namespace { LL_FORCE_INLINE std::string createANSI(const std::string& color) { std::string ansi_code; - ansi_code += '\033'; - ansi_code += "["; + ansi_code += '\033'; + ansi_code += "["; ansi_code += "38;5;"; - ansi_code += color; + ansi_code += color; ansi_code += "m"; return ansi_code; @@ -232,6 +234,7 @@ namespace { virtual void recordMessage(LLError::ELevel level, const std::string& message) override { + LL_PROFILE_ZONE_SCOPED // The default colors for error, warn and debug are now a bit more pastel // and easier to read on the default (black) terminal background but you // now have the option to set the color of each via an environment variables: @@ -261,6 +264,7 @@ namespace { } else { + LL_PROFILE_ZONE_NAMED("fprintf"); fprintf(stderr, "%s\n", message.c_str()); } } @@ -270,6 +274,7 @@ namespace { LL_FORCE_INLINE void writeANSI(const std::string& ansi_code, const std::string& message) { + LL_PROFILE_ZONE_SCOPED static std::string s_ansi_bold = createBoldANSI(); // bold text static std::string s_ansi_reset = createResetANSI(); // reset // ANSI color code escape sequence, message, and reset in one fprintf call @@ -306,6 +311,7 @@ namespace { virtual void recordMessage(LLError::ELevel level, const std::string& message) override { + LL_PROFILE_ZONE_SCOPED mBuffer->addLine(message); } @@ -332,6 +338,7 @@ namespace { virtual void recordMessage(LLError::ELevel level, const std::string& message) override { + LL_PROFILE_ZONE_SCOPED debugger_print(message); } }; @@ -1213,6 +1220,7 @@ namespace void writeToRecorders(const LLError::CallSite& site, const std::string& message) { + LL_PROFILE_ZONE_SCOPED LLError::ELevel level = site.mLevel; SettingsConfigPtr s = Globals::getInstance()->getSettingsConfig(); @@ -1347,6 +1355,7 @@ namespace LLError bool Log::shouldLog(CallSite& site) { + LL_PROFILE_ZONE_SCOPED LLMutexTrylock lock(getMutex<LOG_MUTEX>(), 5); if (!lock.isLocked()) { @@ -1391,6 +1400,7 @@ namespace LLError void Log::flush(const std::ostringstream& out, const CallSite& site) { + LL_PROFILE_ZONE_SCOPED LLMutexTrylock lock(getMutex<LOG_MUTEX>(),5); if (!lock.isLocked()) { diff --git a/indra/llcommon/llerror.h b/indra/llcommon/llerror.h index d439136ca8..d06c0e2132 100644 --- a/indra/llcommon/llerror.h +++ b/indra/llcommon/llerror.h @@ -35,7 +35,9 @@ #include "stdtypes.h" +#include "llprofiler.h" #include "llpreprocessor.h" + #include <boost/static_assert.hpp> const int LL_ERR_NOERR = 0; @@ -348,7 +350,8 @@ typedef LLError::NoClassInfo _LL_CLASS_TO_LOG; // if (condition) LL_INFOS() << "True" << LL_ENDL; else LL_INFOS()() << "False" << LL_ENDL; #define lllog(level, once, ...) 
\ - do { \ + do { \ + LL_PROFILE_ZONE_NAMED("lllog"); \ const char* tags[] = {"", ##__VA_ARGS__}; \ static LLError::CallSite _site(lllog_site_args_(level, once, tags)); \ lllog_test_() diff --git a/indra/llcommon/llerrorcontrol.h b/indra/llcommon/llerrorcontrol.h index e87bb7bf35..57f10b7895 100644 --- a/indra/llcommon/llerrorcontrol.h +++ b/indra/llcommon/llerrorcontrol.h @@ -190,6 +190,7 @@ namespace LLError {} void recordMessage(LLError::ELevel level, const std::string& message) override { + LL_PROFILE_ZONE_SCOPED mCallable(level, message); } private: diff --git a/indra/llcommon/lleventfilter.h b/indra/llcommon/lleventfilter.h index 48c2570732..7613850fb2 100644 --- a/indra/llcommon/lleventfilter.h +++ b/indra/llcommon/lleventfilter.h @@ -429,6 +429,8 @@ public: // path, then stores it to mTarget. virtual bool post(const LLSD& event) { + LL_PROFILE_ZONE_SCOPED + // Extract the element specified by 'mPath' from 'event'. To perform a // generic type-appropriate store through mTarget, construct an // LLSDParam<T> and store that, thus engaging LLSDParam's custom diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp index 5b6a7b82f8..d38946004f 100644 --- a/indra/llcommon/llfasttimer.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -191,29 +191,30 @@ TimeBlockTreeNode& BlockTimerStatHandle::getTreeNode() const } + void BlockTimer::bootstrapTimerTree() { - for (auto& base : BlockTimerStatHandle::instance_snapshot()) - { - // because of indirect derivation from LLInstanceTracker, have to downcast - BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(base); - if (&timer == &BlockTimer::getRootTimeBlock()) continue; - - // bootstrap tree construction by attaching to last timer to be on stack - // when this timer was called - if (timer.getParent() == &BlockTimer::getRootTimeBlock()) - { - TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator(); - - if (accumulator.mLastCaller) - { - timer.setParent(accumulator.mLastCaller); - accumulator.mParent = accumulator.mLastCaller; - } - // no need to push up tree on first use, flag can be set spuriously - accumulator.mMoveUpTree = false; - } - } + for (auto& base : BlockTimerStatHandle::instance_snapshot()) + { + // because of indirect derivation from LLInstanceTracker, have to downcast + BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(base); + if (&timer == &BlockTimer::getRootTimeBlock()) continue; + + // bootstrap tree construction by attaching to last timer to be on stack + // when this timer was called + if (timer.getParent() == &BlockTimer::getRootTimeBlock()) + { + TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator(); + + if (accumulator.mLastCaller) + { + timer.setParent(accumulator.mLastCaller); + accumulator.mParent = accumulator.mLastCaller; + } + // no need to push up tree on first use, flag can be set spuriously + accumulator.mMoveUpTree = false; + } + } } // bump timers up tree if they have been flagged as being in the wrong place @@ -221,6 +222,7 @@ void BlockTimer::bootstrapTimerTree() // this preserves partial order derived from current frame's observations void BlockTimer::incrementalUpdateTimerTree() { + LL_PROFILE_ZONE_SCOPED; for(block_timer_tree_df_post_iterator_t it = begin_block_timer_tree_df_post(BlockTimer::getRootTimeBlock()); it != end_block_timer_tree_df_post(); ++it) @@ -260,7 +262,8 @@ void BlockTimer::incrementalUpdateTimerTree() void BlockTimer::updateTimes() - { +{ + LL_PROFILE_ZONE_SCOPED; // walk up stack of active timers and accumulate current time 
while leaving timing structures active BlockTimerStackRecord* stack_record = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance(); if (!stack_record) return; @@ -271,7 +274,7 @@ void BlockTimer::updateTimes() while(cur_timer && cur_timer->mParentTimerData.mActiveTimer != cur_timer) // root defined by parent pointing to self - { + { U64 cumulative_time_delta = cur_time - cur_timer->mStartTime; cur_timer->mStartTime = cur_time; diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index dfc63d08a2..9bd93d7240 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -38,7 +38,10 @@ #define LL_FAST_TIMER_ON 1 #define LL_FASTTIMER_USE_RDTSC 1 +// NOTE: Also see llprofiler.h +#if !defined(LL_PROFILER_CONFIGURATION) #define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__); +#endif // LL_PROFILER_CONFIGURATION namespace LLTrace { diff --git a/indra/llcommon/llframetimer.cpp b/indra/llcommon/llframetimer.cpp index 1e9920746b..c54029e8b4 100644 --- a/indra/llcommon/llframetimer.cpp +++ b/indra/llcommon/llframetimer.cpp @@ -29,6 +29,11 @@ #include "llframetimer.h" +// We don't bother building a stand alone lib; we just need to include the one source file for Tracy support +#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER + #include "TracyClient.cpp" +#endif // LL_PROFILER_CONFIGURATION + // Static members //LLTimer LLFrameTimer::sInternalTimer; U64 LLFrameTimer::sStartTotalTime = totalTime(); diff --git a/indra/llcommon/llinstancetracker.h b/indra/llcommon/llinstancetracker.h index 402333cca7..02535a59e7 100644 --- a/indra/llcommon/llinstancetracker.h +++ b/indra/llcommon/llinstancetracker.h @@ -83,13 +83,34 @@ class LLInstanceTracker typedef llthread::LockStatic<StaticData> LockStatic; public: + using ptr_t = std::shared_ptr<T>; + using weak_t = std::weak_ptr<T>; + + /** + * Storing a dumb T* somewhere external is a bad idea, since + * LLInstanceTracker subclasses are explicitly destroyed rather than + * managed by smart pointers. It's legal to declare stack instances of an + * LLInstanceTracker subclass. But it's reasonable to store a + * std::weak_ptr<T>, which will become invalid when the T instance is + * destroyed. + */ + weak_t getWeak() + { + return mSelf; + } + + static S32 instanceCount() + { + return LockStatic()->mMap.size(); + } + // snapshot of std::pair<const KEY, std::shared_ptr<T>> pairs class snapshot { // It's very important that what we store in this snapshot are // weak_ptrs, NOT shared_ptrs. That's how we discover whether any // instance has been deleted during the lifespan of a snapshot. - typedef std::vector<std::pair<const KEY, std::weak_ptr<T>>> VectorType; + typedef std::vector<std::pair<const KEY, weak_t>> VectorType; // Dereferencing our iterator produces a std::shared_ptr for each // instance that still exists. Since we store weak_ptrs, that involves // two chained transformations: @@ -98,7 +119,7 @@ public: // It is very important that we filter lazily, that is, during // traversal. Any one of our stored weak_ptrs might expire during // traversal. 
- typedef std::pair<const KEY, std::shared_ptr<T>> strong_pair; + typedef std::pair<const KEY, ptr_t> strong_pair; // Note for future reference: nat has not yet had any luck (up to // Boost 1.67) trying to use boost::transform_iterator with a hand- // coded functor, only with actual functions. In my experience, an @@ -202,17 +223,12 @@ public: iterator end() { return iterator(snapshot::end(), key_getter); } }; - static T* getInstance(const KEY& k) + static ptr_t getInstance(const KEY& k) { LockStatic lock; const InstanceMap& map(lock->mMap); typename InstanceMap::const_iterator found = map.find(k); - return (found == map.end()) ? NULL : found->second.get(); - } - - static S32 instanceCount() - { - return LockStatic()->mMap.size(); + return (found == map.end()) ? NULL : found->second; } protected: @@ -222,7 +238,9 @@ protected: // shared_ptr, so give it a no-op deleter. We store shared_ptrs in our // InstanceMap specifically so snapshot can store weak_ptrs so we can // detect deletions during traversals. - std::shared_ptr<T> ptr(static_cast<T*>(this), [](T*){}); + ptr_t ptr(static_cast<T*>(this), [](T*){}); + // save corresponding weak_ptr for future reference + mSelf = ptr; LockStatic lock; add_(lock, key, ptr); } @@ -257,7 +275,7 @@ private: static std::string report(const char* key) { return report(std::string(key)); } // caller must instantiate LockStatic - void add_(LockStatic& lock, const KEY& key, const std::shared_ptr<T>& ptr) + void add_(LockStatic& lock, const KEY& key, const ptr_t& ptr) { mInstanceKey = key; InstanceMap& map = lock->mMap; @@ -281,7 +299,7 @@ private: break; } } - std::shared_ptr<T> remove_(LockStatic& lock) + ptr_t remove_(LockStatic& lock) { InstanceMap& map = lock->mMap; typename InstanceMap::iterator iter = map.find(mInstanceKey); @@ -295,6 +313,9 @@ private: } private: + // Storing a weak_ptr to self is a bit like deriving from + // std::enable_shared_from_this(), except more explicit. + weak_t mSelf; KEY mInstanceKey; }; @@ -326,6 +347,9 @@ class LLInstanceTracker<T, void, KEY_COLLISION_BEHAVIOR> typedef llthread::LockStatic<StaticData> LockStatic; public: + using ptr_t = std::shared_ptr<T>; + using weak_t = std::weak_ptr<T>; + /** * Storing a dumb T* somewhere external is a bad idea, since * LLInstanceTracker subclasses are explicitly destroyed rather than @@ -334,12 +358,15 @@ public: * std::weak_ptr<T>, which will become invalid when the T instance is * destroyed. */ - std::weak_ptr<T> getWeak() + weak_t getWeak() { return mSelf; } - static S32 instanceCount() { return LockStatic()->mSet.size(); } + static S32 instanceCount() + { + return LockStatic()->mSet.size(); + } // snapshot of std::shared_ptr<T> pointers class snapshot @@ -347,7 +374,7 @@ public: // It's very important that what we store in this snapshot are // weak_ptrs, NOT shared_ptrs. That's how we discover whether any // instance has been deleted during the lifespan of a snapshot. - typedef std::vector<std::weak_ptr<T>> VectorType; + typedef std::vector<weak_t> VectorType; // Dereferencing our iterator produces a std::shared_ptr for each // instance that still exists. Since we store weak_ptrs, that involves // two chained transformations: @@ -453,7 +480,7 @@ protected: private: // Storing a weak_ptr to self is a bit like deriving from // std::enable_shared_from_this(), except more explicit. 
- std::weak_ptr<T> mSelf; + weak_t mSelf; }; #endif diff --git a/indra/llcommon/llleaplistener.cpp b/indra/llcommon/llleaplistener.cpp index 3e6ce9092c..11bfec1b31 100644 --- a/indra/llcommon/llleaplistener.cpp +++ b/indra/llcommon/llleaplistener.cpp @@ -220,7 +220,7 @@ void LLLeapListener::getAPI(const LLSD& request) const { Response reply(LLSD(), request); - LLEventAPI* found = LLEventAPI::getInstance(request["api"]); + auto found = LLEventAPI::getInstance(request["api"]); if (found) { reply["name"] = found->getName(); diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp index ea84e4c1ea..849867586a 100644 --- a/indra/llcommon/llmemory.cpp +++ b/indra/llcommon/llmemory.cpp @@ -82,6 +82,7 @@ void LLMemory::initMaxHeapSizeGB(F32Gigabytes max_heap_size) //static void LLMemory::updateMemoryInfo() { + LL_PROFILE_ZONE_SCOPED #if LL_WINDOWS PROCESS_MEMORY_COUNTERS counters; @@ -145,6 +146,7 @@ void* LLMemory::tryToAlloc(void* address, U32 size) //static void LLMemory::logMemoryInfo(BOOL update) { + LL_PROFILE_ZONE_SCOPED if(update) { updateMemoryInfo() ; diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 24f86cc11e..41023b4ba4 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -101,6 +101,29 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) +#define LL_ALIGN_NEW \ +public: \ + void* operator new(size_t size) \ + { \ + return ll_aligned_malloc_16(size); \ + } \ + \ + void operator delete(void* ptr) \ + { \ + ll_aligned_free_16(ptr); \ + } \ + \ + void* operator new[](size_t size) \ + { \ + return ll_aligned_malloc_16(size); \ + } \ + \ + void operator delete[](void* ptr) \ + { \ + ll_aligned_free_16(ptr); \ + } + + //------------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------------ // for enable buffer overrun detection predefine LL_DEBUG_BUFFER_OVERRUN in current library @@ -113,8 +136,9 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) #else inline void* ll_aligned_malloc_fallback( size_t size, int align ) { + LL_PROFILE_ZONE_SCOPED; #if defined(LL_WINDOWS) - return _aligned_malloc(size, align); + void* ret = _aligned_malloc(size, align); #else char* aligned = NULL; void* mem = malloc( size + (align - 1) + sizeof(void*) ); @@ -125,12 +149,16 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) ((void**)aligned)[-1] = mem; } - return aligned; + void* ret = aligned; #endif + LL_PROFILE_ALLOC(ret, size); + return ret; } inline void ll_aligned_free_fallback( void* ptr ) { + LL_PROFILE_ZONE_SCOPED; + LL_PROFILE_FREE(ptr); #if defined(LL_WINDOWS) _aligned_free(ptr); #else @@ -146,21 +174,24 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16(). { + LL_PROFILE_ZONE_SCOPED; #if defined(LL_WINDOWS) - return _aligned_malloc(size, 16); + void* ret = _aligned_malloc(size, 16); #elif defined(LL_DARWIN) - return malloc(size); // default osx malloc is 16 byte aligned. + void* ret = malloc(size); // default osx malloc is 16 byte aligned. 
#else - void *rtn; - if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size))) - return rtn; - else // bad alignment requested, or out of memory - return NULL; + void *ret; + if (0 != posix_memalign(&ret, 16, size)) + return nullptr; #endif + LL_PROFILE_ALLOC(ret, size); + return ret; } inline void ll_aligned_free_16(void *p) { + LL_PROFILE_ZONE_SCOPED; + LL_PROFILE_FREE(p); #if defined(LL_WINDOWS) _aligned_free(p); #elif defined(LL_DARWIN) @@ -172,10 +203,12 @@ inline void ll_aligned_free_16(void *p) inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16(). { + LL_PROFILE_ZONE_SCOPED; + LL_PROFILE_FREE(ptr); #if defined(LL_WINDOWS) - return _aligned_realloc(ptr, size, 16); + void* ret = _aligned_realloc(ptr, size, 16); #elif defined(LL_DARWIN) - return realloc(ptr,size); // default osx malloc is 16 byte aligned. + void* ret = realloc(ptr,size); // default osx malloc is 16 byte aligned. #else //FIXME: memcpy is SLOW void* ret = ll_aligned_malloc_16(size); @@ -188,27 +221,31 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r } ll_aligned_free_16(ptr); } - return ret; #endif + LL_PROFILE_ALLOC(ptr, size); + return ret; } inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). { + LL_PROFILE_ZONE_SCOPED; #if defined(LL_WINDOWS) - return _aligned_malloc(size, 32); + void* ret = _aligned_malloc(size, 32); #elif defined(LL_DARWIN) - return ll_aligned_malloc_fallback( size, 32 ); + void* ret = ll_aligned_malloc_fallback( size, 32 ); #else - void *rtn; - if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size))) - return rtn; - else // bad alignment requested, or out of memory - return NULL; + void *ret; + if (0 != posix_memalign(&ret, 32, size)) + return nullptr; #endif + LL_PROFILE_ALLOC(ret, size); + return ret; } inline void ll_aligned_free_32(void *p) { + LL_PROFILE_ZONE_SCOPED; + LL_PROFILE_FREE(p); #if defined(LL_WINDOWS) _aligned_free(p); #elif defined(LL_DARWIN) @@ -222,29 +259,35 @@ inline void ll_aligned_free_32(void *p) template<size_t ALIGNMENT> LL_FORCE_INLINE void* ll_aligned_malloc(size_t size) { + LL_PROFILE_ZONE_SCOPED; + void* ret; if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0) { - return malloc(size); + ret = malloc(size); + LL_PROFILE_ALLOC(ret, size); } else if (ALIGNMENT == 16) { - return ll_aligned_malloc_16(size); + ret = ll_aligned_malloc_16(size); } else if (ALIGNMENT == 32) { - return ll_aligned_malloc_32(size); + ret = ll_aligned_malloc_32(size); } else { - return ll_aligned_malloc_fallback(size, ALIGNMENT); + ret = ll_aligned_malloc_fallback(size, ALIGNMENT); } + return ret; } template<size_t ALIGNMENT> LL_FORCE_INLINE void ll_aligned_free(void* ptr) { + LL_PROFILE_ZONE_SCOPED; if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN) { + LL_PROFILE_FREE(ptr); free(ptr); } else if (ALIGNMENT == 16) @@ -266,6 +309,7 @@ LL_FORCE_INLINE void ll_aligned_free(void* ptr) // inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes) { + LL_PROFILE_ZONE_SCOPED; assert(src != NULL); assert(dst != NULL); assert(bytes > 0); diff --git a/indra/llcommon/llmutex.cpp b/indra/llcommon/llmutex.cpp index 4d73c04d07..a49002b5dc 100644 --- a/indra/llcommon/llmutex.cpp +++ b/indra/llcommon/llmutex.cpp @@ -44,6 +44,7 @@ LLMutex::~LLMutex() void LLMutex::lock() { + LL_PROFILE_ZONE_SCOPED if(isSelfLocked()) { //redundant lock mCount++; @@ -65,6 +66,7 @@ void LLMutex::lock() void LLMutex::unlock() { + 
LL_PROFILE_ZONE_SCOPED if (mCount > 0) { //not the root unlock mCount--; @@ -85,6 +87,7 @@ void LLMutex::unlock() bool LLMutex::isLocked() { + LL_PROFILE_ZONE_SCOPED if (!mMutex.try_lock()) { return true; @@ -108,6 +111,7 @@ LLThread::id_t LLMutex::lockingThread() const bool LLMutex::trylock() { + LL_PROFILE_ZONE_SCOPED if(isSelfLocked()) { //redundant lock mCount++; @@ -146,17 +150,20 @@ LLCondition::~LLCondition() void LLCondition::wait() { + LL_PROFILE_ZONE_SCOPED std::unique_lock< std::mutex > lock(mMutex); mCond.wait(lock); } void LLCondition::signal() { + LL_PROFILE_ZONE_SCOPED mCond.notify_one(); } void LLCondition::broadcast() { + LL_PROFILE_ZONE_SCOPED mCond.notify_all(); } @@ -166,6 +173,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex) : mMutex(mutex), mLocked(false) { + LL_PROFILE_ZONE_SCOPED if (mMutex) mLocked = mMutex->trylock(); } @@ -174,6 +182,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex, U32 aTries, U32 delay_ms) : mMutex(mutex), mLocked(false) { + LL_PROFILE_ZONE_SCOPED if (!mMutex) return; @@ -188,6 +197,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex, U32 aTries, U32 delay_ms) LLMutexTrylock::~LLMutexTrylock() { + LL_PROFILE_ZONE_SCOPED if (mMutex && mLocked) mMutex->unlock(); } @@ -199,6 +209,7 @@ LLMutexTrylock::~LLMutexTrylock() // LLScopedLock::LLScopedLock(std::mutex* mutex) : mMutex(mutex) { + LL_PROFILE_ZONE_SCOPED if(mutex) { mutex->lock(); @@ -217,6 +228,7 @@ LLScopedLock::~LLScopedLock() void LLScopedLock::unlock() { + LL_PROFILE_ZONE_SCOPED if(mLocked) { mMutex->unlock(); diff --git a/indra/llcommon/llpreprocessor.h b/indra/llcommon/llpreprocessor.h index b17a8e761a..dc586b0008 100644 --- a/indra/llcommon/llpreprocessor.h +++ b/indra/llcommon/llpreprocessor.h @@ -171,7 +171,9 @@ #define LL_DLLIMPORT #endif // LL_WINDOWS -#if ! defined(LL_WINDOWS) +#if __clang__ || ! defined(LL_WINDOWS) +// Only on Windows, and only with the Microsoft compiler (vs. clang) is +// wchar_t potentially not a distinct type. #define LL_WCHAR_T_NATIVE 1 #else // LL_WINDOWS // https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros diff --git a/indra/llcommon/llprofiler.h b/indra/llcommon/llprofiler.h new file mode 100644 index 0000000000..ca60d23248 --- /dev/null +++ b/indra/llcommon/llprofiler.h @@ -0,0 +1,111 @@ +/** + * @file llprofiler.h + * @brief Wrapper for Tracy and/or other profilers + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2021, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_PROFILER_H +#define LL_PROFILER_H + +#define LL_PROFILER_CONFIG_NONE 0 // No profiling +#define LL_PROFILER_CONFIG_FAST_TIMER 1 // Profiling on: Only Fast Timers +#define LL_PROFILER_CONFIG_TRACY 2 // Profiling on: Only Tracy +#define LL_PROFILER_CONFIG_TRACY_FAST_TIMER 3 // Profiling on: Fast Timers + Tracy + +#ifndef LL_PROFILER_CONFIGURATION +#define LL_PROFILER_CONFIGURATION LL_PROFILER_CONFIG_FAST_TIMER +#endif + +extern thread_local bool gProfilerEnabled; + +#if defined(LL_PROFILER_CONFIGURATION) && (LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE) + #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER + #define TRACY_ENABLE 1 +// Normally these would be enabled but we want to be able to build any viewer with Tracy enabled and run the Tracy server on another machine +// They must be undefined in order to work across multiple machines +// #define TRACY_NO_BROADCAST 1 +// #define TRACY_ONLY_LOCALHOST 1 + #define TRACY_ONLY_IPV4 1 + #include "Tracy.hpp" + + // Mutually exclusive with detailed memory tracing + #define LL_PROFILER_ENABLE_TRACY_OPENGL 0 + #endif + + #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY + #define LL_PROFILER_FRAME_END FrameMark + #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); gProfilerEnabled = true; + #define LL_RECORD_BLOCK_TIME(name) ZoneScoped // Want descriptive names; was: ZoneNamedN( ___tracy_scoped_zone, #name, true ); + #define LL_PROFILE_ZONE_NAMED(name) ZoneNamedN( ___tracy_scoped_zone, name, true ); + #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB + #define LL_PROFILE_ZONE_SCOPED ZoneScoped + + #define LL_PROFILE_ZONE_NUM( val ) ZoneValue( val ) + #define LL_PROFILE_ZONE_TEXT( text, size ) ZoneText( text, size ) + + #define LL_PROFILE_ZONE_ERR(name) LL_PROFILE_ZONE_NAMED_COLOR( name, 0XFF0000 ) // RGB yellow + #define LL_PROFILE_ZONE_INFO(name) LL_PROFILE_ZONE_NAMED_COLOR( name, 0X00FFFF ) // RGB cyan + #define LL_PROFILE_ZONE_WARN(name) LL_PROFILE_ZONE_NAMED_COLOR( name, 0x0FFFF00 ) // RGB red + #define LL_PROFILE_ALLOC(ptr, size) TracyAlloc(ptr, size) + #define LL_PROFILE_FREE(ptr) TracyFree(ptr) + #endif + #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_FAST_TIMER + #define LL_PROFILER_FRAME_END + #define LL_PROFILER_SET_THREAD_NAME( name ) (void)(name) + #define LL_RECORD_BLOCK_TIME(name) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__); + #define LL_PROFILE_ZONE_NAMED(name) // LL_PROFILE_ZONE_NAMED is a no-op when Tracy is disabled + #define LL_PROFILE_ZONE_SCOPED // LL_PROFILE_ZONE_SCOPED is a no-op when Tracy is disabled + #define LL_PROFILE_ZONE_COLOR(name,color) // LL_RECORD_BLOCK_TIME(name) + + #define LL_PROFILE_ZONE_NUM( val ) (void)( val ); // Not supported + #define LL_PROFILE_ZONE_TEXT( text, size ) (void)( text ); void( size ); // Not supported + + #define LL_PROFILE_ZONE_ERR(name) (void)(name); // Not supported + #define LL_PROFILE_ZONE_INFO(name) (void)(name); // Not supported + #define LL_PROFILE_ZONE_WARN(name) 
(void)(name); // Not supported + #define LL_PROFILE_ALLOC(ptr, size) (void)(ptr); (void)(size); + #define LL_PROFILE_FREE(ptr) (void)(ptr); + #endif + #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER + #define LL_PROFILER_FRAME_END FrameMark + #define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); gProfilerEnabled = true; + #define LL_RECORD_BLOCK_TIME(name) ZoneNamedN(___tracy_scoped_zone, #name, true); const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__); + #define LL_PROFILE_ZONE_NAMED(name) ZoneNamedN( ___tracy_scoped_zone, #name, true ); + #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB + #define LL_PROFILE_ZONE_SCOPED ZoneScoped + + #define LL_PROFILE_ZONE_NUM( val ) ZoneValue( val ) + #define LL_PROFILE_ZONE_TEXT( text, size ) ZoneText( text, size ) + + #define LL_PROFILE_ZONE_ERR(name) LL_PROFILE_ZONE_NAMED_COLOR( name, 0XFF0000 ) // RGB yellow + #define LL_PROFILE_ZONE_INFO(name) LL_PROFILE_ZONE_NAMED_COLOR( name, 0X00FFFF ) // RGB cyan + #define LL_PROFILE_ZONE_WARN(name) LL_PROFILE_ZONE_NAMED_COLOR( name, 0x0FFFF00 ) // RGB red + #define LL_PROFILE_ALLOC(ptr, size) TracyAlloc(ptr, size) + #define LL_PROFILE_FREE(ptr) TracyFree(ptr) + #endif +#else + #define LL_PROFILER_FRAME_END + #define LL_PROFILER_SET_THREAD_NAME( name ) (void)(name) +#endif // LL_PROFILER + +#endif // LL_PROFILER_H diff --git a/indra/llcommon/llsd.cpp b/indra/llcommon/llsd.cpp index 57b746889d..605f6bf0e3 100644 --- a/indra/llcommon/llsd.cpp +++ b/indra/llcommon/llsd.cpp @@ -400,6 +400,7 @@ namespace ImplMap& ImplMap::makeMap(LLSD::Impl*& var) { + LL_PROFILE_ZONE_SCOPED; if (shared()) { ImplMap* i = new ImplMap(mData); @@ -414,18 +415,21 @@ namespace bool ImplMap::has(const LLSD::String& k) const { + LL_PROFILE_ZONE_SCOPED; DataMap::const_iterator i = mData.find(k); return i != mData.end(); } LLSD ImplMap::get(const LLSD::String& k) const { + LL_PROFILE_ZONE_SCOPED; DataMap::const_iterator i = mData.find(k); return (i != mData.end()) ? 
i->second : LLSD(); } LLSD ImplMap::getKeys() const { + LL_PROFILE_ZONE_SCOPED; LLSD keys = LLSD::emptyArray(); DataMap::const_iterator iter = mData.begin(); while (iter != mData.end()) @@ -438,11 +442,13 @@ namespace void ImplMap::insert(const LLSD::String& k, const LLSD& v) { + LL_PROFILE_ZONE_SCOPED; mData.insert(DataMap::value_type(k, v)); } void ImplMap::erase(const LLSD::String& k) { + LL_PROFILE_ZONE_SCOPED; mData.erase(k); } @@ -684,6 +690,7 @@ const LLSD::Impl& LLSD::Impl::safe(const Impl* impl) ImplMap& LLSD::Impl::makeMap(Impl*& var) { + LL_PROFILE_ZONE_SCOPED; ImplMap* im = new ImplMap; reset(var, im); return *im; @@ -887,11 +894,16 @@ LLSD& LLSD::with(const String& k, const LLSD& v) } void LLSD::erase(const String& k) { makeMap(impl).erase(k); } -LLSD& LLSD::operator[](const String& k) - { return makeMap(impl).ref(k); } +LLSD& LLSD::operator[](const String& k) +{ + LL_PROFILE_ZONE_SCOPED; + return makeMap(impl).ref(k); +} const LLSD& LLSD::operator[](const String& k) const - { return safe(impl).ref(k); } - +{ + LL_PROFILE_ZONE_SCOPED; + return safe(impl).ref(k); +} LLSD LLSD::emptyArray() { @@ -914,10 +926,16 @@ LLSD& LLSD::with(Integer i, const LLSD& v) LLSD& LLSD::append(const LLSD& v) { return makeArray(impl).append(v); } void LLSD::erase(Integer i) { makeArray(impl).erase(i); } -LLSD& LLSD::operator[](Integer i) - { return makeArray(impl).ref(i); } +LLSD& LLSD::operator[](Integer i) +{ + LL_PROFILE_ZONE_SCOPED; + return makeArray(impl).ref(i); +} const LLSD& LLSD::operator[](Integer i) const - { return safe(impl).ref(i); } +{ + LL_PROFILE_ZONE_SCOPED; + return safe(impl).ref(i); +} static const char *llsd_dump(const LLSD &llsd, bool useXMLFormat) { diff --git a/indra/llcommon/llsd.h b/indra/llcommon/llsd.h index 5b6d5545af..b8ddf21596 100644 --- a/indra/llcommon/llsd.h +++ b/indra/llcommon/llsd.h @@ -290,9 +290,17 @@ public: LLSD& with(const String&, const LLSD&); LLSD& operator[](const String&); - LLSD& operator[](const char* c) { return (*this)[String(c)]; } + LLSD& operator[](const char* c) + { + LL_PROFILE_ZONE_SCOPED; + return (*this)[String(c)]; + } const LLSD& operator[](const String&) const; - const LLSD& operator[](const char* c) const { return (*this)[String(c)]; } + const LLSD& operator[](const char* c) const + { + LL_PROFILE_ZONE_SCOPED; + return (*this)[String(c)]; + } //@} /** @name Array Values */ diff --git a/indra/llcommon/llsdparam.cpp b/indra/llcommon/llsdparam.cpp index 2e7b46f885..af4ccf25fd 100644 --- a/indra/llcommon/llsdparam.cpp +++ b/indra/llcommon/llsdparam.cpp @@ -37,8 +37,6 @@ static LLInitParam::Parser::parser_write_func_map_t sWriteFuncs; static LLInitParam::Parser::parser_inspect_func_map_t sInspectFuncs; static const LLSD NO_VALUE_MARKER; -LLTrace::BlockTimerStatHandle FTM_SD_PARAM_ADAPTOR("LLSD to LLInitParam conversion"); - // // LLParamSDParser // diff --git a/indra/llcommon/llsdparam.h b/indra/llcommon/llsdparam.h index 93910b70ae..82a623a8a0 100644 --- a/indra/llcommon/llsdparam.h +++ b/indra/llcommon/llsdparam.h @@ -110,7 +110,6 @@ private: }; -extern LL_COMMON_API LLTrace::BlockTimerStatHandle FTM_SD_PARAM_ADAPTOR; template<typename T> class LLSDParamAdapter : public T { @@ -118,7 +117,7 @@ public: LLSDParamAdapter() {} LLSDParamAdapter(const LLSD& sd) { - LL_RECORD_BLOCK_TIME(FTM_SD_PARAM_ADAPTOR); + LL_PROFILE_ZONE_SCOPED; LLParamSDParser parser; // don't spam for implicit parsing of LLSD, as we want to allow arbitrary freeform data and ignore most of it bool parse_silently = true; diff --git a/indra/llcommon/llsdutil.cpp 
b/indra/llcommon/llsdutil.cpp index eb3a96b133..c2fe15e9b7 100644 --- a/indra/llcommon/llsdutil.cpp +++ b/indra/llcommon/llsdutil.cpp @@ -214,6 +214,8 @@ BOOL compare_llsd_with_template( const LLSD& template_llsd, LLSD& resultant_llsd) { + LL_PROFILE_ZONE_SCOPED + if ( llsd_to_test.isUndefined() && template_llsd.isDefined() ) @@ -335,6 +337,8 @@ bool filter_llsd_with_template( const LLSD & template_llsd, LLSD & resultant_llsd) { + LL_PROFILE_ZONE_SCOPED + if (llsd_to_test.isUndefined() && template_llsd.isDefined()) { resultant_llsd = template_llsd; @@ -529,6 +533,8 @@ class TypeLookup public: TypeLookup() { + LL_PROFILE_ZONE_SCOPED + for (const Data *di(boost::begin(typedata)), *dend(boost::end(typedata)); di != dend; ++di) { mMap[di->type] = di->name; @@ -537,6 +543,8 @@ public: std::string lookup(LLSD::Type type) const { + LL_PROFILE_ZONE_SCOPED + MapType::const_iterator found = mMap.find(type); if (found != mMap.end()) { @@ -587,6 +595,8 @@ static std::string match_types(LLSD::Type expect, // prototype.type() LLSD::Type actual, // type we're checking const std::string& pfx) // as for llsd_matches { + LL_PROFILE_ZONE_SCOPED + // Trivial case: if the actual type is exactly what we expect, we're good. if (actual == expect) return ""; @@ -624,6 +634,8 @@ static std::string match_types(LLSD::Type expect, // prototype.type() // see docstring in .h file std::string llsd_matches(const LLSD& prototype, const LLSD& data, const std::string& pfx) { + LL_PROFILE_ZONE_SCOPED + // An undefined prototype means that any data is valid. // An undefined slot in an array or map prototype means that any data // may fill that slot. @@ -756,6 +768,8 @@ std::string llsd_matches(const LLSD& prototype, const LLSD& data, const std::str bool llsd_equals(const LLSD& lhs, const LLSD& rhs, int bits) { + LL_PROFILE_ZONE_SCOPED + // We're comparing strict equality of LLSD representation rather than // performing any conversions. So if the types aren't equal, the LLSD // values aren't equal. @@ -864,6 +878,8 @@ namespace llsd LLSD& drill(LLSD& blob, const LLSD& rawPath) { + LL_PROFILE_ZONE_SCOPED + // Treat rawPath uniformly as an array. If it's not already an array, // store it as the only entry in one. (But let's say Undefined means an // empty array.) @@ -889,6 +905,8 @@ LLSD& drill(LLSD& blob, const LLSD& rawPath) // path entry that's bad. for (LLSD::Integer i = 0; i < path.size(); ++i) { + LL_PROFILE_ZONE_NUM( i ) + const LLSD& key{path[i]}; if (key.isString()) { @@ -917,6 +935,8 @@ LLSD& drill(LLSD& blob, const LLSD& rawPath) LLSD drill(const LLSD& blob, const LLSD& path) { + LL_PROFILE_ZONE_SCOPED + // non-const drill() does exactly what we want. Temporarily cast away // const-ness and use that. return drill(const_cast<LLSD&>(blob), path); @@ -929,6 +949,8 @@ LLSD drill(const LLSD& blob, const LLSD& path) // filter may be include to exclude/include keys in a map. LLSD llsd_clone(LLSD value, LLSD filter) { + LL_PROFILE_ZONE_SCOPED + LLSD clone; bool has_filter(filter.isMap()); diff --git a/indra/llcommon/llsingleton.h b/indra/llcommon/llsingleton.h index 7c81d65a8b..6042c0906c 100644 --- a/indra/llcommon/llsingleton.h +++ b/indra/llcommon/llsingleton.h @@ -455,6 +455,7 @@ public: static DERIVED_TYPE* getInstance() { + LL_PROFILE_ZONE_SCOPED; // We know the viewer has LLSingleton dependency circularities. If you // feel strongly motivated to eliminate them, cheers and good luck. // (At that point we could consider a much simpler locking mechanism.) 
@@ -838,4 +839,36 @@ private: \ /* LLSINGLETON() is carefully implemented to permit exactly this */ \ LLSINGLETON_C11(DERIVED_CLASS) {} +// Relatively unsafe singleton implementation that is much faster +// and simpler than LLSingleton, but has no dependency tracking +// or inherent thread safety and requires manual invocation of +// createInstance before first use. +template<class T> +class LLSimpleton +{ +public: + template <typename... ARGS> + static void createInstance(ARGS&&... args) + { + llassert(sInstance == nullptr); + sInstance = new T(std::forward<ARGS>(args)...); + } + + static inline T* getInstance() { return sInstance; } + static inline T& instance() { return *getInstance(); } + static inline bool instanceExists() { return sInstance != nullptr; } + + static void deleteSingleton() + { + delete sInstance; + sInstance = nullptr; + } + +private: + static T* sInstance; +}; + +template <class T> +T* LLSimpleton<T>::sInstance{ nullptr }; + #endif diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 0290eea143..bdea1e76ea 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -37,9 +37,6 @@ #include <winnls.h> // for WideCharToMultiByte #endif -LLTrace::BlockTimerStatHandle FT_STRING_FORMAT("String Format"); - - std::string ll_safe_string(const char* in) { if(in) return std::string(in); @@ -215,7 +212,7 @@ S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar) return inchars - base; } -llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len) +llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) { llutf16string out; @@ -237,27 +234,19 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len) return out; } -llutf16string wstring_to_utf16str(const LLWString &utf32str) -{ - const S32 len = (S32)utf32str.length(); - return wstring_to_utf16str(utf32str, len); -} - -llutf16string utf8str_to_utf16str ( const std::string& utf8str ) +llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) { - LLWString wstr = utf8str_to_wstring ( utf8str ); + LLWString wstr = utf8str_to_wstring ( utf8str, len ); return wstring_to_utf16str ( wstr ); } - -LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len) +LLWString utf16str_to_wstring(const U16* utf16str, size_t len) { LLWString wout; - if((len <= 0) || utf16str.empty()) return wout; + if (len == 0) return wout; S32 i = 0; - // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux): - const U16* chars16 = &(*(utf16str.begin())); + const U16* chars16 = utf16str; while (i < len) { llwchar cur_char; @@ -267,12 +256,6 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len) return wout; } -LLWString utf16str_to_wstring(const llutf16string &utf16str) -{ - const S32 len = (S32)utf16str.length(); - return utf16str_to_wstring(utf16str, len); -} - // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. 
S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len) { @@ -392,8 +375,7 @@ S32 wstring_utf8_length(const LLWString& wstr) return len; } - -LLWString utf8str_to_wstring(const std::string& utf8str, S32 len) +LLWString utf8str_to_wstring(const char* utf8str, size_t len) { LLWString wout; @@ -481,13 +463,7 @@ LLWString utf8str_to_wstring(const std::string& utf8str, S32 len) return wout; } -LLWString utf8str_to_wstring(const std::string& utf8str) -{ - const S32 len = (S32)utf8str.length(); - return utf8str_to_wstring(utf8str, len); -} - -std::string wstring_to_utf8str(const LLWString& utf32str, S32 len) +std::string wstring_to_utf8str(const llwchar* utf32str, size_t len) { std::string out; @@ -503,20 +479,9 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len) return out; } -std::string wstring_to_utf8str(const LLWString& utf32str) -{ - const S32 len = (S32)utf32str.length(); - return wstring_to_utf8str(utf32str, len); -} - -std::string utf16str_to_utf8str(const llutf16string& utf16str) -{ - return wstring_to_utf8str(utf16str_to_wstring(utf16str)); -} - -std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len) +std::string utf16str_to_utf8str(const U16* utf16str, size_t len) { - return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len); + return wstring_to_utf8str(utf16str_to_wstring(utf16str, len)); } std::string utf8str_trim(const std::string& utf8str) @@ -657,17 +622,16 @@ std::string utf8str_removeCRLF(const std::string& utf8str) } #if LL_WINDOWS -std::string ll_convert_wide_to_string(const wchar_t* in) +unsigned int ll_wstring_default_code_page() { - return ll_convert_wide_to_string(in, CP_UTF8); + return CP_UTF8; } -std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) +std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page) { std::string out; if(in) { - int len_in = wcslen(in); int len_out = WideCharToMultiByte( code_page, 0, @@ -699,12 +663,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) return out; } -std::wstring ll_convert_string_to_wide(const std::string& in) -{ - return ll_convert_string_to_wide(in, CP_UTF8); -} - -std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page) +std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page) { // From review: // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, @@ -716,10 +675,10 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_ // reserve an output buffer that will be destroyed on exit, with a place // to put NULL terminator - std::vector<wchar_t> w_out(in.length() + 1); + std::vector<wchar_t> w_out(len + 1); memset(&w_out[0], 0, w_out.size()); - int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), + int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len, &w_out[0], w_out.size() - 1); //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858. 
@@ -729,30 +688,32 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_ return {&w_out[0]}; } -LLWString ll_convert_wide_to_wstring(const std::wstring& in) +LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len) { - // This function, like its converse, is a placeholder, encapsulating a - // guilty little hack: the only "official" way nat has found to convert - // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is - // by using iconv, which we've avoided so far. It kinda sorta works to - // just copy individual characters... - // The point is that if/when we DO introduce some more official way to - // perform such conversions, we should only have to call it here. - return { in.begin(), in.end() }; + // Whether or not std::wstring and llutf16string are distinct types, they + // both hold UTF-16LE characters. (See header file comments.) Pretend this + // wchar_t* sequence is really a U16* sequence and use the conversion we + // define above. + return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len); } -std::wstring ll_convert_wstring_to_wide(const LLWString& in) +std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len) { - // See comments in ll_convert_wide_to_wstring() - return { in.begin(), in.end() }; + // first, convert to llutf16string, for which we have a real implementation + auto utf16str{ wstring_to_utf16str(in, len) }; + // then, because each U16 char must be UTF-16LE encoded, pretend the U16* + // string pointer is a wchar_t* and instantiate a std::wstring of the same + // length. + return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() }; } std::string ll_convert_string_to_utf8_string(const std::string& in) { - auto w_mesg = ll_convert_string_to_wide(in, CP_ACP); - std::string out_utf8(ll_convert_wide_to_string(w_mesg.c_str(), CP_UTF8)); - - return out_utf8; + // If you pass code_page, you must also pass length, otherwise the code + // page parameter will be mistaken for length. + auto w_mesg = ll_convert_string_to_wide(in, in.length(), CP_ACP); + // CP_UTF8 is default -- see ll_wstring_default_code_page() above. 
+ return ll_convert_wide_to_string(w_mesg); } namespace @@ -1356,7 +1317,7 @@ bool LLStringUtil::formatDatetime(std::string& replacement, std::string token, template<> S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) { - LL_RECORD_BLOCK_TIME(FT_STRING_FORMAT); + LL_PROFILE_ZONE_SCOPED; S32 res = 0; std::string output; @@ -1429,7 +1390,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions) template<> S32 LLStringUtil::format(std::string& s, const LLSD& substitutions) { - LL_RECORD_BLOCK_TIME(FT_STRING_FORMAT); + LL_PROFILE_ZONE_SCOPED; S32 res = 0; if (!substitutions.isMap()) diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 4263122f36..d94f549480 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -27,9 +27,11 @@ #ifndef LL_LLSTRING_H #define LL_LLSTRING_H +#include <boost/call_traits.hpp> #include <boost/optional/optional.hpp> #include <string> #include <cstdio> +#include <cwchar> // std::wcslen() //#include <locale> #include <iomanip> #include <algorithm> @@ -527,14 +529,71 @@ struct ll_convert_impl<T, T> T operator()(const T& in) const { return in; } }; +// simple construction from char* +template<typename T> +struct ll_convert_impl<T, const typename T::value_type*> +{ + T operator()(const typename T::value_type* in) const { return { in }; } +}; + // specialize ll_convert_impl<TO, FROM> to return EXPR #define ll_convert_alias(TO, FROM, EXPR) \ template<> \ struct ll_convert_impl<TO, FROM> \ { \ - TO operator()(const FROM& in) const { return EXPR; } \ + /* param_type optimally passes both char* and string */ \ + TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \ +} + +// If all we're doing is copying characters, pass this to ll_convert_alias as +// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl +// specialization above, it implies TO{ in.begin(), in.end() }. +#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } + +// Generic name for strlen() / wcslen() - the default implementation should +// (!) work with U16 and llwchar, but we don't intend to engage it. +template <typename CHARTYPE> +size_t ll_convert_length(const CHARTYPE* zstr) +{ + const CHARTYPE* zp; + // classic C string scan + for (zp = zstr; *zp; ++zp) + ; + return (zp - zstr); } +// specialize where we have a library function; may use intrinsic operations +template <> +inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); } +template <> +inline size_t ll_convert_length<char> (const char* zstr) { return std::strlen(zstr); } + +// ll_convert_forms() is short for a bunch of boilerplate. It defines +// longname(const char*, len), longname(const char*), longname(const string&) +// and longname(const string&, len) so calls written pre-ll_convert() will +// work. Most of these overloads will be unified once we turn on C++17 and can +// use std::string_view. +// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*) +// and ll_convert<OUTSTR>(const string&) will work. 
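As a concrete illustration (not part of the commit) of the overload family that a single ll_convert_forms() invocation provides, assuming the macro defined just below and the ll_convert<TO>() dispatcher mentioned above:

    // given: ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring);
    // all of these spellings resolve to the same conversion
    std::string utf8("hi");
    LLWString a = utf8str_to_wstring(utf8);                      // string object
    LLWString b = utf8str_to_wstring(utf8.c_str());              // char*, length scanned
    LLWString c = utf8str_to_wstring(utf8.c_str(), utf8.size()); // char* + known length
    LLWString d = ll_convert<LLWString>(utf8);                   // generic form
    LLWString e = ll_convert<LLWString>(utf8.c_str());           // generic form, from char*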
+#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \ +LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \ +inline auto longname(const INSTR& in, size_t len) \ +{ \ + return longname(in.c_str(), len); \ +} \ +inline auto longname(const INSTR::value_type* in) \ +{ \ + return longname(in, ll_convert_length(in)); \ +} \ +inline auto longname(const INSTR& in) \ +{ \ + return longname(in.c_str(), in.length()); \ +} \ +/* string param */ \ +aliasmacro(OUTSTR, INSTR, longname(in)); \ +/* char* param */ \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + // Make the incoming string a utf8 string. Replaces any unknown glyph // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest // of the data may not be recovered. @@ -571,63 +630,47 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw); // LL_WCHAR_T_NATIVE. typedef std::basic_string<U16> llutf16string; -#if ! defined(LL_WCHAR_T_NATIVE) -// wchar_t is identical to U16, and std::wstring is identical to llutf16string. -// Defining an ll_convert alias involving llutf16string would collide with the -// comparable preferred alias involving std::wstring. (In this scenario, if -// you pass llutf16string, it will engage the std::wstring specialization.) -#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing -#else // defined(LL_WCHAR_T_NATIVE) -// wchar_t is a distinct native type, so llutf16string is also a distinct -// type, and there IS a point to converting separately to/from llutf16string. -// (But why? Windows APIs are still defined in terms of wchar_t, and -// in this scenario llutf16string won't work for them!) -#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +// Considering wchar_t, llwchar and U16, there are three relevant cases: +#if LLWCHAR_IS_WCHAR_T // every which way but Windows +// llwchar is identical to wchar_t, LLWString is identical to std::wstring. +// U16 is distinct, llutf16string is distinct (though pretty useless). +// Given conversions to/from LLWString and to/from llutf16string, conversions +// involving std::wstring would collide. +#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving llutf16string without collisions +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#elif defined(LL_WCHAR_T_NATIVE) // Windows, either clang or MS /Zc:wchar_t +// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types. +// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string +// can all be defined. +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#else // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t- +// wchar_t is identical to U16, std::wstring is identical to llutf16string. +// Given conversions to/from LLWString and to/from std::wstring, conversions +// involving llutf16string would collide. 
+#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving std::wstring without collisions +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#endif + +ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring); +ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str); +ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str); +ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring); -#if LL_WINDOWS -// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact, -// wchar_t is native. Everywhere but Windows, we use it for llwchar (see -// stdtypes.h). That makes LLWString identical to std::wstring, so these -// aliases for std::wstring would collide with those for LLWString. Only -// define on Windows, where converting between std::wstring and llutf16string -// means copying chars. -ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end())); -ll_convert_alias(std::wstring, llutf16string, std::wstring(in.begin(), in.end())); -#endif // LL_WINDOWS -#endif // defined(LL_WCHAR_T_NATIVE) - -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len); -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str); -ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in)); - -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len); -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str); -ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in)); - -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len); -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str ); -ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in)); - -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len); -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str); // Same function, better name. 
JC inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); } -// best name of all -ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in)); -// LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str); -ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in)); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str); -ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in)); +ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str); +ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str); -#if LL_WINDOWS +// an older alias for utf16str_to_utf8str(llutf16string) inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} -#endif // Length of this UTF32 string in bytes when transformed to UTF8 LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); @@ -701,42 +744,48 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); //@{ /** - * @brief Convert a wide string to std::string + * @brief Convert a wide string to/from std::string + * Convert a Windows wide string to/from our LLWString * * This replaces the unsafe W2A macro from ATL. */ -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page); -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8 -inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page) -{ - return ll_convert_wide_to_string(in.c_str(), code_page); -} -inline std::string ll_convert_wide_to_string(const std::wstring& in) -{ - return ll_convert_wide_to_string(in.c_str()); -} -ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); - -/** - * Converts a string to wide string. - */ -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in, - unsigned int code_page); -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in); - // default CP_UTF8 -ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); - -/** - * Convert a Windows wide string to our LLWString - */ -LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in); -ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); - -/** - * Convert LLWString to Windows wide string - */ -LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in); -ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); +// Avoid requiring this header to #include the Windows header file declaring +// our actual default code_page by delegating this function to our .cpp file. +LL_COMMON_API unsigned int ll_wstring_default_code_page(); + +// This is like ll_convert_forms(), with the added complexity of a code page +// parameter that may or may not be passed. 
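A usage sketch (illustrative, not from the commit) of the Windows conversion overloads that ll_convert_cp_forms() below generates; as the llstring.cpp comment above notes, a non-default code page can only be passed together with an explicit length:

    std::string ansi("legacy text");
    // default code page, i.e. ll_wstring_default_code_page()
    std::wstring w1 = ll_convert_string_to_wide(ansi);
    // non-default code page: pass the length too -- there is no
    // (string, code_page) overload, so CP_ACP would otherwise bind to len
    std::wstring w2 = ll_convert_string_to_wide(ansi, ansi.length(), CP_ACP);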
+#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \ +/* declare the only nontrivial implementation (in .cpp file) */ \ +LL_COMMON_API OUTSTR longname( \ + const INSTR::value_type* in, \ + size_t len, \ + unsigned int code_page=ll_wstring_default_code_page()); \ +/* if passed only a char pointer, scan for nul terminator */ \ +inline auto longname(const INSTR::value_type* in) \ +{ \ + return longname(in, ll_convert_length(in)); \ +} \ +/* if passed string and length, extract its char pointer */ \ +inline auto longname( \ + const INSTR& in, \ + size_t len, \ + unsigned int code_page=ll_wstring_default_code_page()) \ +{ \ + return longname(in.c_str(), len, code_page); \ +} \ +/* if passed only a string object, no scan, pass known length */ \ +inline auto longname(const INSTR& in) \ +{ \ + return longname(in.c_str(), in.length()); \ +} \ +aliasmacro(OUTSTR, INSTR, longname(in)); \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + +ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string); +ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide); + ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring); + ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide); /** * Converts incoming string into utf8 string @@ -1937,4 +1986,14 @@ void LLStringUtilBase<T>::truncate(string_type& string, size_type count) string.resize(count < cur_size ? count : cur_size); } +// The good thing about *declaration* macros, vs. usage macros, is that now +// we're done with them: we don't need them to bleed into the consuming source +// file. +#undef ll_convert_alias +#undef ll_convert_u16_alias +#undef ll_convert_wstr_alias +#undef LL_CONVERT_COPY_CHARS +#undef ll_convert_forms +#undef ll_convert_cp_forms + #endif // LL_STRING_H diff --git a/indra/llcommon/llsys.cpp b/indra/llcommon/llsys.cpp index 2ca15a31c6..18f4684b49 100644 --- a/indra/llcommon/llsys.cpp +++ b/indra/llcommon/llsys.cpp @@ -828,6 +828,7 @@ LLSD LLMemoryInfo::getStatsMap() const LLMemoryInfo& LLMemoryInfo::refresh() { + LL_PROFILE_ZONE_SCOPED mStatsMap = loadStatsMap(); LL_DEBUGS("LLMemoryInfo") << "Populated mStatsMap:\n"; @@ -837,11 +838,9 @@ LLMemoryInfo& LLMemoryInfo::refresh() return *this; } -static LLTrace::BlockTimerStatHandle FTM_MEMINFO_LOAD_STATS("MemInfo Load Stats"); - LLSD LLMemoryInfo::loadStatsMap() { - LL_RECORD_BLOCK_TIME(FTM_MEMINFO_LOAD_STATS); + LL_PROFILE_ZONE_SCOPED; // This implementation is derived from stream() code (as of 2011-06-29). Stats stats; diff --git a/indra/llcommon/llthread.cpp b/indra/llcommon/llthread.cpp index 6d531d842d..11f5a015f1 100644 --- a/indra/llcommon/llthread.cpp +++ b/indra/llcommon/llthread.cpp @@ -135,6 +135,8 @@ void LLThread::threadRun() set_thread_name(-1, mName.c_str()); #endif + LL_PROFILER_SET_THREAD_NAME( mName.c_str() ); + // this is the first point at which we're actually running in the new thread mID = currentID(); @@ -331,6 +333,7 @@ bool LLThread::runCondition(void) // Stop thread execution if requested until unpaused. void LLThread::checkPause() { + LL_PROFILE_ZONE_SCOPED mDataLock->lock(); // This is in a while loop because the pthread API allows for spurious wakeups. 
@@ -362,17 +365,20 @@ void LLThread::setQuitting() // static LLThread::id_t LLThread::currentID() { + LL_PROFILE_ZONE_SCOPED return std::this_thread::get_id(); } // static void LLThread::yield() { + LL_PROFILE_ZONE_SCOPED std::this_thread::yield(); } void LLThread::wake() { + LL_PROFILE_ZONE_SCOPED mDataLock->lock(); if(!shouldSleep()) { @@ -383,6 +389,7 @@ void LLThread::wake() void LLThread::wakeLocked() { + LL_PROFILE_ZONE_SCOPED if(!shouldSleep()) { mRunCondition->signal(); @@ -391,11 +398,13 @@ void LLThread::wakeLocked() void LLThread::lockData() { + LL_PROFILE_ZONE_SCOPED mDataLock->lock(); } void LLThread::unlockData() { + LL_PROFILE_ZONE_SCOPED mDataLock->unlock(); } diff --git a/indra/llcommon/llthreadsafequeue.h b/indra/llcommon/llthreadsafequeue.h index 26e0d71d31..2806506550 100644 --- a/indra/llcommon/llthreadsafequeue.h +++ b/indra/llcommon/llthreadsafequeue.h @@ -1,6 +1,6 @@ /** * @file llthreadsafequeue.h - * @brief Base classes for thread, mutex and condition handling. + * @brief Queue protected with mutexes for cross-thread use * * $LicenseInfo:firstyear=2004&license=viewerlgpl$ * Second Life Viewer Source Code @@ -27,16 +27,19 @@ #ifndef LL_LLTHREADSAFEQUEUE_H #define LL_LLTHREADSAFEQUEUE_H -#include "llexception.h" -#include <deque> -#include <string> -#include <chrono> -#include "mutex.h" #include "llcoros.h" #include LLCOROS_MUTEX_HEADER #include <boost/fiber/timed_mutex.hpp> #include LLCOROS_CONDVAR_HEADER +#include "llexception.h" +#include "mutex.h" +#include <chrono> +#include <queue> +#include <string> +/***************************************************************************** +* LLThreadSafeQueue +*****************************************************************************/ // // A general queue exception. // @@ -66,70 +69,116 @@ public: } }; -// -// Implements a thread safe FIFO. -// -template<typename ElementT> +/** + * Implements a thread safe FIFO. + */ +// Let the default std::queue default to underlying std::deque. Override if +// desired. +template<typename ElementT, typename QueueT=std::queue<ElementT>> class LLThreadSafeQueue { public: typedef ElementT value_type; - - // If the pool is set to NULL one will be allocated and managed by this - // queue. + + // Limiting the number of pending items prevents unbounded growth of the + // underlying queue. LLThreadSafeQueue(U32 capacity = 1024); - - // Add an element to the front of queue (will block if the queue has - // reached capacity). + virtual ~LLThreadSafeQueue() {} + + // Add an element to the queue (will block if the queue has reached + // capacity). // // This call will raise an interrupt error if the queue is closed while // the caller is blocked. - void pushFront(ElementT const & element); - - // Try to add an element to the front of queue without blocking. Returns + template <typename T> + void push(T&& element); + // legacy name + void pushFront(ElementT const & element) { return push(element); } + + // Add an element to the queue (will block if the queue has reached + // capacity). Return false if the queue is closed before push is possible. + template <typename T> + bool pushIfOpen(T&& element); + + // Try to add an element to the queue without blocking. Returns // true only if the element was actually added. - bool tryPushFront(ElementT const & element); + template <typename T> + bool tryPush(T&& element); + // legacy name + bool tryPushFront(ElementT const & element) { return tryPush(element); } - // Try to add an element to the front of queue, blocking if full but with - // timeout. 
Returns true if the element was added. + // Try to add an element to the queue, blocking if full but with timeout + // after specified duration. Returns true if the element was added. // There are potentially two different timeouts involved: how long to try // to lock the mutex, versus how long to wait for the queue to stop being // full. Careful settings for each timeout might be orders of magnitude // apart. However, this method conflates them. + template <typename Rep, typename Period, typename T> + bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout, + T&& element); + // legacy name template <typename Rep, typename Period> bool tryPushFrontFor(const std::chrono::duration<Rep, Period>& timeout, - ElementT const & element); + ElementT const & element) { return tryPushFor(timeout, element); } + + // Try to add an element to the queue, blocking if full but with + // timeout at specified time_point. Returns true if the element was added. + template <typename Clock, typename Duration, typename T> + bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until, + T&& element); + // no legacy name because this is a newer method - // Pop the element at the end of the queue (will block if the queue is + // Pop the element at the head of the queue (will block if the queue is // empty). // // This call will raise an interrupt error if the queue is closed while // the caller is blocked. - ElementT popBack(void); - - // Pop an element from the end of the queue if there is one available. + ElementT pop(void); + // legacy name + ElementT popBack(void) { return pop(); } + + // Pop an element from the head of the queue if there is one available. // Returns true only if an element was popped. - bool tryPopBack(ElementT & element); - + bool tryPop(ElementT & element); + // legacy name + bool tryPopBack(ElementT & element) { return tryPop(element); } + + // Pop the element at the head of the queue, blocking if empty, with + // timeout after specified duration. Returns true if an element was popped. + template <typename Rep, typename Period> + bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, ElementT& element); + // no legacy name because this is a newer method + + // Pop the element at the head of the queue, blocking if empty, with + // timeout at specified time_point. Returns true if an element was popped. + template <typename Clock, typename Duration> + bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until, + ElementT& element); + // no legacy name because this is a newer method + // Returns the size of the queue. size_t size(); + //Returns the capacity of the queue. 
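A minimal producer/consumer sketch (illustrative, not part of the commit) using the renamed push/pop API above, together with the close() semantics described just below:

    LLThreadSafeQueue<int> queue(8);        // capacity 8

    // producer side
    if (! queue.tryPushFor(std::chrono::milliseconds(10), 42))
    {
        // queue stayed full for 10ms, or has been closed
    }
    queue.close();                          // no further pushes accepted

    // consumer side: keep draining items that were pushed before close()
    int value;
    while (queue.tryPopFor(std::chrono::milliseconds(10), value))
    {
        // process value
    }
    // pop() on a closed, fully drained queue throws LLThreadSafeQueueInterrupt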
+ U32 capacity() { return mCapacity; } + // closes the queue: - // - every subsequent pushFront() call will throw LLThreadSafeQueueInterrupt - // - every subsequent tryPushFront() call will return false - // - popBack() calls will return normally until the queue is drained, then - // every subsequent popBack() will throw LLThreadSafeQueueInterrupt - // - tryPopBack() calls will return normally until the queue is drained, - // then every subsequent tryPopBack() call will return false + // - every subsequent push() call will throw LLThreadSafeQueueInterrupt + // - every subsequent tryPush() call will return false + // - pop() calls will return normally until the queue is drained, then + // every subsequent pop() will throw LLThreadSafeQueueInterrupt + // - tryPop() calls will return normally until the queue is drained, + // then every subsequent tryPop() call will return false void close(); - // detect closed state + // producer end: are we prevented from pushing any additional items? bool isClosed(); - // inverse of isClosed() - explicit operator bool(); + // consumer end: are we done, is the queue entirely drained? + bool done(); -private: - std::deque< ElementT > mStorage; +protected: + typedef QueueT queue_type; + QueueT mStorage; U32 mCapacity; bool mClosed; @@ -137,37 +186,154 @@ private: typedef std::unique_lock<decltype(mLock)> lock_t; boost::fibers::condition_variable_any mCapacityCond; boost::fibers::condition_variable_any mEmptyCond; -}; -// LLThreadSafeQueue -//----------------------------------------------------------------------------- + enum pop_result { EMPTY, DONE, WAITING, POPPED }; + // implementation logic, suitable for passing to tryLockUntil() + template <typename Clock, typename Duration> + pop_result tryPopUntil_(lock_t& lock, + const std::chrono::time_point<Clock, Duration>& until, + ElementT& element); + // if we're able to lock immediately, do so and run the passed callable, + // which must accept lock_t& and return bool + template <typename CALLABLE> + bool tryLock(CALLABLE&& callable); + // if we're able to lock before the passed time_point, do so and run the + // passed callable, which must accept lock_t& and return bool + template <typename Clock, typename Duration, typename CALLABLE> + bool tryLockUntil(const std::chrono::time_point<Clock, Duration>& until, + CALLABLE&& callable); + // while lock is locked, really push the passed element, if we can + template <typename T> + bool push_(lock_t& lock, T&& element); + // while lock is locked, really pop the head element, if we can + pop_result pop_(lock_t& lock, ElementT& element); + // Is the current head element ready to pop? We say yes; subclass can + // override as needed. + virtual bool canPop(const ElementT& head) const { return true; } +}; -template<typename ElementT> -LLThreadSafeQueue<ElementT>::LLThreadSafeQueue(U32 capacity) : +/***************************************************************************** +* PriorityQueueAdapter +*****************************************************************************/ +namespace LL +{ + /** + * std::priority_queue's API is almost like std::queue, intentionally of + * course, but you must access the element about to pop() as top() rather + * than as front(). Make an adapter for use with LLThreadSafeQueue. 
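Taken together, the QueueT parameter and the virtual canPop() hook above let a subclass reorder items (via the PriorityQueueAdapter introduced here) and hold back items that are not yet ready to pop. A hypothetical sketch (the TimedItem/TimedQueue names are invented for illustration and are not part of this commit):

    // hypothetical example types, not viewer code
    struct TimedItem
    {
        std::chrono::steady_clock::time_point mWhen;
        std::string mPayload;
        // invert the comparison so the earliest time sorts to the head
        bool operator<(const TimedItem& rhs) const { return mWhen > rhs.mWhen; }
    };

    class TimedQueue:
        public LLThreadSafeQueue<TimedItem, LL::PriorityQueueAdapter<TimedItem>>
    {
        // only let the head item pop once its time has arrived
        bool canPop(const TimedItem& head) const override
        {
            return head.mWhen <= std::chrono::steady_clock::now();
        }
    };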
+ */ + template <typename T, typename Container=std::vector<T>, + typename Compare=std::less<typename Container::value_type>> + class PriorityQueueAdapter + { + public: + // publish all the same types + typedef std::priority_queue<T, Container, Compare> queue_type; + typedef typename queue_type::container_type container_type; + typedef typename queue_type::value_compare value_compare; + typedef typename queue_type::value_type value_type; + typedef typename queue_type::size_type size_type; + typedef typename queue_type::reference reference; + typedef typename queue_type::const_reference const_reference; + + // Although std::queue defines both const and non-const front() + // methods, std::priority_queue defines only const top(). + const_reference front() const { return mQ.top(); } + // std::priority_queue has no equivalent to back(), so it's good that + // LLThreadSafeQueue doesn't use it. + + // All the rest of these merely forward to the corresponding + // queue_type methods. + bool empty() const { return mQ.empty(); } + size_type size() const { return mQ.size(); } + void push(const value_type& value) { mQ.push(value); } + void push(value_type&& value) { mQ.push(std::move(value)); } + template <typename... Args> + void emplace(Args&&... args) { mQ.emplace(std::forward<Args>(args)...); } + void pop() { mQ.pop(); } + + private: + queue_type mQ; + }; +} // namespace LL + + +/***************************************************************************** +* LLThreadSafeQueue implementation +*****************************************************************************/ +template<typename ElementT, typename QueueT> +LLThreadSafeQueue<ElementT, QueueT>::LLThreadSafeQueue(U32 capacity) : mCapacity(capacity), mClosed(false) { } -template<typename ElementT> -void LLThreadSafeQueue<ElementT>::pushFront(ElementT const & element) +// if we're able to lock immediately, do so and run the passed callable, which +// must accept lock_t& and return bool +template <typename ElementT, typename QueueT> +template <typename CALLABLE> +bool LLThreadSafeQueue<ElementT, QueueT>::tryLock(CALLABLE&& callable) +{ + LL_PROFILE_ZONE_SCOPED; + lock_t lock1(mLock, std::defer_lock); + if (!lock1.try_lock()) + return false; + + return std::forward<CALLABLE>(callable)(lock1); +} + + +// if we're able to lock before the passed time_point, do so and run the +// passed callable, which must accept lock_t& and return bool +template <typename ElementT, typename QueueT> +template <typename Clock, typename Duration, typename CALLABLE> +bool LLThreadSafeQueue<ElementT, QueueT>::tryLockUntil( + const std::chrono::time_point<Clock, Duration>& until, + CALLABLE&& callable) { + LL_PROFILE_ZONE_SCOPED; + lock_t lock1(mLock, std::defer_lock); + if (!lock1.try_lock_until(until)) + return false; + + return std::forward<CALLABLE>(callable)(lock1); +} + + +// while lock is locked, really push the passed element, if we can +template <typename ElementT, typename QueueT> +template <typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::push_(lock_t& lock, T&& element) +{ + LL_PROFILE_ZONE_SCOPED; + if (mStorage.size() >= mCapacity) + return false; + + mStorage.push(std::forward<T>(element)); + lock.unlock(); + // now that we've pushed, if somebody's been waiting to pop, signal them + mEmptyCond.notify_one(); + return true; +} + + +template <typename ElementT, typename QueueT> +template <typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::pushIfOpen(T&& element) +{ + LL_PROFILE_ZONE_SCOPED; lock_t lock1(mLock); while (true) { + // On the producer 
side, it doesn't matter whether the queue has been + // drained or not: the moment either end calls close(), further push() + // operations will fail. if (mClosed) - { - LLTHROW(LLThreadSafeQueueInterrupt()); - } + return false; - if (mStorage.size() < mCapacity) - { - mStorage.push_front(element); - lock1.unlock(); - mEmptyCond.notify_one(); - return; - } + if (push_(lock1, std::forward<T>(element))) + return true; // Storage Full. Wait for signal. mCapacityCond.wait(lock1); @@ -175,142 +341,250 @@ void LLThreadSafeQueue<ElementT>::pushFront(ElementT const & element) } -template <typename ElementT> -template <typename Rep, typename Period> -bool LLThreadSafeQueue<ElementT>::tryPushFrontFor(const std::chrono::duration<Rep, Period>& timeout, - ElementT const & element) +template <typename ElementT, typename QueueT> +template<typename T> +void LLThreadSafeQueue<ElementT, QueueT>::push(T&& element) { - // Convert duration to time_point: passing the same timeout duration to - // each of multiple calls is wrong. - auto endpoint = std::chrono::steady_clock::now() + timeout; + LL_PROFILE_ZONE_SCOPED; + if (! pushIfOpen(std::forward<T>(element))) + { + LLTHROW(LLThreadSafeQueueInterrupt()); + } +} - lock_t lock1(mLock, std::defer_lock); - if (!lock1.try_lock_until(endpoint)) - return false; - while (true) - { - if (mClosed) +template<typename ElementT, typename QueueT> +template<typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPush(T&& element) +{ + LL_PROFILE_ZONE_SCOPED; + return tryLock( + [this, element=std::move(element)](lock_t& lock) { - return false; - } + if (mClosed) + return false; + return push_(lock, std::move(element)); + }); +} - if (mStorage.size() < mCapacity) - { - mStorage.push_front(element); - lock1.unlock(); - mEmptyCond.notify_one(); - return true; - } - // Storage Full. Wait for signal. - if (LLCoros::cv_status::timeout == mCapacityCond.wait_until(lock1, endpoint)) - { - // timed out -- formally we might recheck both conditions above - return false; - } - // If we didn't time out, we were notified for some reason. Loop back - // to check. - } +template <typename ElementT, typename QueueT> +template <typename Rep, typename Period, typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPushFor( + const std::chrono::duration<Rep, Period>& timeout, + T&& element) +{ + LL_PROFILE_ZONE_SCOPED; + // Convert duration to time_point: passing the same timeout duration to + // each of multiple calls is wrong. + return tryPushUntil(std::chrono::steady_clock::now() + timeout, + std::forward<T>(element)); } -template<typename ElementT> -bool LLThreadSafeQueue<ElementT>::tryPushFront(ElementT const & element) +template <typename ElementT, typename QueueT> +template <typename Clock, typename Duration, typename T> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPushUntil( + const std::chrono::time_point<Clock, Duration>& until, + T&& element) { - lock_t lock1(mLock, std::defer_lock); - if (!lock1.try_lock()) - return false; + LL_PROFILE_ZONE_SCOPED; + return tryLockUntil( + until, + [this, until, element=std::move(element)](lock_t& lock) + { + while (true) + { + if (mClosed) + { + return false; + } + + if (push_(lock, std::move(element))) + return true; + + // Storage Full. Wait for signal. + if (LLCoros::cv_status::timeout == mCapacityCond.wait_until(lock, until)) + { + // timed out -- formally we might recheck both conditions above + return false; + } + // If we didn't time out, we were notified for some reason. Loop back + // to check. 
+ } + }); +} - if (mClosed) - return false; - if (mStorage.size() >= mCapacity) - return false; +// while lock is locked, really pop the head element, if we can +template <typename ElementT, typename QueueT> +typename LLThreadSafeQueue<ElementT, QueueT>::pop_result +LLThreadSafeQueue<ElementT, QueueT>::pop_(lock_t& lock, ElementT& element) +{ + LL_PROFILE_ZONE_SCOPED; + // If mStorage is empty, there's no head element. + if (mStorage.empty()) + return mClosed? DONE : EMPTY; - mStorage.push_front(element); - lock1.unlock(); - mEmptyCond.notify_one(); - return true; + // If there's a head element, pass it to canPop() to see if it's ready to pop. + if (! canPop(mStorage.front())) + return WAITING; + + // std::queue::front() is the element about to pop() + element = mStorage.front(); + mStorage.pop(); + lock.unlock(); + // now that we've popped, if somebody's been waiting to push, signal them + mCapacityCond.notify_one(); + return POPPED; } -template<typename ElementT> -ElementT LLThreadSafeQueue<ElementT>::popBack(void) +template<typename ElementT, typename QueueT> +ElementT LLThreadSafeQueue<ElementT, QueueT>::pop(void) { + LL_PROFILE_ZONE_SCOPED; lock_t lock1(mLock); + ElementT value; while (true) { - if (!mStorage.empty()) - { - ElementT value = mStorage.back(); - mStorage.pop_back(); - lock1.unlock(); - mCapacityCond.notify_one(); - return value; - } - - if (mClosed) + // On the consumer side, we always try to pop before checking mClosed + // so we can finish draining the queue. + pop_result popped = pop_(lock1, value); + if (popped == POPPED) + return std::move(value); + + // Once the queue is DONE, there will never be any more coming. + if (popped == DONE) { LLTHROW(LLThreadSafeQueueInterrupt()); } - // Storage empty. Wait for signal. + // If we didn't pop because WAITING, i.e. canPop() returned false, + // then even if the producer end has been closed, there's still at + // least one item to drain: wait for it. Or we might be EMPTY, with + // the queue still open. Either way, wait for signal. mEmptyCond.wait(lock1); } } -template<typename ElementT> -bool LLThreadSafeQueue<ElementT>::tryPopBack(ElementT & element) +template<typename ElementT, typename QueueT> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPop(ElementT & element) { - lock_t lock1(mLock, std::defer_lock); - if (!lock1.try_lock()) - return false; + LL_PROFILE_ZONE_SCOPED; + return tryLock( + [this, &element](lock_t& lock) + { + // conflate EMPTY, DONE, WAITING: tryPop() behavior when the queue + // is closed is implemented by simple inability to push any new + // elements + return pop_(lock, element) == POPPED; + }); +} - // no need to check mClosed: tryPopBack() behavior when the queue is - // closed is implemented by simple inability to push any new elements - if (mStorage.empty()) - return false; - element = mStorage.back(); - mStorage.pop_back(); - lock1.unlock(); - mCapacityCond.notify_one(); - return true; +template <typename ElementT, typename QueueT> +template <typename Rep, typename Period> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPopFor( + const std::chrono::duration<Rep, Period>& timeout, + ElementT& element) +{ + LL_PROFILE_ZONE_SCOPED; + // Convert duration to time_point: passing the same timeout duration to + // each of multiple calls is wrong. 
+ return tryPopUntil(std::chrono::steady_clock::now() + timeout, element); } -template<typename ElementT> -size_t LLThreadSafeQueue<ElementT>::size(void) +template <typename ElementT, typename QueueT> +template <typename Clock, typename Duration> +bool LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil( + const std::chrono::time_point<Clock, Duration>& until, + ElementT& element) { + LL_PROFILE_ZONE_SCOPED; + return tryLockUntil( + until, + [this, until, &element](lock_t& lock) + { + // conflate EMPTY, DONE, WAITING + return tryPopUntil_(lock, until, element) == POPPED; + }); +} + + +// body of tryPopUntil(), called once we have the lock +template <typename ElementT, typename QueueT> +template <typename Clock, typename Duration> +typename LLThreadSafeQueue<ElementT, QueueT>::pop_result +LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil_( + lock_t& lock, + const std::chrono::time_point<Clock, Duration>& until, + ElementT& element) +{ + LL_PROFILE_ZONE_SCOPED; + while (true) + { + pop_result popped = pop_(lock, element); + if (popped == POPPED || popped == DONE) + { + // If we succeeded, great! If we've drained the last item, so be + // it. Either way, break the loop and tell caller. + return popped; + } + + // EMPTY or WAITING: wait for signal. + if (LLCoros::cv_status::timeout == mEmptyCond.wait_until(lock, until)) + { + // timed out -- formally we might recheck + // as it is, break loop + return popped; + } + // If we didn't time out, we were notified for some reason. Loop back + // to check. + } +} + + +template<typename ElementT, typename QueueT> +size_t LLThreadSafeQueue<ElementT, QueueT>::size(void) +{ + LL_PROFILE_ZONE_SCOPED; lock_t lock(mLock); return mStorage.size(); } -template<typename ElementT> -void LLThreadSafeQueue<ElementT>::close() + +template<typename ElementT, typename QueueT> +void LLThreadSafeQueue<ElementT, QueueT>::close() { + LL_PROFILE_ZONE_SCOPED; lock_t lock(mLock); mClosed = true; lock.unlock(); - // wake up any blocked popBack() calls + // wake up any blocked pop() calls mEmptyCond.notify_all(); - // wake up any blocked pushFront() calls + // wake up any blocked push() calls mCapacityCond.notify_all(); } -template<typename ElementT> -bool LLThreadSafeQueue<ElementT>::isClosed() + +template<typename ElementT, typename QueueT> +bool LLThreadSafeQueue<ElementT, QueueT>::isClosed() { + LL_PROFILE_ZONE_SCOPED; lock_t lock(mLock); - return mClosed && mStorage.size() == 0; + return mClosed; } -template<typename ElementT> -LLThreadSafeQueue<ElementT>::operator bool() + +template<typename ElementT, typename QueueT> +bool LLThreadSafeQueue<ElementT, QueueT>::done() { - return ! 
isClosed(); + LL_PROFILE_ZONE_SCOPED; + lock_t lock(mLock); + return mClosed && mStorage.empty(); } #endif diff --git a/indra/llcommon/lltrace.cpp b/indra/llcommon/lltrace.cpp index 54079a4689..f59b207ded 100644 --- a/indra/llcommon/lltrace.cpp +++ b/indra/llcommon/lltrace.cpp @@ -61,6 +61,7 @@ TimeBlockTreeNode::TimeBlockTreeNode() void TimeBlockTreeNode::setParent( BlockTimerStatHandle* parent ) { + LL_PROFILE_ZONE_SCOPED; llassert_always(parent != mBlock); llassert_always(parent != NULL); diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h index 0d0cd6f581..4051c558a4 100644 --- a/indra/llcommon/lltrace.h +++ b/indra/llcommon/lltrace.h @@ -227,6 +227,7 @@ public: void setName(const char* name) { + LL_PROFILE_ZONE_SCOPED; mName = name; setKey(name); } @@ -234,12 +235,14 @@ public: /*virtual*/ const char* getUnitLabel() const { return "KB"; } StatType<MemAccumulator::AllocationFacet>& allocations() - { + { + LL_PROFILE_ZONE_SCOPED; return static_cast<StatType<MemAccumulator::AllocationFacet>&>(*(StatType<MemAccumulator>*)this); } StatType<MemAccumulator::DeallocationFacet>& deallocations() - { + { + LL_PROFILE_ZONE_SCOPED; return static_cast<StatType<MemAccumulator::DeallocationFacet>&>(*(StatType<MemAccumulator>*)this); } }; @@ -261,6 +264,7 @@ struct MeasureMem<T, typename T::mem_trackable_tag_t, IS_BYTES> { static size_t measureFootprint(const T& value) { + LL_PROFILE_ZONE_SCOPED; return sizeof(T) + value.getMemFootprint(); } }; @@ -270,6 +274,7 @@ struct MeasureMem<T, IS_MEM_TRACKABLE, typename T::is_unit_t> { static size_t measureFootprint(const T& value) { + LL_PROFILE_ZONE_SCOPED; return U32Bytes(value).value(); } }; @@ -279,6 +284,7 @@ struct MeasureMem<T*, IS_MEM_TRACKABLE, IS_BYTES> { static size_t measureFootprint(const T* value) { + LL_PROFILE_ZONE_SCOPED; if (!value) { return 0; @@ -323,6 +329,7 @@ struct MeasureMem<std::basic_string<T>, IS_MEM_TRACKABLE, IS_BYTES> { static size_t measureFootprint(const std::basic_string<T>& value) { + LL_PROFILE_ZONE_SCOPED; return value.capacity() * sizeof(T); } }; @@ -331,6 +338,7 @@ struct MeasureMem<std::basic_string<T>, IS_MEM_TRACKABLE, IS_BYTES> template<typename T> inline void claim_alloc(MemStatHandle& measurement, const T& value) { + LL_PROFILE_ZONE_SCOPED; #if LL_TRACE_ENABLED S32 size = MeasureMem<T>::measureFootprint(value); if(size == 0) return; @@ -343,6 +351,7 @@ inline void claim_alloc(MemStatHandle& measurement, const T& value) template<typename T> inline void disclaim_alloc(MemStatHandle& measurement, const T& value) { + LL_PROFILE_ZONE_SCOPED; #if LL_TRACE_ENABLED S32 size = MeasureMem<T>::measureFootprint(value); if(size == 0) return; @@ -352,141 +361,6 @@ inline void disclaim_alloc(MemStatHandle& measurement, const T& value) #endif } -template<typename DERIVED, size_t ALIGNMENT = LL_DEFAULT_HEAP_ALIGN> -class MemTrackableNonVirtual -{ -public: - typedef void mem_trackable_tag_t; - - MemTrackableNonVirtual(const char* name) -#if LL_TRACE_ENABLED - : mMemFootprint(0) -#endif - { -#if LL_TRACE_ENABLED - static bool name_initialized = false; - if (!name_initialized) - { - name_initialized = true; - sMemStat.setName(name); - } -#endif - } - -#if LL_TRACE_ENABLED - ~MemTrackableNonVirtual() - { - disclaimMem(mMemFootprint); - } - - static MemStatHandle& getMemStatHandle() - { - return sMemStat; - } - - S32 getMemFootprint() const { return mMemFootprint; } -#endif - - void* operator new(size_t size) - { -#if LL_TRACE_ENABLED - claim_alloc(sMemStat, size); -#endif - return ll_aligned_malloc<ALIGNMENT>(size); - } - 
- template<int CUSTOM_ALIGNMENT> - static void* aligned_new(size_t size) - { -#if LL_TRACE_ENABLED - claim_alloc(sMemStat, size); -#endif - return ll_aligned_malloc<CUSTOM_ALIGNMENT>(size); - } - - void operator delete(void* ptr, size_t size) - { -#if LL_TRACE_ENABLED - disclaim_alloc(sMemStat, size); -#endif - ll_aligned_free<ALIGNMENT>(ptr); - } - - template<int CUSTOM_ALIGNMENT> - static void aligned_delete(void* ptr, size_t size) - { -#if LL_TRACE_ENABLED - disclaim_alloc(sMemStat, size); -#endif - ll_aligned_free<CUSTOM_ALIGNMENT>(ptr); - } - - void* operator new [](size_t size) - { -#if LL_TRACE_ENABLED - claim_alloc(sMemStat, size); -#endif - return ll_aligned_malloc<ALIGNMENT>(size); - } - - void operator delete[](void* ptr, size_t size) - { -#if LL_TRACE_ENABLED - disclaim_alloc(sMemStat, size); -#endif - ll_aligned_free<ALIGNMENT>(ptr); - } - - // claim memory associated with other objects/data as our own, adding to our calculated footprint - template<typename CLAIM_T> - void claimMem(const CLAIM_T& value) const - { -#if LL_TRACE_ENABLED - S32 size = MeasureMem<CLAIM_T>::measureFootprint(value); - claim_alloc(sMemStat, size); - mMemFootprint += size; -#endif - } - - // remove memory we had claimed from our calculated footprint - template<typename CLAIM_T> - void disclaimMem(const CLAIM_T& value) const - { -#if LL_TRACE_ENABLED - S32 size = MeasureMem<CLAIM_T>::measureFootprint(value); - disclaim_alloc(sMemStat, size); - mMemFootprint -= size; -#endif - } - -private: -#if LL_TRACE_ENABLED - // use signed values so that we can temporarily go negative - // and reconcile in destructor - // NB: this assumes that no single class is responsible for > 2GB of allocations - mutable S32 mMemFootprint; - - static MemStatHandle sMemStat; -#endif - -}; - -#if LL_TRACE_ENABLED -template<typename DERIVED, size_t ALIGNMENT> -MemStatHandle MemTrackableNonVirtual<DERIVED, ALIGNMENT>::sMemStat(typeid(MemTrackableNonVirtual<DERIVED, ALIGNMENT>).name()); -#endif - -template<typename DERIVED, size_t ALIGNMENT = LL_DEFAULT_HEAP_ALIGN> -class MemTrackable : public MemTrackableNonVirtual<DERIVED, ALIGNMENT> -{ -public: - MemTrackable(const char* name) - : MemTrackableNonVirtual<DERIVED, ALIGNMENT>(name) - {} - - virtual ~MemTrackable() - {} -}; } #endif // LL_LLTRACE_H diff --git a/indra/llcommon/lltraceaccumulators.cpp b/indra/llcommon/lltraceaccumulators.cpp index b1c23c6fb7..8e9aaee0e6 100644 --- a/indra/llcommon/lltraceaccumulators.cpp +++ b/indra/llcommon/lltraceaccumulators.cpp @@ -41,6 +41,7 @@ extern MemStatHandle gTraceMemStat; AccumulatorBufferGroup::AccumulatorBufferGroup() { + LL_PROFILE_ZONE_SCOPED; claim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator)); claim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator)); claim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator)); @@ -55,6 +56,7 @@ AccumulatorBufferGroup::AccumulatorBufferGroup(const AccumulatorBufferGroup& oth mStackTimers(other.mStackTimers), mMemStats(other.mMemStats) { + LL_PROFILE_ZONE_SCOPED; claim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator)); claim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator)); claim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator)); @@ -64,6 +66,7 @@ AccumulatorBufferGroup::AccumulatorBufferGroup(const AccumulatorBufferGroup& oth AccumulatorBufferGroup::~AccumulatorBufferGroup() { + LL_PROFILE_ZONE_SCOPED; disclaim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator)); 
disclaim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator)); disclaim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator)); @@ -73,6 +76,7 @@ AccumulatorBufferGroup::~AccumulatorBufferGroup() void AccumulatorBufferGroup::handOffTo(AccumulatorBufferGroup& other) { + LL_PROFILE_ZONE_SCOPED; other.mCounts.reset(&mCounts); other.mSamples.reset(&mSamples); other.mEvents.reset(&mEvents); @@ -82,6 +86,7 @@ void AccumulatorBufferGroup::handOffTo(AccumulatorBufferGroup& other) void AccumulatorBufferGroup::makeCurrent() { + LL_PROFILE_ZONE_SCOPED; mCounts.makeCurrent(); mSamples.makeCurrent(); mEvents.makeCurrent(); @@ -104,6 +109,7 @@ void AccumulatorBufferGroup::makeCurrent() //static void AccumulatorBufferGroup::clearCurrent() { + LL_PROFILE_ZONE_SCOPED; AccumulatorBuffer<CountAccumulator>::clearCurrent(); AccumulatorBuffer<SampleAccumulator>::clearCurrent(); AccumulatorBuffer<EventAccumulator>::clearCurrent(); @@ -118,6 +124,7 @@ bool AccumulatorBufferGroup::isCurrent() const void AccumulatorBufferGroup::append( const AccumulatorBufferGroup& other ) { + LL_PROFILE_ZONE_SCOPED; mCounts.addSamples(other.mCounts, SEQUENTIAL); mSamples.addSamples(other.mSamples, SEQUENTIAL); mEvents.addSamples(other.mEvents, SEQUENTIAL); @@ -127,6 +134,7 @@ void AccumulatorBufferGroup::append( const AccumulatorBufferGroup& other ) void AccumulatorBufferGroup::merge( const AccumulatorBufferGroup& other) { + LL_PROFILE_ZONE_SCOPED; mCounts.addSamples(other.mCounts, NON_SEQUENTIAL); mSamples.addSamples(other.mSamples, NON_SEQUENTIAL); mEvents.addSamples(other.mEvents, NON_SEQUENTIAL); @@ -137,6 +145,7 @@ void AccumulatorBufferGroup::merge( const AccumulatorBufferGroup& other) void AccumulatorBufferGroup::reset(AccumulatorBufferGroup* other) { + LL_PROFILE_ZONE_SCOPED; mCounts.reset(other ? &other->mCounts : NULL); mSamples.reset(other ? &other->mSamples : NULL); mEvents.reset(other ? 
&other->mEvents : NULL); @@ -146,6 +155,7 @@ void AccumulatorBufferGroup::reset(AccumulatorBufferGroup* other) void AccumulatorBufferGroup::sync() { + LL_PROFILE_ZONE_SCOPED; if (isCurrent()) { F64SecondsImplicit time_stamp = LLTimer::getTotalSeconds(); @@ -190,7 +200,7 @@ F64 SampleAccumulator::mergeSumsOfSquares(const SampleAccumulator& a, const Samp void SampleAccumulator::addSamples( const SampleAccumulator& other, EBufferAppendType append_type ) { - if (append_type == NON_SEQUENTIAL) + if (append_type == NON_SEQUENTIAL) { return; } @@ -289,7 +299,7 @@ void EventAccumulator::addSamples( const EventAccumulator& other, EBufferAppendT void EventAccumulator::reset( const EventAccumulator* other ) { - mNumSamples = 0; + mNumSamples = 0; mSum = 0; mMin = F32(NaN); mMax = F32(NaN); diff --git a/indra/llcommon/lltraceaccumulators.h b/indra/llcommon/lltraceaccumulators.h index 8eb5338a2a..b183fcd14a 100644 --- a/indra/llcommon/lltraceaccumulators.h +++ b/indra/llcommon/lltraceaccumulators.h @@ -66,6 +66,7 @@ namespace LLTrace : mStorageSize(0), mStorage(NULL) { + LL_PROFILE_ZONE_SCOPED; const AccumulatorBuffer& other = *getDefaultBuffer(); resize(sNextStorageSlot); for (S32 i = 0; i < sNextStorageSlot; i++) @@ -76,6 +77,7 @@ namespace LLTrace ~AccumulatorBuffer() { + LL_PROFILE_ZONE_SCOPED; if (isCurrent()) { LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL); @@ -98,6 +100,7 @@ namespace LLTrace : mStorageSize(0), mStorage(NULL) { + LL_PROFILE_ZONE_SCOPED; resize(sNextStorageSlot); for (S32 i = 0; i < sNextStorageSlot; i++) { @@ -107,6 +110,7 @@ namespace LLTrace void addSamples(const AccumulatorBuffer<ACCUMULATOR>& other, EBufferAppendType append_type) { + LL_PROFILE_ZONE_SCOPED; llassert(mStorageSize >= sNextStorageSlot && other.mStorageSize >= sNextStorageSlot); for (size_t i = 0; i < sNextStorageSlot; i++) { @@ -116,6 +120,7 @@ namespace LLTrace void copyFrom(const AccumulatorBuffer<ACCUMULATOR>& other) { + LL_PROFILE_ZONE_SCOPED; llassert(mStorageSize >= sNextStorageSlot && other.mStorageSize >= sNextStorageSlot); for (size_t i = 0; i < sNextStorageSlot; i++) { @@ -125,6 +130,7 @@ namespace LLTrace void reset(const AccumulatorBuffer<ACCUMULATOR>* other = NULL) { + LL_PROFILE_ZONE_SCOPED; llassert(mStorageSize >= sNextStorageSlot); for (size_t i = 0; i < sNextStorageSlot; i++) { @@ -134,6 +140,7 @@ namespace LLTrace void sync(F64SecondsImplicit time_stamp) { + LL_PROFILE_ZONE_SCOPED; llassert(mStorageSize >= sNextStorageSlot); for (size_t i = 0; i < sNextStorageSlot; i++) { @@ -153,12 +160,13 @@ namespace LLTrace static void clearCurrent() { - LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL); + LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL); } // NOTE: this is not thread-safe. 
We assume that slots are reserved in the main thread before any child threads are spawned size_t reserveSlot() { + LL_PROFILE_ZONE_SCOPED; size_t next_slot = sNextStorageSlot++; if (next_slot >= mStorageSize) { @@ -172,6 +180,7 @@ namespace LLTrace void resize(size_t new_size) { + LL_PROFILE_ZONE_SCOPED; if (new_size <= mStorageSize) return; ACCUMULATOR* old_storage = mStorage; @@ -212,6 +221,7 @@ namespace LLTrace static self_t* getDefaultBuffer() { + LL_PROFILE_ZONE_SCOPED; static bool sInitialized = false; if (!sInitialized) { @@ -326,6 +336,7 @@ namespace LLTrace void sample(F64 value) { + LL_PROFILE_ZONE_SCOPED; F64SecondsImplicit time_stamp = LLTimer::getTotalSeconds(); // store effect of last value @@ -444,9 +455,9 @@ namespace LLTrace S32 mNumSamples; }; - class TimeBlockAccumulator + class alignas(32) TimeBlockAccumulator { - public: + public: typedef F64Seconds value_t; static F64Seconds getDefaultValue() { return F64Seconds(0); } @@ -539,6 +550,7 @@ namespace LLTrace void addSamples(const MemAccumulator& other, EBufferAppendType append_type) { + LL_PROFILE_ZONE_SCOPED; mAllocations.addSamples(other.mAllocations, append_type); mDeallocations.addSamples(other.mDeallocations, append_type); @@ -557,6 +569,7 @@ namespace LLTrace void reset(const MemAccumulator* other) { + LL_PROFILE_ZONE_SCOPED; mSize.reset(other ? &other->mSize : NULL); mAllocations.reset(other ? &other->mAllocations : NULL); mDeallocations.reset(other ? &other->mDeallocations : NULL); diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp index 3094b627a2..5ce1b337fe 100644 --- a/indra/llcommon/lltracerecording.cpp +++ b/indra/llcommon/lltracerecording.cpp @@ -50,6 +50,7 @@ Recording::Recording(EPlayState state) : mElapsedSeconds(0), mActiveBuffers(NULL) { + LL_PROFILE_ZONE_SCOPED; claim_alloc(gTraceMemStat, this); mBuffers = new AccumulatorBufferGroup(); claim_alloc(gTraceMemStat, mBuffers); @@ -59,12 +60,14 @@ Recording::Recording(EPlayState state) Recording::Recording( const Recording& other ) : mActiveBuffers(NULL) { + LL_PROFILE_ZONE_SCOPED; claim_alloc(gTraceMemStat, this); *this = other; } Recording& Recording::operator = (const Recording& other) { + LL_PROFILE_ZONE_SCOPED; // this will allow us to seamlessly start without affecting any data we've acquired from other setPlayState(PAUSED); @@ -85,6 +88,7 @@ Recording& Recording::operator = (const Recording& other) Recording::~Recording() { + LL_PROFILE_ZONE_SCOPED; disclaim_alloc(gTraceMemStat, this); disclaim_alloc(gTraceMemStat, mBuffers); @@ -103,6 +107,7 @@ void Recording::update() #if LL_TRACE_ENABLED if (isStarted()) { + LL_PROFILE_ZONE_SCOPED; mElapsedSeconds += mSamplingTimer.getElapsedTimeF64(); // must have @@ -123,6 +128,7 @@ void Recording::update() void Recording::handleReset() { + LL_PROFILE_ZONE_SCOPED; #if LL_TRACE_ENABLED mBuffers.write()->reset(); @@ -133,6 +139,7 @@ void Recording::handleReset() void Recording::handleStart() { + LL_PROFILE_ZONE_SCOPED; #if LL_TRACE_ENABLED mSamplingTimer.reset(); mBuffers.setStayUnique(true); @@ -144,6 +151,7 @@ void Recording::handleStart() void Recording::handleStop() { + LL_PROFILE_ZONE_SCOPED; #if LL_TRACE_ENABLED mElapsedSeconds += mSamplingTimer.getElapsedTimeF64(); // must have thread recorder running on this thread @@ -273,7 +281,7 @@ F64Kilobytes Recording::getMean(const StatType<MemAccumulator>& stat) F64Kilobytes Recording::getMax(const StatType<MemAccumulator>& stat) { - update(); + update(); const MemAccumulator& accumulator = 
mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return F64Bytes(llmax(accumulator.mSize.getMax(), active_accumulator && active_accumulator->mSize.hasValue() ? active_accumulator->mSize.getMax() : F32_MIN)); @@ -281,7 +289,7 @@ F64Kilobytes Recording::getMax(const StatType<MemAccumulator>& stat) F64Kilobytes Recording::getStandardDeviation(const StatType<MemAccumulator>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; if (active_accumulator && active_accumulator->hasValue()) @@ -297,7 +305,7 @@ F64Kilobytes Recording::getStandardDeviation(const StatType<MemAccumulator>& sta F64Kilobytes Recording::getLastValue(const StatType<MemAccumulator>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return F64Bytes(active_accumulator ? active_accumulator->mSize.getLastValue() : accumulator.mSize.getLastValue()); @@ -305,7 +313,7 @@ F64Kilobytes Recording::getLastValue(const StatType<MemAccumulator>& stat) bool Recording::hasValue(const StatType<MemAccumulator::AllocationFacet>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return accumulator.mAllocations.hasValue() || (active_accumulator ? active_accumulator->mAllocations.hasValue() : false); @@ -313,7 +321,7 @@ bool Recording::hasValue(const StatType<MemAccumulator::AllocationFacet>& stat) F64Kilobytes Recording::getSum(const StatType<MemAccumulator::AllocationFacet>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return F64Bytes(accumulator.mAllocations.getSum() + (active_accumulator ? active_accumulator->mAllocations.getSum() : 0)); @@ -321,7 +329,7 @@ F64Kilobytes Recording::getSum(const StatType<MemAccumulator::AllocationFacet>& F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::AllocationFacet>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return F64Bytes((accumulator.mAllocations.getSum() + (active_accumulator ? active_accumulator->mAllocations.getSum() : 0)) / mElapsedSeconds.value()); @@ -329,7 +337,7 @@ F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::AllocationFacet S32 Recording::getSampleCount(const StatType<MemAccumulator::AllocationFacet>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return accumulator.mAllocations.getSampleCount() + (active_accumulator ? 
active_accumulator->mAllocations.getSampleCount() : 0); @@ -337,7 +345,7 @@ S32 Recording::getSampleCount(const StatType<MemAccumulator::AllocationFacet>& s bool Recording::hasValue(const StatType<MemAccumulator::DeallocationFacet>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return accumulator.mDeallocations.hasValue() || (active_accumulator ? active_accumulator->mDeallocations.hasValue() : false); @@ -346,7 +354,7 @@ bool Recording::hasValue(const StatType<MemAccumulator::DeallocationFacet>& stat F64Kilobytes Recording::getSum(const StatType<MemAccumulator::DeallocationFacet>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return F64Bytes(accumulator.mDeallocations.getSum() + (active_accumulator ? active_accumulator->mDeallocations.getSum() : 0)); @@ -354,7 +362,7 @@ F64Kilobytes Recording::getSum(const StatType<MemAccumulator::DeallocationFacet> F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::DeallocationFacet>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return F64Bytes((accumulator.mDeallocations.getSum() + (active_accumulator ? active_accumulator->mDeallocations.getSum() : 0)) / mElapsedSeconds.value()); @@ -362,7 +370,7 @@ F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::DeallocationFac S32 Recording::getSampleCount(const StatType<MemAccumulator::DeallocationFacet>& stat) { - update(); + update(); const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()]; const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL; return accumulator.mDeallocations.getSampleCount() + (active_accumulator ? active_accumulator->mDeallocations.getSampleCount() : 0); @@ -370,7 +378,7 @@ S32 Recording::getSampleCount(const StatType<MemAccumulator::DeallocationFacet>& bool Recording::hasValue(const StatType<CountAccumulator>& stat) { - update(); + update(); const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()]; const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL; return accumulator.hasValue() || (active_accumulator ? active_accumulator->hasValue() : false); @@ -378,7 +386,7 @@ bool Recording::hasValue(const StatType<CountAccumulator>& stat) F64 Recording::getSum(const StatType<CountAccumulator>& stat) { - update(); + update(); const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()]; const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL; return accumulator.getSum() + (active_accumulator ? active_accumulator->getSum() : 0); @@ -386,7 +394,7 @@ F64 Recording::getSum(const StatType<CountAccumulator>& stat) F64 Recording::getPerSec( const StatType<CountAccumulator>& stat ) { - update(); + update(); const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()]; const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL; F64 sum = accumulator.getSum() + (active_accumulator ? 
active_accumulator->getSum() : 0); @@ -395,7 +403,7 @@ F64 Recording::getPerSec( const StatType<CountAccumulator>& stat ) S32 Recording::getSampleCount( const StatType<CountAccumulator>& stat ) { - update(); + update(); const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()]; const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL; return accumulator.getSampleCount() + (active_accumulator ? active_accumulator->getSampleCount() : 0); @@ -403,7 +411,7 @@ S32 Recording::getSampleCount( const StatType<CountAccumulator>& stat ) bool Recording::hasValue(const StatType<SampleAccumulator>& stat) { - update(); + update(); const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()]; const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL; return accumulator.hasValue() || (active_accumulator && active_accumulator->hasValue()); @@ -411,7 +419,7 @@ bool Recording::hasValue(const StatType<SampleAccumulator>& stat) F64 Recording::getMin( const StatType<SampleAccumulator>& stat ) { - update(); + update(); const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()]; const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL; return llmin(accumulator.getMin(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMin() : F32_MAX); @@ -419,7 +427,7 @@ F64 Recording::getMin( const StatType<SampleAccumulator>& stat ) F64 Recording::getMax( const StatType<SampleAccumulator>& stat ) { - update(); + update(); const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()]; const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL; return llmax(accumulator.getMax(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMax() : F32_MIN); @@ -427,7 +435,7 @@ F64 Recording::getMax( const StatType<SampleAccumulator>& stat ) F64 Recording::getMean( const StatType<SampleAccumulator>& stat ) { - update(); + update(); const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()]; const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL; if (active_accumulator && active_accumulator->hasValue()) @@ -448,7 +456,7 @@ F64 Recording::getMean( const StatType<SampleAccumulator>& stat ) F64 Recording::getStandardDeviation( const StatType<SampleAccumulator>& stat ) { - update(); + update(); const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()]; const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL; @@ -465,7 +473,7 @@ F64 Recording::getStandardDeviation( const StatType<SampleAccumulator>& stat ) F64 Recording::getLastValue( const StatType<SampleAccumulator>& stat ) { - update(); + update(); const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()]; const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL; return (active_accumulator && active_accumulator->hasValue() ? 
active_accumulator->getLastValue() : accumulator.getLastValue()); @@ -473,7 +481,7 @@ F64 Recording::getLastValue( const StatType<SampleAccumulator>& stat ) S32 Recording::getSampleCount( const StatType<SampleAccumulator>& stat ) { - update(); + update(); const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()]; const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL; return accumulator.getSampleCount() + (active_accumulator && active_accumulator->hasValue() ? active_accumulator->getSampleCount() : 0); @@ -481,7 +489,7 @@ S32 Recording::getSampleCount( const StatType<SampleAccumulator>& stat ) bool Recording::hasValue(const StatType<EventAccumulator>& stat) { - update(); + update(); const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()]; const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL; return accumulator.hasValue() || (active_accumulator && active_accumulator->hasValue()); @@ -489,7 +497,7 @@ bool Recording::hasValue(const StatType<EventAccumulator>& stat) F64 Recording::getSum( const StatType<EventAccumulator>& stat) { - update(); + update(); const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()]; const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL; return (F64)(accumulator.getSum() + (active_accumulator && active_accumulator->hasValue() ? active_accumulator->getSum() : 0)); @@ -497,7 +505,7 @@ F64 Recording::getSum( const StatType<EventAccumulator>& stat) F64 Recording::getMin( const StatType<EventAccumulator>& stat ) { - update(); + update(); const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()]; const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL; return llmin(accumulator.getMin(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMin() : F32_MAX); @@ -505,7 +513,7 @@ F64 Recording::getMin( const StatType<EventAccumulator>& stat ) F64 Recording::getMax( const StatType<EventAccumulator>& stat ) { - update(); + update(); const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()]; const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL; return llmax(accumulator.getMax(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMax() : F32_MIN); @@ -513,7 +521,7 @@ F64 Recording::getMax( const StatType<EventAccumulator>& stat ) F64 Recording::getMean( const StatType<EventAccumulator>& stat ) { - update(); + update(); const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()]; const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL; if (active_accumulator && active_accumulator->hasValue()) @@ -534,7 +542,7 @@ F64 Recording::getMean( const StatType<EventAccumulator>& stat ) F64 Recording::getStandardDeviation( const StatType<EventAccumulator>& stat ) { - update(); + update(); const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()]; const EventAccumulator* active_accumulator = mActiveBuffers ? 
&mActiveBuffers->mEvents[stat.getIndex()] : NULL; @@ -551,7 +559,7 @@ F64 Recording::getStandardDeviation( const StatType<EventAccumulator>& stat ) F64 Recording::getLastValue( const StatType<EventAccumulator>& stat ) { - update(); + update(); const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()]; const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL; return active_accumulator ? active_accumulator->getLastValue() : accumulator.getLastValue(); @@ -559,7 +567,7 @@ F64 Recording::getLastValue( const StatType<EventAccumulator>& stat ) S32 Recording::getSampleCount( const StatType<EventAccumulator>& stat ) { - update(); + update(); const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()]; const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL; return accumulator.getSampleCount() + (active_accumulator ? active_accumulator->getSampleCount() : 0); @@ -575,17 +583,20 @@ PeriodicRecording::PeriodicRecording( S32 num_periods, EPlayState state) mNumRecordedPeriods(0), mRecordingPeriods(num_periods ? num_periods : 1) { + LL_PROFILE_ZONE_SCOPED; setPlayState(state); claim_alloc(gTraceMemStat, this); } PeriodicRecording::~PeriodicRecording() { + LL_PROFILE_ZONE_SCOPED; disclaim_alloc(gTraceMemStat, this); } void PeriodicRecording::nextPeriod() { + LL_PROFILE_ZONE_SCOPED; if (mAutoResize) { mRecordingPeriods.push_back(Recording()); @@ -600,6 +611,7 @@ void PeriodicRecording::nextPeriod() void PeriodicRecording::appendRecording(Recording& recording) { + LL_PROFILE_ZONE_SCOPED; getCurRecording().appendRecording(recording); nextPeriod(); } @@ -607,6 +619,7 @@ void PeriodicRecording::appendRecording(Recording& recording) void PeriodicRecording::appendPeriodicRecording( PeriodicRecording& other ) { + LL_PROFILE_ZONE_SCOPED; if (other.mRecordingPeriods.empty()) return; getCurRecording().update(); @@ -680,6 +693,7 @@ void PeriodicRecording::appendPeriodicRecording( PeriodicRecording& other ) F64Seconds PeriodicRecording::getDuration() const { + LL_PROFILE_ZONE_SCOPED; F64Seconds duration; S32 num_periods = mRecordingPeriods.size(); for (S32 i = 1; i <= num_periods; i++) @@ -693,6 +707,7 @@ F64Seconds PeriodicRecording::getDuration() const LLTrace::Recording PeriodicRecording::snapshotCurRecording() const { + LL_PROFILE_ZONE_SCOPED; Recording recording_copy(getCurRecording()); recording_copy.stop(); return recording_copy; @@ -735,16 +750,19 @@ const Recording& PeriodicRecording::getPrevRecording( S32 offset ) const void PeriodicRecording::handleStart() { + LL_PROFILE_ZONE_SCOPED; getCurRecording().start(); } void PeriodicRecording::handleStop() { + LL_PROFILE_ZONE_SCOPED; getCurRecording().pause(); } void PeriodicRecording::handleReset() { + LL_PROFILE_ZONE_SCOPED; getCurRecording().stop(); if (mAutoResize) @@ -768,11 +786,13 @@ void PeriodicRecording::handleReset() void PeriodicRecording::handleSplitTo(PeriodicRecording& other) { + LL_PROFILE_ZONE_SCOPED; getCurRecording().splitTo(other.getCurRecording()); } F64 PeriodicRecording::getPeriodMin( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); bool has_value = false; @@ -794,6 +814,7 @@ F64 PeriodicRecording::getPeriodMin( const StatType<EventAccumulator>& stat, S32 F64 PeriodicRecording::getPeriodMax( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; 
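(Editor's note.) These hunks add LL_PROFILE_ZONE_SCOPED at the top of nearly every Recording and PeriodicRecording accessor. The macro is declared in the new llprofiler.h header added by this change set; the sketch below shows the usual Tracy-backed pattern such a macro follows. It is only an illustration: the configuration guard name (LL_PROFILER_ENABLE_TRACY) is an assumption, not the actual llprofiler.h contents.

// Hedged sketch of a Tracy-backed scoped-zone macro; the real definitions
// live in llprofiler.h and may differ. LL_PROFILER_ENABLE_TRACY is hypothetical.
#if defined(LL_PROFILER_ENABLE_TRACY)
#include "Tracy.hpp"                         // Tracy client header (see TRACY_INCLUDE_DIR above)
#define LL_PROFILE_ZONE_SCOPED ZoneScoped    // emit a zone covering the enclosing scope
#else
#define LL_PROFILE_ZONE_SCOPED               // compiles away when profiling is disabled
#endif

// Instrumented functions then follow the pattern used throughout this diff:
double exampleAccessor()
{
    LL_PROFILE_ZONE_SCOPED;                  // zone opens here, closes at function exit
    return 0.0;
}
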
num_periods = llmin(num_periods, getNumRecordedPeriods()); bool has_value = false; @@ -816,6 +837,7 @@ F64 PeriodicRecording::getPeriodMax( const StatType<EventAccumulator>& stat, S32 // calculates means using aggregates per period F64 PeriodicRecording::getPeriodMean( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); F64 mean = 0; @@ -836,9 +858,9 @@ F64 PeriodicRecording::getPeriodMean( const StatType<EventAccumulator>& stat, S3 : NaN; } - F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); F64 period_mean = getPeriodMean(stat, num_periods); @@ -863,6 +885,7 @@ F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<EventAccumulat F64 PeriodicRecording::getPeriodMin( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); bool has_value = false; @@ -884,6 +907,7 @@ F64 PeriodicRecording::getPeriodMin( const StatType<SampleAccumulator>& stat, S3 F64 PeriodicRecording::getPeriodMax(const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); bool has_value = false; @@ -906,6 +930,7 @@ F64 PeriodicRecording::getPeriodMax(const StatType<SampleAccumulator>& stat, S32 F64 PeriodicRecording::getPeriodMean( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); S32 valid_period_count = 0; @@ -926,8 +951,35 @@ F64 PeriodicRecording::getPeriodMean( const StatType<SampleAccumulator>& stat, S : NaN; } +F64 PeriodicRecording::getPeriodMedian( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) +{ + LL_PROFILE_ZONE_SCOPED; + num_periods = llmin(num_periods, getNumRecordedPeriods()); + + std::vector<F64> buf; + for (S32 i = 1; i <= num_periods; i++) + { + Recording& recording = getPrevRecording(i); + if (recording.getDuration() > (F32Seconds)0.f) + { + if (recording.hasValue(stat)) + { + buf.push_back(recording.getMean(stat)); + } + } + } + if (buf.size()==0) + { + return 0.0f; + } + std::sort(buf.begin(), buf.end()); + + return F64((buf.size() % 2 == 0) ? 
(buf[buf.size() / 2 - 1] + buf[buf.size() / 2]) / 2 : buf[buf.size() / 2]); +} + F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); F64 period_mean = getPeriodMean(stat, num_periods); @@ -953,6 +1005,7 @@ F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<SampleAccumula F64Kilobytes PeriodicRecording::getPeriodMin( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); F64Kilobytes min_val(std::numeric_limits<F64>::max()); @@ -972,6 +1025,7 @@ F64Kilobytes PeriodicRecording::getPeriodMin(const MemStatHandle& stat, S32 num_ F64Kilobytes PeriodicRecording::getPeriodMax(const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); F64Kilobytes max_val(0.0); @@ -991,6 +1045,7 @@ F64Kilobytes PeriodicRecording::getPeriodMax(const MemStatHandle& stat, S32 num_ F64Kilobytes PeriodicRecording::getPeriodMean( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); F64Kilobytes mean(0); @@ -1011,6 +1066,7 @@ F64Kilobytes PeriodicRecording::getPeriodMean(const MemStatHandle& stat, S32 num F64Kilobytes PeriodicRecording::getPeriodStandardDeviation( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ ) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); F64Kilobytes period_mean = getPeriodMean(stat, num_periods); @@ -1044,6 +1100,7 @@ F64Kilobytes PeriodicRecording::getPeriodStandardDeviation(const MemStatHandle& void ExtendableRecording::extend() { + LL_PROFILE_ZONE_SCOPED; // push the data back to accepted recording mAcceptedRecording.appendRecording(mPotentialRecording); // flush data, so we can start from scratch @@ -1052,22 +1109,26 @@ void ExtendableRecording::extend() void ExtendableRecording::handleStart() { + LL_PROFILE_ZONE_SCOPED; mPotentialRecording.start(); } void ExtendableRecording::handleStop() { + LL_PROFILE_ZONE_SCOPED; mPotentialRecording.pause(); } void ExtendableRecording::handleReset() { + LL_PROFILE_ZONE_SCOPED; mAcceptedRecording.reset(); mPotentialRecording.reset(); } void ExtendableRecording::handleSplitTo(ExtendableRecording& other) { + LL_PROFILE_ZONE_SCOPED; mPotentialRecording.splitTo(other.mPotentialRecording); } @@ -1084,6 +1145,7 @@ ExtendablePeriodicRecording::ExtendablePeriodicRecording() void ExtendablePeriodicRecording::extend() { + LL_PROFILE_ZONE_SCOPED; // push the data back to accepted recording mAcceptedRecording.appendPeriodicRecording(mPotentialRecording); // flush data, so we can start from scratch @@ -1093,22 +1155,26 @@ void ExtendablePeriodicRecording::extend() void ExtendablePeriodicRecording::handleStart() { + LL_PROFILE_ZONE_SCOPED; mPotentialRecording.start(); } void ExtendablePeriodicRecording::handleStop() { + LL_PROFILE_ZONE_SCOPED; mPotentialRecording.pause(); } void ExtendablePeriodicRecording::handleReset() { + LL_PROFILE_ZONE_SCOPED; mAcceptedRecording.reset(); mPotentialRecording.reset(); } void ExtendablePeriodicRecording::handleSplitTo(ExtendablePeriodicRecording& other) { + LL_PROFILE_ZONE_SCOPED; mPotentialRecording.splitTo(other.mPotentialRecording); } @@ -1123,6 +1189,7 @@ PeriodicRecording& get_frame_recording() void 
LLStopWatchControlsMixinCommon::start() { + LL_PROFILE_ZONE_SCOPED; switch (mPlayState) { case STOPPED: @@ -1144,6 +1211,7 @@ void LLStopWatchControlsMixinCommon::start() void LLStopWatchControlsMixinCommon::stop() { + LL_PROFILE_ZONE_SCOPED; switch (mPlayState) { case STOPPED: @@ -1163,6 +1231,7 @@ void LLStopWatchControlsMixinCommon::stop() void LLStopWatchControlsMixinCommon::pause() { + LL_PROFILE_ZONE_SCOPED; switch (mPlayState) { case STOPPED: @@ -1182,6 +1251,7 @@ void LLStopWatchControlsMixinCommon::pause() void LLStopWatchControlsMixinCommon::unpause() { + LL_PROFILE_ZONE_SCOPED; switch (mPlayState) { case STOPPED: @@ -1201,6 +1271,7 @@ void LLStopWatchControlsMixinCommon::unpause() void LLStopWatchControlsMixinCommon::resume() { + LL_PROFILE_ZONE_SCOPED; switch (mPlayState) { case STOPPED: @@ -1221,6 +1292,7 @@ void LLStopWatchControlsMixinCommon::resume() void LLStopWatchControlsMixinCommon::restart() { + LL_PROFILE_ZONE_SCOPED; switch (mPlayState) { case STOPPED: @@ -1244,11 +1316,13 @@ void LLStopWatchControlsMixinCommon::restart() void LLStopWatchControlsMixinCommon::reset() { + LL_PROFILE_ZONE_SCOPED; handleReset(); } void LLStopWatchControlsMixinCommon::setPlayState( EPlayState state ) { + LL_PROFILE_ZONE_SCOPED; switch(state) { case STOPPED: diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h index d0b4a842a6..1f3d37336a 100644 --- a/indra/llcommon/lltracerecording.h +++ b/indra/llcommon/lltracerecording.h @@ -355,6 +355,7 @@ namespace LLTrace template <typename T> S32 getSampleCount(const StatType<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); S32 num_samples = 0; @@ -374,6 +375,7 @@ namespace LLTrace template <typename T> typename T::value_t getPeriodMin(const StatType<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); bool has_value = false; @@ -396,6 +398,7 @@ namespace LLTrace template<typename T> T getPeriodMin(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return T(getPeriodMin(static_cast<const StatType<CountAccumulator>&>(stat), num_periods)); } @@ -403,6 +406,7 @@ namespace LLTrace template<typename T> T getPeriodMin(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return T(getPeriodMin(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods)); } @@ -410,6 +414,7 @@ namespace LLTrace template<typename T> T getPeriodMin(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return T(getPeriodMin(static_cast<const StatType<EventAccumulator>&>(stat), num_periods)); } @@ -419,6 +424,7 @@ namespace LLTrace template <typename T> typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMinPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); typename RelatedTypes<typename T::value_t>::fractional_t min_val(std::numeric_limits<F64>::max()); @@ -433,6 +439,7 @@ namespace LLTrace template<typename T> typename RelatedTypes<T>::fractional_t getPeriodMinPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return typename RelatedTypes<T>::fractional_t(getPeriodMinPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods)); } @@ -444,6 +451,7 @@ namespace LLTrace template <typename T> typename T::value_t getPeriodMax(const 
StatType<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); bool has_value = false; @@ -466,6 +474,7 @@ namespace LLTrace template<typename T> T getPeriodMax(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return T(getPeriodMax(static_cast<const StatType<CountAccumulator>&>(stat), num_periods)); } @@ -473,6 +482,7 @@ namespace LLTrace template<typename T> T getPeriodMax(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return T(getPeriodMax(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods)); } @@ -480,6 +490,7 @@ namespace LLTrace template<typename T> T getPeriodMax(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return T(getPeriodMax(static_cast<const StatType<EventAccumulator>&>(stat), num_periods)); } @@ -489,6 +500,7 @@ namespace LLTrace template <typename T> typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMaxPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); F64 max_val = std::numeric_limits<F64>::min(); @@ -503,6 +515,7 @@ namespace LLTrace template<typename T> typename RelatedTypes<T>::fractional_t getPeriodMaxPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return typename RelatedTypes<T>::fractional_t(getPeriodMaxPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods)); } @@ -514,6 +527,7 @@ namespace LLTrace template <typename T> typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMean(const StatType<T >& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); typename RelatedTypes<typename T::value_t>::fractional_t mean(0); @@ -534,12 +548,14 @@ namespace LLTrace template<typename T> typename RelatedTypes<T>::fractional_t getPeriodMean(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<CountAccumulator>&>(stat), num_periods)); } F64 getPeriodMean(const StatType<SampleAccumulator>& stat, S32 num_periods = S32_MAX); template<typename T> typename RelatedTypes<T>::fractional_t getPeriodMean(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods)); } @@ -547,6 +563,7 @@ namespace LLTrace template<typename T> typename RelatedTypes<T>::fractional_t getPeriodMean(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<EventAccumulator>&>(stat), num_periods)); } @@ -556,6 +573,7 @@ namespace LLTrace template <typename T> typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMeanPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; num_periods = llmin(num_periods, getNumRecordedPeriods()); typename RelatedTypes<typename T::value_t>::fractional_t mean = 0; @@ -577,9 +595,39 @@ namespace LLTrace template<typename T> typename RelatedTypes<T>::fractional_t getPeriodMeanPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return typename 
RelatedTypes<T>::fractional_t(getPeriodMeanPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods)); } + F64 getPeriodMedian( const StatType<SampleAccumulator>& stat, S32 num_periods = S32_MAX); + + template <typename T> + typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMedianPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX) + { + LL_PROFILE_ZONE_SCOPED; + num_periods = llmin(num_periods, getNumRecordedPeriods()); + + std::vector <typename RelatedTypes<typename T::value_t>::fractional_t> buf; + for (S32 i = 1; i <= num_periods; i++) + { + Recording& recording = getPrevRecording(i); + if (recording.getDuration() > (F32Seconds)0.f) + { + buf.push_back(recording.getPerSec(stat)); + } + } + std::sort(buf.begin(), buf.end()); + + return typename RelatedTypes<T>::fractional_t((buf.size() % 2 == 0) ? (buf[buf.size() / 2 - 1] + buf[buf.size() / 2]) / 2 : buf[buf.size() / 2]); + } + + template<typename T> + typename RelatedTypes<T>::fractional_t getPeriodMedianPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX) + { + LL_PROFILE_ZONE_SCOPED; + return typename RelatedTypes<T>::fractional_t(getPeriodMedianPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods)); + } + // // PERIODIC STANDARD DEVIATION // @@ -589,6 +637,7 @@ namespace LLTrace template<typename T> typename RelatedTypes<T>::fractional_t getPeriodStandardDeviation(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return typename RelatedTypes<T>::fractional_t(getPeriodStandardDeviation(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods)); } @@ -596,6 +645,7 @@ namespace LLTrace template<typename T> typename RelatedTypes<T>::fractional_t getPeriodStandardDeviation(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX) { + LL_PROFILE_ZONE_SCOPED; return typename RelatedTypes<T>::fractional_t(getPeriodStandardDeviation(static_cast<const StatType<EventAccumulator>&>(stat), num_periods)); } diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp index 025dc57044..7ae1e72784 100644 --- a/indra/llcommon/lltracethreadrecorder.cpp +++ b/indra/llcommon/lltracethreadrecorder.cpp @@ -274,12 +274,10 @@ void ThreadRecorder::pushToParent() } -static LLTrace::BlockTimerStatHandle FTM_PULL_TRACE_DATA_FROM_CHILDREN("Pull child thread trace data"); - void ThreadRecorder::pullFromChildren() { #if LL_TRACE_ENABLED - LL_RECORD_BLOCK_TIME(FTM_PULL_TRACE_DATA_FROM_CHILDREN); + LL_PROFILE_ZONE_SCOPED; if (mActiveRecordings.empty()) return; { LLMutexLock lock(&mChildListMutex); diff --git a/indra/llcommon/lluuid.h b/indra/llcommon/lluuid.h index fe7482ba29..86a396ab06 100644 --- a/indra/llcommon/lluuid.h +++ b/indra/llcommon/lluuid.h @@ -184,6 +184,17 @@ struct boost::hash<LLUUID> } }; +// Adapt boost hash to std hash +namespace std +{ + template<> struct hash<LLUUID> + { + std::size_t operator()(LLUUID const& s) const noexcept + { + return boost::hash<LLUUID>()(s); + } + }; +} #endif diff --git a/indra/llcommon/stdtypes.h b/indra/llcommon/stdtypes.h index 887f6ab733..b07805b628 100644 --- a/indra/llcommon/stdtypes.h +++ b/indra/llcommon/stdtypes.h @@ -42,10 +42,17 @@ typedef unsigned int U32; // Windows wchar_t is 16-bit, whichever way /Zc:wchar_t is set. In effect, // Windows wchar_t is always a typedef, either for unsigned short or __wchar_t. // (__wchar_t, available either way, is Microsoft's native 2-byte wchar_t type.) 
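(Editor's note.) The std::hash<LLUUID> adapter added to lluuid.h above simply forwards to the existing boost::hash<LLUUID>, which lets LLUUID be used directly as a key in the standard unordered containers. A minimal usage sketch; the container and function names are illustrative, not part of the change:

#include <string>
#include <unordered_map>
#include <unordered_set>
#include "lluuid.h"

// No custom hasher argument needed once std::hash<LLUUID> is defined.
std::unordered_set<LLUUID> seen_ids;
std::unordered_map<LLUUID, std::string> name_by_id;

void remember(const LLUUID& id, const std::string& name)
{
    seen_ids.insert(id);        // hashes via std::hash<LLUUID>, which calls boost::hash<LLUUID>
    name_by_id[id] = name;
}
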
+// The version of clang available with VS 2019 also defines wchar_t as __wchar_t +// which is also 16 bits. // In any case, llwchar should be a UTF-32 type. typedef U32 llwchar; #else typedef wchar_t llwchar; +// What we'd actually want is a simple module-scope 'if constexpr' to test +// std::is_same<wchar_t, llwchar>::value and use that to define, or not +// define, string conversion specializations. Since we don't have that, we'll +// have to rely on #if instead. Sorry, Dr. Stroustrup. +#define LLWCHAR_IS_WCHAR_T 1 #endif #if LL_WINDOWS diff --git a/indra/llcommon/stringize.h b/indra/llcommon/stringize.h index 38dd198ad3..12df693910 100644 --- a/indra/llcommon/stringize.h +++ b/indra/llcommon/stringize.h @@ -31,58 +31,109 @@ #include <sstream> #include <llstring.h> +#include <boost/call_traits.hpp> /** - * gstringize(item) encapsulates an idiom we use constantly, using - * operator<<(std::ostringstream&, TYPE) followed by std::ostringstream::str() - * or their wstring equivalents - * to render a string expressing some item. + * stream_to(std::ostream&, items, ...) streams each item in the parameter list + * to the passed std::ostream using the insertion operator <<. This can be + * used, for instance, to make a simple print() function, e.g.: + * + * @code + * template <typename... Items> + * void print(Items&&... items) + * { + * stream_to(std::cout, std::forward<Items>(items)...); + * } + * @endcode */ -template <typename CHARTYPE, typename T> -std::basic_string<CHARTYPE> gstringize(const T& item) +// recursion tail +template <typename CHARTYPE> +void stream_to(std::basic_ostream<CHARTYPE>& out) {} +// stream one or more items +template <typename CHARTYPE, typename T, typename... Items> +void stream_to(std::basic_ostream<CHARTYPE>& out, T&& item, Items&&... items) { - std::basic_ostringstream<CHARTYPE> out; - out << item; - return out.str(); + out << std::forward<T>(item); + stream_to(out, std::forward<Items>(items)...); } +// why we use function overloads, not function template specializations: +// http://www.gotw.ca/publications/mill17.htm + /** - *partial specialization of stringize for handling wstring - *TODO: we should have similar specializations for wchar_t[] but not until it is needed. + * gstringize(item, ...) encapsulates an idiom we use constantly, using + * operator<<(std::ostringstream&, TYPE) followed by std::ostringstream::str() + * or their wstring equivalents to render a string expressing one or more items. */ -inline std::string stringize(const std::wstring& item) +// two or more args - the case of a single argument is handled separately +template <typename CHARTYPE, typename T0, typename T1, typename... Items> +auto gstringize(T0&& item0, T1&& item1, Items&&... 
items) { - return wstring_to_utf8str(item); + std::basic_ostringstream<CHARTYPE> out; + stream_to(out, std::forward<T0>(item0), std::forward<T1>(item1), + std::forward<Items>(items)...); + return out.str(); } -/** - * Specialization of gstringize for std::string return types - */ -template <typename T> -std::string stringize(const T& item) +// generic single argument: stream to out, as above +template <typename CHARTYPE, typename T> +struct gstringize_impl { - return gstringize<char>(item); + auto operator()(typename boost::call_traits<T>::param_type arg) + { + std::basic_ostringstream<CHARTYPE> out; + out << arg; + return out.str(); + } +}; + +// partially specialize for a single STRING argument - +// note that ll_convert<T>(T) already handles the trivial case +template <typename OUTCHAR, typename INCHAR> +struct gstringize_impl<OUTCHAR, std::basic_string<INCHAR>> +{ + auto operator()(const std::basic_string<INCHAR>& arg) + { + return ll_convert<std::basic_string<OUTCHAR>>(arg); + } +}; + +// partially specialize for a single CHARTYPE* argument - +// since it's not a basic_string and we do want to optimize this common case +template <typename OUTCHAR, typename INCHAR> +struct gstringize_impl<OUTCHAR, INCHAR*> +{ + auto operator()(const INCHAR* arg) + { + return ll_convert<std::basic_string<OUTCHAR>>(arg); + } +}; + +// gstringize(single argument) +template <typename CHARTYPE, typename T> +auto gstringize(T&& item) +{ + // use decay<T> so we don't require separate specializations + // for T, const T, T&, const T& ... + return gstringize_impl<CHARTYPE, std::decay_t<T>>()(std::forward<T>(item)); } /** - * Specialization for generating wstring from string. - * Both a convenience function and saves a miniscule amount of overhead. + * Specialization of gstringize for std::string return types */ -inline std::wstring wstringize(const std::string& item) +template <typename... Items> +auto stringize(Items&&... items) { - // utf8str_to_wstring() returns LLWString, which isn't necessarily the - // same as std::wstring - LLWString s(utf8str_to_wstring(item)); - return std::wstring(s.begin(), s.end()); + return gstringize<char>(std::forward<Items>(items)...); } /** * Specialization of gstringize for std::wstring return types */ -template <typename T> -std::wstring wstringize(const T& item) +template <typename... Items> +auto wstringize(Items&&... items) { - return gstringize<wchar_t>(item); + return gstringize<wchar_t>(std::forward<Items>(items)...); } /** @@ -146,11 +197,9 @@ void destringize_f(std::basic_string<CHARTYPE> const & str, Functor const & f) * std::istringstream in(str); * in >> item1 >> item2 >> item3 ... ; * @endcode - * @NOTE - once we get generic lambdas, we shouldn't need DEWSTRINGIZE() any - * more since DESTRINGIZE() should do the right thing with a std::wstring. But - * until then, the lambda we pass must accept the right std::basic_istream. */ -#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::istream& in){in >> EXPRESSION;})) -#define DEWSTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::wistream& in){in >> EXPRESSION;})) +#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](auto& in){in >> EXPRESSION;})) +// legacy name, just use DESTRINGIZE() going forward +#define DEWSTRINGIZE(STR, EXPRESSION) DESTRINGIZE(STR, EXPRESSION) #endif /* ! 
defined(LL_STRINGIZE_H) */ diff --git a/indra/llcommon/tests/llinstancetracker_test.cpp b/indra/llcommon/tests/llinstancetracker_test.cpp index 9b89159625..5daa29adf4 100644 --- a/indra/llcommon/tests/llinstancetracker_test.cpp +++ b/indra/llcommon/tests/llinstancetracker_test.cpp @@ -90,19 +90,19 @@ namespace tut { Keyed one("one"); ensure_equals(Keyed::instanceCount(), 1); - Keyed* found = Keyed::getInstance("one"); - ensure("couldn't find stack Keyed", found); - ensure_equals("found wrong Keyed instance", found, &one); + auto found = Keyed::getInstance("one"); + ensure("couldn't find stack Keyed", bool(found)); + ensure_equals("found wrong Keyed instance", found.get(), &one); { boost::scoped_ptr<Keyed> two(new Keyed("two")); ensure_equals(Keyed::instanceCount(), 2); - Keyed* found = Keyed::getInstance("two"); - ensure("couldn't find heap Keyed", found); - ensure_equals("found wrong Keyed instance", found, two.get()); + auto found = Keyed::getInstance("two"); + ensure("couldn't find heap Keyed", bool(found)); + ensure_equals("found wrong Keyed instance", found.get(), two.get()); } ensure_equals(Keyed::instanceCount(), 1); } - Keyed* found = Keyed::getInstance("one"); + auto found = Keyed::getInstance("one"); ensure("Keyed key lives too long", ! found); ensure_equals(Keyed::instanceCount(), 0); } diff --git a/indra/llcommon/tests/llprocess_test.cpp b/indra/llcommon/tests/llprocess_test.cpp index f0eafa8201..447c7f50f2 100644 --- a/indra/llcommon/tests/llprocess_test.cpp +++ b/indra/llcommon/tests/llprocess_test.cpp @@ -356,14 +356,15 @@ namespace tut // Create a script file in a temporary place. NamedTempFile script("py", + "from __future__ import print_function" EOL "import sys" EOL "import time" EOL EOL "time.sleep(2)" EOL - "print >>sys.stdout, 'stdout after wait'" EOL + "print('stdout after wait',file=sys.stdout)" EOL "sys.stdout.flush()" EOL "time.sleep(2)" EOL - "print >>sys.stderr, 'stderr after wait'" EOL + "print('stderr after wait',file=sys.stderr)" EOL "sys.stderr.flush()" EOL ); @@ -568,12 +569,12 @@ namespace tut { set_test_name("arguments"); PythonProcessLauncher py(get_test_name(), - "from __future__ import with_statement\n" + "from __future__ import with_statement, print_function\n" "import sys\n" // note nonstandard output-file arg! "with open(sys.argv[3], 'w') as f:\n" " for arg in sys.argv[1:]:\n" - " print >>f, arg\n"); + " print(arg,file=f)\n"); // We expect that PythonProcessLauncher has already appended // its own NamedTempFile to mParams.args (sys.argv[0]). py.mParams.args.add("first arg"); // sys.argv[1] @@ -857,7 +858,8 @@ namespace tut set_test_name("'bogus' test"); CaptureLog recorder; PythonProcessLauncher py(get_test_name(), - "print 'Hello world'\n"); + "from __future__ import print_function\n" + "print('Hello world')\n"); py.mParams.files.add(LLProcess::FileParam("bogus")); py.mPy = LLProcess::create(py.mParams); ensure("should have rejected 'bogus'", ! 
py.mPy); @@ -872,7 +874,8 @@ namespace tut // Replace this test with one or more real 'file' tests when we // implement 'file' support PythonProcessLauncher py(get_test_name(), - "print 'Hello world'\n"); + "from __future__ import print_function\n" + "print('Hello world')\n"); py.mParams.files.add(LLProcess::FileParam()); py.mParams.files.add(LLProcess::FileParam("file")); py.mPy = LLProcess::create(py.mParams); @@ -887,7 +890,8 @@ namespace tut // implement 'tpipe' support CaptureLog recorder; PythonProcessLauncher py(get_test_name(), - "print 'Hello world'\n"); + "from __future__ import print_function\n" + "print('Hello world')\n"); py.mParams.files.add(LLProcess::FileParam()); py.mParams.files.add(LLProcess::FileParam("tpipe")); py.mPy = LLProcess::create(py.mParams); @@ -904,7 +908,8 @@ namespace tut // implement 'npipe' support CaptureLog recorder; PythonProcessLauncher py(get_test_name(), - "print 'Hello world'\n"); + "from __future__ import print_function\n" + "print('Hello world')\n"); py.mParams.files.add(LLProcess::FileParam()); py.mParams.files.add(LLProcess::FileParam()); py.mParams.files.add(LLProcess::FileParam("npipe")); @@ -980,7 +985,8 @@ namespace tut { set_test_name("get*Pipe() validation"); PythonProcessLauncher py(get_test_name(), - "print 'this output is expected'\n"); + "from __future__ import print_function\n" + "print('this output is expected')\n"); py.mParams.files.add(LLProcess::FileParam("pipe")); // pipe for stdin py.mParams.files.add(LLProcess::FileParam()); // inherit stdout py.mParams.files.add(LLProcess::FileParam("pipe")); // pipe for stderr @@ -1000,14 +1006,15 @@ namespace tut { set_test_name("talk to stdin/stdout"); PythonProcessLauncher py(get_test_name(), + "from __future__ import print_function\n" "import sys, time\n" - "print 'ok'\n" + "print('ok')\n" "sys.stdout.flush()\n" "# wait for 'go' from test program\n" "go = sys.stdin.readline()\n" "if go != 'go\\n':\n" " sys.exit('expected \"go\", saw %r' % go)\n" - "print 'ack'\n"); + "print('ack')\n"); py.mParams.files.add(LLProcess::FileParam("pipe")); // stdin py.mParams.files.add(LLProcess::FileParam("pipe")); // stdout py.launch(); @@ -1118,7 +1125,8 @@ namespace tut { set_test_name("ReadPipe \"eof\" event"); PythonProcessLauncher py(get_test_name(), - "print 'Hello from Python!'\n"); + "from __future__ import print_function\n" + "print('Hello from Python!')\n"); py.mParams.files.add(LLProcess::FileParam()); // stdin py.mParams.files.add(LLProcess::FileParam("pipe")); // stdout py.launch(); diff --git a/indra/llcommon/tests/threadsafeschedule_test.cpp b/indra/llcommon/tests/threadsafeschedule_test.cpp new file mode 100644 index 0000000000..c421cc7b1c --- /dev/null +++ b/indra/llcommon/tests/threadsafeschedule_test.cpp @@ -0,0 +1,69 @@ +/** + * @file threadsafeschedule_test.cpp + * @author Nat Goodspeed + * @date 2021-10-04 + * @brief Test for threadsafeschedule. + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. 
+ * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "threadsafeschedule.h" +// STL headers +// std headers +#include <chrono> +// external library headers +// other Linden headers +#include "../test/lltut.h" + +using namespace std::literals::chrono_literals; // ms suffix +using namespace std::literals::string_literals; // s suffix +using Queue = LL::ThreadSafeSchedule<std::string>; + +/***************************************************************************** +* TUT +*****************************************************************************/ +namespace tut +{ + struct threadsafeschedule_data + { + Queue queue; + }; + typedef test_group<threadsafeschedule_data> threadsafeschedule_group; + typedef threadsafeschedule_group::object object; + threadsafeschedule_group threadsafeschedulegrp("threadsafeschedule"); + + template<> template<> + void object::test<1>() + { + set_test_name("push"); + // Simply calling push() a few times might result in indeterminate + // delivery order if the resolution of steady_clock is coarser than + // the real time required for each push() call. Explicitly increment + // the timestamp for each one -- but since we're passing explicit + // timestamps, make the queue reorder them. + queue.push(Queue::TimeTuple(Queue::Clock::now() + 200ms, "ghi")); + // Given the various push() overloads, you have to match the type + // exactly: conversions are ambiguous. + queue.push("abc"s); + queue.push(Queue::Clock::now() + 100ms, "def"); + queue.close(); + auto entry = queue.pop(); + ensure_equals("failed to pop first", std::get<0>(entry), "abc"s); + entry = queue.pop(); + ensure_equals("failed to pop second", std::get<0>(entry), "def"s); + ensure("queue not closed", queue.isClosed()); + ensure("queue prematurely done", ! queue.done()); + std::string s; + bool popped = queue.tryPopFor(1s, s); + ensure("failed to pop third", popped); + ensure_equals("third is wrong", s, "ghi"s); + popped = queue.tryPop(s); + ensure("queue not empty", ! popped); + ensure("queue not done", queue.done()); + } +} // namespace tut diff --git a/indra/llcommon/tests/tuple_test.cpp b/indra/llcommon/tests/tuple_test.cpp new file mode 100644 index 0000000000..af94e2086c --- /dev/null +++ b/indra/llcommon/tests/tuple_test.cpp @@ -0,0 +1,47 @@ +/** + * @file tuple_test.cpp + * @author Nat Goodspeed + * @date 2021-10-04 + * @brief Test for tuple. + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. 
+ * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "tuple.h" +// STL headers +// std headers +// external library headers +// other Linden headers +#include "../test/lltut.h" + +/***************************************************************************** +* TUT +*****************************************************************************/ +namespace tut +{ + struct tuple_data + { + }; + typedef test_group<tuple_data> tuple_group; + typedef tuple_group::object object; + tuple_group tuplegrp("tuple"); + + template<> template<> + void object::test<1>() + { + set_test_name("tuple"); + std::tuple<std::string, int> tup{ "abc", 17 }; + std::tuple<int, std::string, int> ptup{ tuple_cons(34, tup) }; + std::tuple<std::string, int> tup2; + int i; + std::tie(i, tup2) = tuple_split(ptup); + ensure_equals("tuple_car() fail", i, 34); + ensure_equals("tuple_cdr() (0) fail", std::get<0>(tup2), "abc"); + ensure_equals("tuple_cdr() (1) fail", std::get<1>(tup2), 17); + } +} // namespace tut diff --git a/indra/llcommon/tests/workqueue_test.cpp b/indra/llcommon/tests/workqueue_test.cpp new file mode 100644 index 0000000000..1d73f7aa0d --- /dev/null +++ b/indra/llcommon/tests/workqueue_test.cpp @@ -0,0 +1,235 @@ +/** + * @file workqueue_test.cpp + * @author Nat Goodspeed + * @date 2021-10-07 + * @brief Test for workqueue. + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "workqueue.h" +// STL headers +// std headers +#include <chrono> +#include <deque> +// external library headers +// other Linden headers +#include "../test/lltut.h" +#include "../test/catch_and_store_what_in.h" +#include "llcond.h" +#include "llcoros.h" +#include "lleventcoro.h" +#include "llstring.h" +#include "stringize.h" + +using namespace LL; +using namespace std::literals::chrono_literals; // ms suffix +using namespace std::literals::string_literals; // s suffix + +/***************************************************************************** +* TUT +*****************************************************************************/ +namespace tut +{ + struct workqueue_data + { + WorkQueue queue{"queue"}; + }; + typedef test_group<workqueue_data> workqueue_group; + typedef workqueue_group::object object; + workqueue_group workqueuegrp("workqueue"); + + template<> template<> + void object::test<1>() + { + set_test_name("name"); + ensure_equals("didn't capture name", queue.getKey(), "queue"); + ensure("not findable", WorkQueue::getInstance("queue") == queue.getWeak().lock()); + WorkQueue q2; + ensure("has no name", LLStringUtil::startsWith(q2.getKey(), "WorkQueue")); + } + + template<> template<> + void object::test<2>() + { + set_test_name("post"); + bool wasRun{ false }; + // We only get away with binding a simple bool because we're running + // the work on the same thread. + queue.post([&wasRun](){ wasRun = true; }); + queue.close(); + ensure("ran too soon", ! wasRun); + queue.runUntilClose(); + ensure("didn't run", wasRun); + } + + template<> template<> + void object::test<3>() + { + set_test_name("postEvery"); + // record of runs + using Shared = std::deque<WorkQueue::TimePoint>; + // This is an example of how to share data between the originator of + // postEvery(work) and the work item itself, since usually a WorkQueue + // is used to dispatch work to a different thread. 
Neither of them + // should call any of LLCond's wait methods: you don't want to stall + // either the worker thread or the originating thread (conventionally + // main). Use LLCond or a subclass even if all you want to do is + // signal the work item that it can quit; consider LLOneShotCond. + LLCond<Shared> data; + auto start = WorkQueue::TimePoint::clock::now(); + auto interval = 100ms; + queue.postEvery( + interval, + [&data, count = 0] + () mutable + { + // record the timestamp at which this instance is running + data.update_one( + [](Shared& data) + { + data.push_back(WorkQueue::TimePoint::clock::now()); + }); + // by the 3rd call, return false to stop + return (++count < 3); + }); + // no convenient way to close() our queue while we've got a + // postEvery() running, so run until we have exhausted the iterations + // or we time out waiting + for (auto finish = start + 10*interval; + WorkQueue::TimePoint::clock::now() < finish && + data.get([](const Shared& data){ return data.size(); }) < 3; ) + { + queue.runPending(); + std::this_thread::sleep_for(interval/10); + } + // Take a copy of the captured deque. + Shared result = data.get(); + ensure_equals("called wrong number of times", result.size(), 3); + // postEvery() assumes you want the first call to happen right away. + // Pretend our start time was (interval) earlier than that, to make + // our too early/too late tests uniform for all entries. + start -= interval; + for (size_t i = 0; i < result.size(); ++i) + { + auto diff = result[i] - start; + start += interval; + try + { + ensure(STRINGIZE("call " << i << " too soon"), diff >= interval); + ensure(STRINGIZE("call " << i << " too late"), diff < interval*1.5); + } + catch (const tut::failure&) + { + auto interval_ms = interval / 1ms; + auto diff_ms = diff / 1ms; + std::cerr << "interval " << interval_ms + << "ms; diff " << diff_ms << "ms" << std::endl; + throw; + } + } + } + + template<> template<> + void object::test<4>() + { + set_test_name("postTo"); + WorkQueue main("main"); + auto qptr = WorkQueue::getInstance("queue"); + int result = 0; + main.postTo( + qptr, + [](){ return 17; }, + // Note that a postTo() *callback* can safely bind a reference to + // a variable on the invoking thread, because the callback is run + // on the invoking thread. (Of course the bound variable must + // survive until the callback is called.) + [&result](int i){ result = i; }); + // this should post the callback to main + qptr->runOne(); + // this should run the callback + main.runOne(); + ensure_equals("failed to run int callback", result, 17); + + std::string alpha; + // postTo() handles arbitrary return types + main.postTo( + qptr, + [](){ return "abc"s; }, + [&alpha](const std::string& s){ alpha = s; }); + qptr->runPending(); + main.runPending(); + ensure_equals("failed to run string callback", alpha, "abc"); + } + + template<> template<> + void object::test<5>() + { + set_test_name("postTo with void return"); + WorkQueue main("main"); + auto qptr = WorkQueue::getInstance("queue"); + std::string observe; + main.postTo( + qptr, + // The ONLY reason we can get away with binding a reference to + // 'observe' in our work callable is because we're directly + // calling qptr->runOne() on this same thread. It would be a + // mistake to do that if some other thread were servicing 'queue'. 
+ [&observe](){ observe = "queue"; }, + [&observe](){ observe.append(";main"); }); + qptr->runOne(); + main.runOne(); + ensure_equals("failed to run both lambdas", observe, "queue;main"); + } + + template<> template<> + void object::test<6>() + { + set_test_name("waitForResult"); + std::string stored; + // Try to call waitForResult() on this thread's main coroutine. It + // should throw because the main coroutine must service the queue. + auto what{ catch_what<WorkQueue::Error>( + [this, &stored](){ stored = queue.waitForResult( + [](){ return "should throw"; }); }) }; + ensure("lambda should not have run", stored.empty()); + ensure_not("waitForResult() should have thrown", what.empty()); + ensure(STRINGIZE("should mention waitForResult: " << what), + what.find("waitForResult") != std::string::npos); + + // Call waitForResult() on a coroutine, with a string result. + LLCoros::instance().launch( + "waitForResult string", + [this, &stored]() + { stored = queue.waitForResult( + [](){ return "string result"; }); }); + llcoro::suspend(); + // Nothing will have happened yet because, even if the coroutine did + // run immediately, all it did was to queue the inner lambda on + // 'queue'. Service it. + queue.runOne(); + llcoro::suspend(); + ensure_equals("bad waitForResult return", stored, "string result"); + + // Call waitForResult() on a coroutine, with a void callable. + stored.clear(); + bool done = false; + LLCoros::instance().launch( + "waitForResult void", + [this, &stored, &done]() + { + queue.waitForResult([&stored](){ stored = "ran"; }); + done = true; + }); + llcoro::suspend(); + queue.runOne(); + llcoro::suspend(); + ensure_equals("didn't run coroutine", stored, "ran"); + ensure("void waitForResult() didn't return", done); + } +} // namespace tut diff --git a/indra/llcommon/threadpool.cpp b/indra/llcommon/threadpool.cpp new file mode 100644 index 0000000000..ba914035e2 --- /dev/null +++ b/indra/llcommon/threadpool.cpp @@ -0,0 +1,88 @@ +/** + * @file threadpool.cpp + * @author Nat Goodspeed + * @date 2021-10-21 + * @brief Implementation for threadpool. + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "threadpool.h" +// STL headers +// std headers +// external library headers +// other Linden headers +#include "llerror.h" +#include "llevents.h" +#include "stringize.h" + +LL::ThreadPool::ThreadPool(const std::string& name, size_t threads, size_t capacity): + mQueue(name, capacity), + mName("ThreadPool:" + name), + mThreadCount(threads) +{} + +void LL::ThreadPool::start() +{ + for (size_t i = 0; i < mThreadCount; ++i) + { + std::string tname{ stringize(mName, ':', (i+1), '/', mThreadCount) }; + mThreads.emplace_back(tname, [this, tname]() + { + LL_PROFILER_SET_THREAD_NAME(tname.c_str()); + run(tname); + }); + } + // Listen on "LLApp", and when the app is shutting down, close the queue + // and join the workers. + LLEventPumps::instance().obtain("LLApp").listen( + mName, + [this](const LLSD& stat) + { + std::string status(stat["status"]); + if (status != "running") + { + // viewer is starting shutdown -- proclaim the end is nigh! + LL_DEBUGS("ThreadPool") << mName << " saw " << status << LL_ENDL; + close(); + } + return false; + }); +} + +LL::ThreadPool::~ThreadPool() +{ + close(); +} + +void LL::ThreadPool::close() +{ + if (! 
mQueue.isClosed()) + { + LL_DEBUGS("ThreadPool") << mName << " closing queue and joining threads" << LL_ENDL; + mQueue.close(); + for (auto& pair: mThreads) + { + LL_DEBUGS("ThreadPool") << mName << " waiting on thread " << pair.first << LL_ENDL; + pair.second.join(); + } + LL_DEBUGS("ThreadPool") << mName << " shutdown complete" << LL_ENDL; + } +} + +void LL::ThreadPool::run(const std::string& name) +{ + LL_DEBUGS("ThreadPool") << name << " starting" << LL_ENDL; + run(); + LL_DEBUGS("ThreadPool") << name << " stopping" << LL_ENDL; +} + +void LL::ThreadPool::run() +{ + mQueue.runUntilClose(); +} diff --git a/indra/llcommon/threadpool.h b/indra/llcommon/threadpool.h new file mode 100644 index 0000000000..b79c9b9090 --- /dev/null +++ b/indra/llcommon/threadpool.h @@ -0,0 +1,71 @@ +/** + * @file threadpool.h + * @author Nat Goodspeed + * @date 2021-10-21 + * @brief ThreadPool configures a WorkQueue along with a pool of threads to + * service it. + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_THREADPOOL_H) +#define LL_THREADPOOL_H + +#include "workqueue.h" +#include <string> +#include <thread> +#include <utility> // std::pair +#include <vector> + +namespace LL +{ + + class ThreadPool + { + public: + /** + * Pass ThreadPool a string name. This can be used to look up the + * relevant WorkQueue. + */ + ThreadPool(const std::string& name, size_t threads=1, size_t capacity=1024); + virtual ~ThreadPool(); + + /** + * Launch the ThreadPool. Until this call, a constructed ThreadPool + * launches no threads. That permits coders to derive from ThreadPool, + * or store it as a member of some other class, but refrain from + * launching it until all other construction is complete. + */ + void start(); + + /** + * ThreadPool listens for application shutdown messages on the "LLApp" + * LLEventPump. Call close() to shut down this ThreadPool early. + */ + void close(); + + std::string getName() const { return mName; } + size_t getWidth() const { return mThreads.size(); } + /// obtain a non-const reference to the WorkQueue to post work to it + WorkQueue& getQueue() { return mQueue; } + + /** + * Override run() if you need special processing. The default run() + * implementation simply calls WorkQueue::runUntilClose(). + */ + virtual void run(); + + private: + void run(const std::string& name); + + WorkQueue mQueue; + std::string mName; + size_t mThreadCount; + std::vector<std::pair<std::string, std::thread>> mThreads; + }; + +} // namespace LL + +#endif /* ! defined(LL_THREADPOOL_H) */ diff --git a/indra/llcommon/threadsafeschedule.h b/indra/llcommon/threadsafeschedule.h new file mode 100644 index 0000000000..601681d550 --- /dev/null +++ b/indra/llcommon/threadsafeschedule.h @@ -0,0 +1,399 @@ +/** + * @file threadsafeschedule.h + * @author Nat Goodspeed + * @date 2021-10-02 + * @brief ThreadSafeSchedule is an ordered queue in which every item has an + * associated timestamp. + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_THREADSAFESCHEDULE_H) +#define LL_THREADSAFESCHEDULE_H + +#include "chrono.h" +#include "llexception.h" +#include "llthreadsafequeue.h" +#include "tuple.h" +#include <chrono> +#include <tuple> + +namespace LL +{ + namespace ThreadSafeSchedulePrivate + { + using TimePoint = std::chrono::steady_clock::time_point; + // Bundle consumer's data with a TimePoint to order items by timestamp. 
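(Editor's note, before the ThreadSafeSchedule header continues below.) The threadpool.{h,cpp} files above introduce LL::ThreadPool, which owns a named WorkQueue plus a pool of std::threads that drain it via runUntilClose(), and which closes itself when the "LLApp" pump reports shutdown. A hedged usage sketch based only on the interface shown above; the pool name, thread count, and work item are illustrative:

#include "threadpool.h"

void start_background_pool()
{
    // One WorkQueue named "general", serviced by 4 threads, capacity 1024 items.
    LL::ThreadPool pool("general", 4, 1024);
    pool.start();                            // threads are launched here, not in the constructor

    // Any thread can post work to the pool's queue.
    pool.getQueue().post([](){ /* background work */ });

    // close() shuts the queue and joins the workers; the destructor and the
    // "LLApp" shutdown listener installed by start() do the same.
    pool.close();
}
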
+ template <typename... Args> + using TimestampedTuple = std::tuple<TimePoint, Args...>; + + // comparison functor for TimedTuples -- see TimedQueue comments + struct ReverseTupleOrder + { + template <typename Tuple> + bool operator()(const Tuple& left, const Tuple& right) const + { + return std::get<0>(left) > std::get<0>(right); + } + }; + + template <typename... Args> + using TimedQueue = PriorityQueueAdapter< + TimestampedTuple<Args...>, + // std::vector is the default storage for std::priority_queue, + // have to restate to specify comparison template parameter + std::vector<TimestampedTuple<Args...>>, + // std::priority_queue uses a counterintuitive comparison + // behavior: the default std::less comparator is used to present + // the *highest* value as top(). So to sort by earliest timestamp, + // we must invert by using >. + ReverseTupleOrder>; + } // namespace ThreadSafeSchedulePrivate + + /** + * ThreadSafeSchedule is an ordered LLThreadSafeQueue in which every item + * is given an associated timestamp. That is, TimePoint is implicitly + * prepended to the std::tuple with the specified types. + * + * Items are popped in increasing chronological order. Moreover, any item + * with a timestamp in the future is held back until + * std::chrono::steady_clock reaches that timestamp. + */ + template <typename... Args> + class ThreadSafeSchedule: + public LLThreadSafeQueue<ThreadSafeSchedulePrivate::TimestampedTuple<Args...>, + ThreadSafeSchedulePrivate::TimedQueue<Args...>> + { + public: + using DataTuple = std::tuple<Args...>; + using TimeTuple = ThreadSafeSchedulePrivate::TimestampedTuple<Args...>; + + private: + using super = LLThreadSafeQueue<TimeTuple, ThreadSafeSchedulePrivate::TimedQueue<Args...>>; + using lock_t = typename super::lock_t; + // VS 2017 needs this due to a bug: + // https://developercommunity.visualstudio.com/t/cannot-access-protected-enumerator-of-enclosing-cl/203430 + enum pop_result { EMPTY=super::EMPTY, DONE=super::DONE, WAITING=super::WAITING, POPPED=super::POPPED }; + + public: + using Closed = LLThreadSafeQueueInterrupt; + using TimePoint = ThreadSafeSchedulePrivate::TimePoint; + using Clock = TimePoint::clock; + + ThreadSafeSchedule(U32 capacity=1024): + super(capacity) + {} + + /*----------------------------- push() -----------------------------*/ + /// explicitly pass TimeTuple + using super::push; + + /// pass DataTuple with implicit now + // This could be ambiguous for Args with a single type. Unfortunately + // we can't enable_if an individual method with a condition based on + // the *class* template arguments, only on that method's template + // arguments. We could specialize this class for the single-Args case; + // we could minimize redundancy by breaking out a common base class... + void push(const DataTuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + push(tuple_cons(Clock::now(), tuple)); + } + + /// individually pass each component of the TimeTuple + void push(const TimePoint& time, Args&&... args) + { + LL_PROFILE_ZONE_SCOPED; + push(TimeTuple(time, std::forward<Args>(args)...)); + } + + /// individually pass every component except the TimePoint (implies now) + // This could be ambiguous if the first specified template parameter + // type is also TimePoint. We could try to disambiguate, but a simpler + // approach would be for the caller to explicitly construct DataTuple + // and call that overload. + void push(Args&&... 
args) + { + LL_PROFILE_ZONE_SCOPED; + push(Clock::now(), std::forward<Args>(args)...); + } + + /*--------------------------- tryPush() ----------------------------*/ + /// explicit TimeTuple + using super::tryPush; + + /// DataTuple with implicit now + bool tryPush(const DataTuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + return tryPush(tuple_cons(Clock::now(), tuple)); + } + + /// individually pass components + bool tryPush(const TimePoint& time, Args&&... args) + { + LL_PROFILE_ZONE_SCOPED; + return tryPush(TimeTuple(time, std::forward<Args>(args)...)); + } + + /// individually pass components with implicit now + bool tryPush(Args&&... args) + { + LL_PROFILE_ZONE_SCOPED; + return tryPush(Clock::now(), std::forward<Args>(args)...); + } + + /*-------------------------- tryPushFor() --------------------------*/ + /// explicit TimeTuple + using super::tryPushFor; + + /// DataTuple with implicit now + template <typename Rep, typename Period> + bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout, + const DataTuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + return tryPushFor(timeout, tuple_cons(Clock::now(), tuple)); + } + + /// individually pass components + template <typename Rep, typename Period> + bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout, + const TimePoint& time, Args&&... args) + { + LL_PROFILE_ZONE_SCOPED; + return tryPushFor(TimeTuple(time, std::forward<Args>(args)...)); + } + + /// individually pass components with implicit now + template <typename Rep, typename Period> + bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout, + Args&&... args) + { + LL_PROFILE_ZONE_SCOPED; + return tryPushFor(Clock::now(), std::forward<Args>(args)...); + } + + /*------------------------- tryPushUntil() -------------------------*/ + /// explicit TimeTuple + using super::tryPushUntil; + + /// DataTuple with implicit now + template <typename Clock, typename Duration> + bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until, + const DataTuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + return tryPushUntil(until, tuple_cons(Clock::now(), tuple)); + } + + /// individually pass components + template <typename Clock, typename Duration> + bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until, + const TimePoint& time, Args&&... args) + { + LL_PROFILE_ZONE_SCOPED; + return tryPushUntil(until, TimeTuple(time, std::forward<Args>(args)...)); + } + + /// individually pass components with implicit now + template <typename Clock, typename Duration> + bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until, + Args&&... args) + { + LL_PROFILE_ZONE_SCOPED; + return tryPushUntil(until, Clock::now(), std::forward<Args>(args)...); + } + + /*----------------------------- pop() ------------------------------*/ + // Our consumer may or may not care about the timestamp associated + // with each popped item, so we allow retrieving either DataTuple or + // TimeTuple. One potential use would be to observe, and possibly + // adjust for, the time lag between the item time and the actual + // current time. + + /// pop DataTuple by value + // It would be great to notice when sizeof...(Args) == 1 and directly + // return the first (only) value, instead of making pop()'s caller + // call std::get<0>(value). See push(DataTuple) remarks for why we + // haven't yet jumped through those hoops. 
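A short usage sketch of the push() family may help -- illustrative only, not part of this changeset; the explicit std::string temporaries sidestep the single-type ambiguity noted above, and pop() is described next:

    LL::ThreadSafeSchedule<std::string> schedule;
    using Clock = decltype(schedule)::Clock;                  // std::chrono::steady_clock
    schedule.push(std::string("now"));                        // timestamped with Clock::now()
    schedule.push(Clock::now() + std::chrono::seconds(5),
                  std::string("later"));                      // held back until its timestamp
    auto data = schedule.pop();                               // DataTuple, i.e. std::tuple<std::string>
    // "later" is not delivered until steady_clock reaches its timestamp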
+ DataTuple pop() + { + LL_PROFILE_ZONE_SCOPED; + return tuple_cdr(popWithTime()); + } + + /// pop TimeTuple by value + TimeTuple popWithTime() + { + LL_PROFILE_ZONE_SCOPED; + lock_t lock(super::mLock); + // We can't just sit around waiting forever, given that there may + // be items in the queue that are not yet ready but will *become* + // ready in the near future. So in fact, with this class, every + // pop() becomes a tryPopUntil(), constrained to the timestamp of + // the head item. It almost doesn't matter what we specify for the + // caller's time constraint -- all we really care about is the + // head item's timestamp. Since pop() and popWithTime() are + // defined to wait until either an item becomes available or the + // queue is closed, loop until one of those things happens. The + // constraint we pass just determines how often we'll loop while + // waiting. + TimeTuple tt; + while (true) + { + // Pick a point suitably far into the future. + TimePoint until = TimePoint::clock::now() + std::chrono::hours(24); + pop_result popped = tryPopUntil_(lock, until, tt); + if (popped == POPPED) + return std::move(tt); + + // DONE: throw, just as super::pop() does + if (popped == DONE) + { + LLTHROW(LLThreadSafeQueueInterrupt()); + } + // WAITING: we've still got items to drain. + // EMPTY: not closed, so it's worth waiting for more items. + // Either way, loop back to wait. + } + } + + // We can use tryPop(TimeTuple&) just as it stands; the only behavior + // difference is in our canPop() override method. + using super::tryPop; + + /// tryPop(DataTuple&) + bool tryPop(DataTuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + TimeTuple tt; + if (! super::tryPop(tt)) + return false; + tuple = tuple_cdr(std::move(tt)); + return true; + } + + /// for when Args has exactly one type + bool tryPop(typename std::tuple_element<1, TimeTuple>::type& value) + { + LL_PROFILE_ZONE_SCOPED; + TimeTuple tt; + if (! super::tryPop(tt)) + return false; + value = std::get<1>(std::move(tt)); + return true; + } + + /// tryPopFor() + template <typename Rep, typename Period, typename Tuple> + bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, Tuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + // It's important to use OUR tryPopUntil() implementation, rather + // than delegating immediately to our base class. + return tryPopUntil(Clock::now() + timeout, tuple); + } + + /// tryPopUntil(TimeTuple&) + template <typename Clock, typename Duration> + bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until, + TimeTuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + // super::tryPopUntil() wakes up when an item becomes available or + // we hit 'until', whichever comes first. Thing is, the current + // head of the queue could become ready sooner than either of + // those events, and we need to deliver it as soon as it does. + // Don't wait past the TimePoint of the head item. + // Naturally, lock the queue before peeking at mStorage. + return super::tryLockUntil( + until, + [this, until, &tuple](lock_t& lock) + { + // Use our time_point_cast to allow for 'until' that's a + // time_point type other than TimePoint. + return POPPED == + tryPopUntil_(lock, LL::time_point_cast<TimePoint>(until), tuple); + }); + } + + pop_result tryPopUntil_(lock_t& lock, const TimePoint& until, TimeTuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + TimePoint adjusted = until; + if (! 
super::mStorage.empty()) + { + LL_PROFILE_ZONE_NAMED("tpu - adjust"); + // use whichever is earlier: the head item's timestamp, or + // the caller's limit + adjusted = min(std::get<0>(super::mStorage.front()), adjusted); + } + // now delegate to base-class tryPopUntil_() + pop_result popped; + { + LL_PROFILE_ZONE_NAMED("tpu - super"); + while ((popped = pop_result(super::tryPopUntil_(lock, adjusted, tuple))) == WAITING) + { + // If super::tryPopUntil_() returns WAITING, it means there's + // a head item, but it's not yet time. But it's worth looping + // back to recheck. + } + } + return popped; + } + + /// tryPopUntil(DataTuple&) + template <typename Clock, typename Duration> + bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until, + DataTuple& tuple) + { + LL_PROFILE_ZONE_SCOPED; + TimeTuple tt; + if (! tryPopUntil(until, tt)) + return false; + tuple = tuple_cdr(std::move(tt)); + return true; + } + + /// for when Args has exactly one type + template <typename Clock, typename Duration> + bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until, + typename std::tuple_element<1, TimeTuple>::type& value) + { + LL_PROFILE_ZONE_SCOPED; + TimeTuple tt; + if (! tryPopUntil(until, tt)) + return false; + value = std::get<1>(std::move(tt)); + return true; + } + + /*------------------------------ etc. ------------------------------*/ + // We can't hide items that aren't yet ready because we can't traverse + // the underlying priority_queue: it has no iterators, only top(). So + // a consumer could observe size() > 0 and yet tryPop() returns false. + // Shrug, in a multi-consumer scenario that would be expected behavior. + using super::size; + // open/closed state + using super::close; + using super::isClosed; + using super::done; + + private: + // this method is called by base class pop_() every time we're + // considering whether to deliver the current head element + bool canPop(const TimeTuple& head) const override + { + LL_PROFILE_ZONE_SCOPED; + // an item with a future timestamp isn't yet ready to pop + // (should we add some slop for overhead?) + return std::get<0>(head) <= Clock::now(); + } + }; + +} // namespace LL + +#endif /* ! defined(LL_THREADSAFESCHEDULE_H) */ diff --git a/indra/llcommon/timing.cpp b/indra/llcommon/timing.cpp deleted file mode 100644 index c2dc695ef3..0000000000 --- a/indra/llcommon/timing.cpp +++ /dev/null @@ -1,25 +0,0 @@ -/** - * @file timing.cpp - * @brief This file will be deprecated in the future. - * - * $LicenseInfo:firstyear=2000&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA - * $/LicenseInfo$ - */ diff --git a/indra/llcommon/tuple.h b/indra/llcommon/tuple.h new file mode 100644 index 0000000000..bfe7e3c2ba --- /dev/null +++ b/indra/llcommon/tuple.h @@ -0,0 +1,84 @@ +/** + * @file tuple.h + * @author Nat Goodspeed + * @date 2021-10-04 + * @brief A couple tuple utilities + * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_TUPLE_H) +#define LL_TUPLE_H + +#include <tuple> +#include <type_traits> // std::remove_reference +#include <utility> // std::pair + +/** + * tuple_cons() behaves like LISP cons: it uses std::tuple_cat() to prepend a + * new item of arbitrary type to an existing std::tuple. + */ +template <typename First, typename... Rest, typename Tuple_=std::tuple<Rest...>> +auto tuple_cons(First&& first, Tuple_&& rest) +{ + // All we need to do is make a tuple containing 'first', and let + // tuple_cat() do the hard part. + return std::tuple_cat(std::tuple<First>(std::forward<First>(first)), + std::forward<Tuple_>(rest)); +} + +/** + * tuple_car() behaves like LISP car: it extracts the first item from a + * std::tuple. + */ +template <typename... Args, typename Tuple_=std::tuple<Args...>> +auto tuple_car(Tuple_&& tuple) +{ + return std::get<0>(std::forward<Tuple_>(tuple)); +} + +/** + * tuple_cdr() behaves like LISP cdr: it returns a new tuple containing + * everything BUT the first item. + */ +// derived from https://stackoverflow.com/a/24046437 +template <typename Tuple, std::size_t... Indices> +auto tuple_cdr_(Tuple&& tuple, const std::index_sequence<Indices...>) +{ + // Given an index sequence from [0..N-1), extract tuple items [1..N) + return std::make_tuple(std::get<Indices+1u>(std::forward<Tuple>(tuple))...); +} + +template <typename Tuple> +auto tuple_cdr(Tuple&& tuple) +{ + return tuple_cdr_( + std::forward<Tuple>(tuple), + // Pass helper function an index sequence one item shorter than tuple + std::make_index_sequence< + std::tuple_size< + // tuple_size doesn't like reference types + typename std::remove_reference<Tuple>::type + >::value - 1u> + ()); +} + +/** + * tuple_split(), the opposite of tuple_cons(), has no direct analog in LISP. + * It returns a std::pair of tuple_car(), tuple_cdr(). We could call this + * function tuple_car_cdr(), or tuple_slice() or some such. But tuple_split() + * feels more descriptive. + */ +template <typename... Args, typename Tuple_=std::tuple<Args...>> +auto tuple_split(Tuple_&& tuple) +{ + // We're not really worried about forwarding multiple times a tuple that + // might contain move-only items, because the implementation above only + // applies std::get() exactly once to each item. + return std::make_pair(tuple_car(std::forward<Tuple_>(tuple)), + tuple_cdr(std::forward<Tuple_>(tuple))); +} + +#endif /* ! defined(LL_TUPLE_H) */ diff --git a/indra/llcommon/workqueue.cpp b/indra/llcommon/workqueue.cpp new file mode 100644 index 0000000000..c74dada2e4 --- /dev/null +++ b/indra/llcommon/workqueue.cpp @@ -0,0 +1,158 @@ +/** + * @file workqueue.cpp + * @author Nat Goodspeed + * @date 2021-10-06 + * @brief Implementation for WorkQueue. 
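For reference, a quick sketch of how these tuple helpers compose -- illustrative only, not part of this changeset:

    std::tuple<int, std::string> rest{ 42, "x" };
    auto whole = tuple_cons(3.14, rest);     // std::tuple<double, int, std::string>
    auto first = tuple_car(whole);           // 3.14
    auto other = tuple_cdr(whole);           // std::tuple<int, std::string>
    auto parts = tuple_split(whole);         // std::pair of first and other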
+ * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "workqueue.h" +// STL headers +// std headers +// external library headers +// other Linden headers +#include "llcoros.h" +#include LLCOROS_MUTEX_HEADER +#include "llerror.h" +#include "llexception.h" +#include "stringize.h" + +using Mutex = LLCoros::Mutex; +using Lock = LLCoros::LockType; + +LL::WorkQueue::WorkQueue(const std::string& name, size_t capacity): + super(makeName(name)), + mQueue(capacity) +{ + // TODO: register for "LLApp" events so we can implicitly close() on + // viewer shutdown. +} + +void LL::WorkQueue::close() +{ + mQueue.close(); +} + +size_t LL::WorkQueue::size() +{ + return mQueue.size(); +} + +bool LL::WorkQueue::isClosed() +{ + return mQueue.isClosed(); +} + +bool LL::WorkQueue::done() +{ + return mQueue.done(); +} + +void LL::WorkQueue::runUntilClose() +{ + try + { + for (;;) + { + LL_PROFILE_ZONE_SCOPED; + callWork(mQueue.pop()); + } + } + catch (const Queue::Closed&) + { + } +} + +bool LL::WorkQueue::runPending() +{ + LL_PROFILE_ZONE_SCOPED; + for (Work work; mQueue.tryPop(work); ) + { + callWork(work); + } + return ! mQueue.done(); +} + +bool LL::WorkQueue::runOne() +{ + Work work; + if (mQueue.tryPop(work)) + { + callWork(work); + } + return ! mQueue.done(); +} + +bool LL::WorkQueue::runUntil(const TimePoint& until) +{ + LL_PROFILE_ZONE_SCOPED; + // Should we subtract some slop to allow for typical Work execution time? + // How much slop? + // runUntil() is simply a time-bounded runPending(). + for (Work work; TimePoint::clock::now() < until && mQueue.tryPop(work); ) + { + callWork(work); + } + return ! mQueue.done(); +} + +std::string LL::WorkQueue::makeName(const std::string& name) +{ + if (! name.empty()) + return name; + + static U32 discriminator = 0; + static Mutex mutex; + U32 num; + { + // Protect discriminator from concurrent access by different threads. + // It can't be thread_local, else two racing threads will come up with + // the same name. + Lock lk(mutex); + num = discriminator++; + } + return STRINGIZE("WorkQueue" << num); +} + +void LL::WorkQueue::callWork(const Queue::DataTuple& work) +{ + // ThreadSafeSchedule::pop() always delivers a tuple, even when + // there's only one data field per item, as for us. + callWork(std::get<0>(work)); +} + +void LL::WorkQueue::callWork(const Work& work) +{ + LL_PROFILE_ZONE_SCOPED; + try + { + work(); + } + catch (...) + { + // No matter what goes wrong with any individual work item, the worker + // thread must go on! Log our own instance name with the exception. + LOG_UNHANDLED_EXCEPTION(getKey()); + } +} + +void LL::WorkQueue::error(const std::string& msg) +{ + LL_ERRS("WorkQueue") << msg << LL_ENDL; +} + +void LL::WorkQueue::checkCoroutine(const std::string& method) +{ + // By convention, the default coroutine on each thread has an empty name + // string. See also LLCoros::logname(). + if (LLCoros::getName().empty()) + { + LLTHROW(Error("Do not call " + method + " from a thread's default coroutine")); + } +} diff --git a/indra/llcommon/workqueue.h b/indra/llcommon/workqueue.h new file mode 100644 index 0000000000..96574a18b9 --- /dev/null +++ b/indra/llcommon/workqueue.h @@ -0,0 +1,574 @@ +/** + * @file workqueue.h + * @author Nat Goodspeed + * @date 2021-09-30 + * @brief Queue used for inter-thread work passing. 
+ * + * $LicenseInfo:firstyear=2021&license=viewerlgpl$ + * Copyright (c) 2021, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_WORKQUEUE_H) +#define LL_WORKQUEUE_H + +#include "llcoros.h" +#include "llexception.h" +#include "llinstancetracker.h" +#include "threadsafeschedule.h" +#include <chrono> +#include <exception> // std::current_exception +#include <functional> // std::function +#include <string> + +namespace LL +{ + /** + * A typical WorkQueue has a string name that can be used to find it. + */ + class WorkQueue: public LLInstanceTracker<WorkQueue, std::string> + { + private: + using super = LLInstanceTracker<WorkQueue, std::string>; + + public: + using Work = std::function<void()>; + + private: + using Queue = ThreadSafeSchedule<Work>; + // helper for postEvery() + template <typename Rep, typename Period, typename CALLABLE> + class BackJack; + + public: + using TimePoint = Queue::TimePoint; + using TimedWork = Queue::TimeTuple; + using Closed = Queue::Closed; + + struct Error: public LLException + { + Error(const std::string& what): LLException(what) {} + }; + + /** + * You may omit the WorkQueue name, in which case a unique name is + * synthesized; for practical purposes that makes it anonymous. + */ + WorkQueue(const std::string& name = std::string(), size_t capacity=1024); + + /** + * Since the point of WorkQueue is to pass work to some other worker + * thread(s) asynchronously, it's important that the WorkQueue continue + * to exist until the worker thread(s) have drained it. To communicate + * that it's time for them to quit, close() the queue. + */ + void close(); + + /** + * WorkQueue supports multiple producers and multiple consumers. In + * the general case it's misleading to test size(), since any other + * thread might change it the nanosecond the lock is released. On that + * basis, some might argue against publishing a size() method at all. + * + * But there are two specific cases in which a test based on size() + * might be reasonable: + * + * * If you're the only producer, noticing that size() == 0 is + * meaningful. + * * If you're the only consumer, noticing that size() > 0 is + * meaningful. + */ + size_t size(); + /// producer end: are we prevented from pushing any additional items? + bool isClosed(); + /// consumer end: are we done, is the queue entirely drained? + bool done(); + + /*---------------------- fire and forget API -----------------------*/ + + /// fire-and-forget, but at a particular (future?) time + template <typename CALLABLE> + void post(const TimePoint& time, CALLABLE&& callable) + { + // Defer reifying an arbitrary CALLABLE until we hit this or + // postIfOpen(). All other methods should accept CALLABLEs of + // arbitrary type to avoid multiple levels of std::function + // indirection. + mQueue.push(TimedWork(time, std::move(callable))); + } + + /// fire-and-forget + template <typename CALLABLE> + void post(CALLABLE&& callable) + { + // We use TimePoint::clock::now() instead of TimePoint's + // representation of the epoch because this WorkQueue may contain + // a mix of past-due TimedWork items and TimedWork items scheduled + // for the future. Sift this new item into the correct place. + post(TimePoint::clock::now(), std::move(callable)); + } + + /** + * post work for a particular time, unless the queue is closed before + * we can post + */ + template <typename CALLABLE> + bool postIfOpen(const TimePoint& time, CALLABLE&& callable) + { + // Defer reifying an arbitrary CALLABLE until we hit this or + // post(). 
All other methods should accept CALLABLEs of arbitrary + // type to avoid multiple levels of std::function indirection. + return mQueue.pushIfOpen(TimedWork(time, std::move(callable))); + } + + /** + * post work, unless the queue is closed before we can post + */ + template <typename CALLABLE> + bool postIfOpen(CALLABLE&& callable) + { + return postIfOpen(TimePoint::clock::now(), std::move(callable)); + } + + /** + * Post work to be run at a specified time to another WorkQueue, which + * may or may not still exist and be open. Return true if we were able + * to post. + */ + template <typename CALLABLE> + static bool postMaybe(weak_t target, const TimePoint& time, CALLABLE&& callable); + + /** + * Post work to another WorkQueue, which may or may not still exist + * and be open. Return true if we were able to post. + */ + template <typename CALLABLE> + static bool postMaybe(weak_t target, CALLABLE&& callable) + { + return postMaybe(target, TimePoint::clock::now(), + std::forward<CALLABLE>(callable)); + } + + /** + * Launch a callable returning bool that will trigger repeatedly at + * specified interval, until the callable returns false. + * + * If you need to signal that callable from outside, DO NOT bind a + * reference to a simple bool! That's not thread-safe. Instead, bind + * an LLCond variant, e.g. LLOneShotCond or LLBoolCond. + */ + template <typename Rep, typename Period, typename CALLABLE> + void postEvery(const std::chrono::duration<Rep, Period>& interval, + CALLABLE&& callable); + + template <typename CALLABLE> + bool tryPost(CALLABLE&& callable) + { + return mQueue.tryPush(TimedWork(TimePoint::clock::now(), std::move(callable))); + } + + /*------------------------- handshake API --------------------------*/ + + /** + * Post work to another WorkQueue to be run at a specified time, + * requesting a specific callback to be run on this WorkQueue on + * completion. + * + * Returns true if able to post, false if the other WorkQueue is + * inaccessible. + */ + // Apparently some Microsoft header file defines a macro CALLBACK? The + // natural template argument name CALLBACK produces very weird Visual + // Studio compile errors that seem utterly unrelated to this source + // code. + template <typename CALLABLE, typename FOLLOWUP> + bool postTo(weak_t target, + const TimePoint& time, CALLABLE&& callable, FOLLOWUP&& callback); + + /** + * Post work to another WorkQueue, requesting a specific callback to + * be run on this WorkQueue on completion. + * + * Returns true if able to post, false if the other WorkQueue is + * inaccessible. + */ + template <typename CALLABLE, typename FOLLOWUP> + bool postTo(weak_t target, CALLABLE&& callable, FOLLOWUP&& callback) + { + return postTo(target, TimePoint::clock::now(), + std::move(callable), std::move(callback)); + } + + /** + * Post work to another WorkQueue to be run at a specified time, + * blocking the calling coroutine until then, returning the result to + * caller on completion. + * + * In general, we assume that each thread's default coroutine is busy + * servicing its WorkQueue or whatever. To try to prevent mistakes, we + * forbid calling waitForResult() from a thread's default coroutine. + */ + template <typename CALLABLE> + auto waitForResult(const TimePoint& time, CALLABLE&& callable); + + /** + * Post work to another WorkQueue, blocking the calling coroutine + * until then, returning the result to caller on completion. + * + * In general, we assume that each thread's default coroutine is busy + * servicing its WorkQueue or whatever. 
To try to prevent mistakes, we + * forbid calling waitForResult() from a thread's default coroutine. + */ + template <typename CALLABLE> + auto waitForResult(CALLABLE&& callable) + { + return waitForResult(TimePoint::clock::now(), std::move(callable)); + } + + /*--------------------------- worker API ---------------------------*/ + + /** + * runUntilClose() pulls TimedWork items off this WorkQueue until the + * queue is closed, at which point it returns. This would be the + * typical entry point for a simple worker thread. + */ + void runUntilClose(); + + /** + * runPending() runs all TimedWork items that are ready to run. It + * returns true if the queue remains open, false if the queue has been + * closed. This could be used by a thread whose primary purpose is to + * serve the queue, but also wants to do other things with its idle time. + */ + bool runPending(); + + /** + * runOne() runs at most one ready TimedWork item -- zero if none are + * ready. It returns true if the queue remains open, false if the + * queue has been closed. + */ + bool runOne(); + + /** + * runFor() runs a subset of ready TimedWork items, until the + * timeslice has been exceeded. It returns true if the queue remains + * open, false if the queue has been closed. This could be used by a + * busy main thread to lend a bounded few CPU cycles to this WorkQueue + * without risking the WorkQueue blowing out the length of any one + * frame. + */ + template <typename Rep, typename Period> + bool runFor(const std::chrono::duration<Rep, Period>& timeslice) + { + LL_PROFILE_ZONE_SCOPED; + return runUntil(TimePoint::clock::now() + timeslice); + } + + /** + * runUntil() is just like runFor(), only with a specific end time + * instead of a timeslice duration. + */ + bool runUntil(const TimePoint& until); + + private: + template <typename CALLABLE, typename FOLLOWUP> + static auto makeReplyLambda(CALLABLE&& callable, FOLLOWUP&& callback); + /// general case: arbitrary C++ return type + template <typename CALLABLE, typename FOLLOWUP, typename RETURNTYPE> + struct MakeReplyLambda; + /// specialize for CALLABLE returning void + template <typename CALLABLE, typename FOLLOWUP> + struct MakeReplyLambda<CALLABLE, FOLLOWUP, void>; + + /// general case: arbitrary C++ return type + template <typename CALLABLE, typename RETURNTYPE> + struct WaitForResult; + /// specialize for CALLABLE returning void + template <typename CALLABLE> + struct WaitForResult<CALLABLE, void>; + + static void checkCoroutine(const std::string& method); + static void error(const std::string& msg); + static std::string makeName(const std::string& name); + void callWork(const Queue::DataTuple& work); + void callWork(const Work& work); + Queue mQueue; + }; + + /** + * BackJack is, in effect, a hand-rolled lambda, binding a WorkQueue, a + * CALLABLE that returns bool, a TimePoint and an interval at which to + * relaunch it. As long as the callable continues returning true, BackJack + * keeps resubmitting it to the target WorkQueue. + */ + // Why is BackJack a class and not a lambda? Because, unlike a lambda, a + // class method gets its own 'this' pointer -- which we need to resubmit + // the whole BackJack callable. 
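As an aside, postEvery() together with the BackJack defined next might be used like this (illustrative only, not part of this changeset; queue and poll() are hypothetical):

    // post a hypothetical poll() five times, one second apart, then stop
    queue.postEvery(
        std::chrono::seconds(1),
        [n = 5]() mutable
        {
            poll();
            return --n > 0;      // returning false ends the repetition
        });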
+ template <typename Rep, typename Period, typename CALLABLE> + class WorkQueue::BackJack + { + public: + // bind the desired data + BackJack(weak_t target, + const TimePoint& start, + const std::chrono::duration<Rep, Period>& interval, + CALLABLE&& callable): + mTarget(target), + mStart(start), + mInterval(interval), + mCallable(std::move(callable)) + {} + + // Call by target WorkQueue -- note that although WE require a + // callable returning bool, WorkQueue wants a void callable. We + // consume the bool. + void operator()() + { + // If mCallable() throws an exception, don't catch it here: if it + // throws once, it's likely to throw every time, so it's a waste + // of time to arrange to call it again. + if (mCallable()) + { + // Modify mStart to the new start time we desire. If we simply + // added mInterval to now, we'd get actual timings of + // (mInterval + slop), where 'slop' is the latency between the + // previous mStart and the WorkQueue actually calling us. + // Instead, add mInterval to mStart so that at least we + // register our intent to fire at exact mIntervals. + mStart += mInterval; + + // We're being called at this moment by the target WorkQueue. + // Assume it still exists, rather than checking the result of + // lock(). + // Resubmit the whole *this callable: that's why we're a class + // rather than a lambda. Allow moving *this so we can carry a + // move-only callable; but naturally this statement must be + // the last time we reference this instance, which may become + // moved-from. + try + { + mTarget.lock()->post(mStart, std::move(*this)); + } + catch (const Closed&) + { + // Once this queue is closed, oh well, just stop + } + } + } + + private: + weak_t mTarget; + TimePoint mStart; + std::chrono::duration<Rep, Period> mInterval; + CALLABLE mCallable; + }; + + template <typename Rep, typename Period, typename CALLABLE> + void WorkQueue::postEvery(const std::chrono::duration<Rep, Period>& interval, + CALLABLE&& callable) + { + if (interval.count() <= 0) + { + // It's essential that postEvery() be called with a positive + // interval, since each call to BackJack posts another instance of + // itself at (start + interval) and we order by target time. A + // zero or negative interval would result in that BackJack + // instance going to the head of the queue every time, immediately + // ready to run. Effectively that would produce an infinite loop, + // a denial of service on this WorkQueue. + error("postEvery(interval) may not be 0"); + } + // Instantiate and post a suitable BackJack, binding a weak_ptr to + // self, the current time, the desired interval and the desired + // callable. + post( + BackJack<Rep, Period, CALLABLE>( + getWeak(), TimePoint::clock::now(), interval, std::move(callable))); + } + + /// general case: arbitrary C++ return type + template <typename CALLABLE, typename FOLLOWUP, typename RETURNTYPE> + struct WorkQueue::MakeReplyLambda + { + auto operator()(CALLABLE&& callable, FOLLOWUP&& callback) + { + // Call the callable in any case -- but to minimize + // copying the result, immediately bind it into the reply + // lambda. The reply lambda also binds the original + // callback, so that when we, the originating WorkQueue, + // finally receive and process the reply lambda, we'll + // call the bound callback with the bound result -- on the + // same thread that originally called postTo(). 
+ return + [result = std::forward<CALLABLE>(callable)(), + callback = std::move(callback)] + () + { callback(std::move(result)); }; + } + }; + + /// specialize for CALLABLE returning void + template <typename CALLABLE, typename FOLLOWUP> + struct WorkQueue::MakeReplyLambda<CALLABLE, FOLLOWUP, void> + { + auto operator()(CALLABLE&& callable, FOLLOWUP&& callback) + { + // Call the callable, which produces no result. + std::forward<CALLABLE>(callable)(); + // Our completion callback is simply the caller's callback. + return std::move(callback); + } + }; + + template <typename CALLABLE, typename FOLLOWUP> + auto WorkQueue::makeReplyLambda(CALLABLE&& callable, FOLLOWUP&& callback) + { + return MakeReplyLambda<CALLABLE, FOLLOWUP, + decltype(std::forward<CALLABLE>(callable)())>() + (std::move(callable), std::move(callback)); + } + + template <typename CALLABLE, typename FOLLOWUP> + bool WorkQueue::postTo(weak_t target, + const TimePoint& time, CALLABLE&& callable, FOLLOWUP&& callback) + { + LL_PROFILE_ZONE_SCOPED; + // We're being asked to post to the WorkQueue at target. + // target is a weak_ptr: have to lock it to check it. + auto tptr = target.lock(); + if (! tptr) + // can't post() if the target WorkQueue has been destroyed + return false; + + // Here we believe target WorkQueue still exists. Post to it a + // lambda that packages our callable, our callback and a weak_ptr + // to this originating WorkQueue. + tptr->post( + time, + [reply = super::getWeak(), + callable = std::move(callable), + callback = std::move(callback)] + () + { + // Use postMaybe() below in case this originating WorkQueue + // has been closed or destroyed. Remember, the outer lambda is + // now running on a thread servicing the target WorkQueue, and + // real time has elapsed since postTo()'s tptr->post() call. + try + { + // Make a reply lambda to repost to THIS WorkQueue. + // Delegate to makeReplyLambda() so we can partially + // specialize on void return. + postMaybe(reply, makeReplyLambda(std::move(callable), std::move(callback))); + } + catch (...) + { + // Either variant of makeReplyLambda() is responsible for + // calling the caller's callable. If that throws, return + // the exception to the originating thread. + postMaybe( + reply, + // Bind the current exception to transport back to the + // originating WorkQueue. Once there, rethrow it. + [exc = std::current_exception()](){ std::rethrow_exception(exc); }); + } + }); + + // looks like we were able to post() + return true; + } + + template <typename CALLABLE> + bool WorkQueue::postMaybe(weak_t target, const TimePoint& time, CALLABLE&& callable) + { + LL_PROFILE_ZONE_SCOPED; + // target is a weak_ptr: have to lock it to check it + auto tptr = target.lock(); + if (tptr) + { + try + { + tptr->post(time, std::forward<CALLABLE>(callable)); + // we were able to post() + return true; + } + catch (const Closed&) + { + // target WorkQueue still exists, but is Closed + } + } + // either target no longer exists, or its WorkQueue is Closed + return false; + } + + /// general case: arbitrary C++ return type + template <typename CALLABLE, typename RETURNTYPE> + struct WorkQueue::WaitForResult + { + auto operator()(WorkQueue* self, const TimePoint& time, CALLABLE&& callable) + { + LLCoros::Promise<RETURNTYPE> promise; + self->post( + time, + // We dare to bind a reference to Promise because it's + // specifically designed for cross-thread communication. 
+ [&promise, callable = std::move(callable)]() + { + try + { + // call the caller's callable and trigger promise with result + promise.set_value(callable()); + } + catch (...) + { + promise.set_exception(std::current_exception()); + } + }); + auto future{ LLCoros::getFuture(promise) }; + // now, on the calling thread, wait for that result + LLCoros::TempStatus st("waiting for WorkQueue::waitForResult()"); + return future.get(); + } + }; + + /// specialize for CALLABLE returning void + template <typename CALLABLE> + struct WorkQueue::WaitForResult<CALLABLE, void> + { + void operator()(WorkQueue* self, const TimePoint& time, CALLABLE&& callable) + { + LLCoros::Promise<void> promise; + self->post( + time, + // &promise is designed for cross-thread access + [&promise, callable = std::move(callable)]() + { + try + { + callable(); + promise.set_value(); + } + catch (...) + { + promise.set_exception(std::current_exception()); + } + }); + auto future{ LLCoros::getFuture(promise) }; + // block until set_value() + LLCoros::TempStatus st("waiting for void WorkQueue::waitForResult()"); + future.get(); + } + }; + + template <typename CALLABLE> + auto WorkQueue::waitForResult(const TimePoint& time, CALLABLE&& callable) + { + checkCoroutine("waitForResult()"); + // derive callable's return type so we can specialize for void + return WaitForResult<CALLABLE, decltype(std::forward<CALLABLE>(callable)())>() + (this, time, std::forward<CALLABLE>(callable)); + } + +} // namespace LL + +#endif /* ! defined(LL_WORKQUEUE_H) */ |
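Taken together, the handshake and worker APIs are meant to be used roughly as follows (an illustrative sketch, not part of this changeset; mainQueue, workerQueue, weakWorker, computeThing() and show() are hypothetical):

    // producer side, from a coroutine on the main thread:
    mainQueue.postTo(
        weakWorker,                              // weak pointer to the worker's WorkQueue
        []{ return computeThing(); },            // runs on the worker thread
        [](int result){ show(result); });        // runs back on mainQueue when done

    // or block this coroutine (not the thread's default one) until the result arrives:
    int value = workerQueue.waitForResult([]{ return computeThing(); });

    // consumer side: a dedicated worker drains the queue until close(),
    // exactly as ThreadPool::run() does; a busy main loop can instead
    // lend the queue a bounded timeslice each frame:
    workerQueue.runUntilClose();
    mainQueue.runFor(std::chrono::milliseconds(2));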