From 67ec47e6da389661934ed2ddfa55ca58455fa7e5 Mon Sep 17 00:00:00 2001 From: Richard Linden Date: Tue, 13 Nov 2012 17:10:10 -0800 Subject: SH-3406 WIP convert fast timers to lltrace system moving fast timers into lltrace namespace and accumulation system --- indra/llcommon/llfasttimer.cpp | 186 ++++++++++++++------------ indra/llcommon/llfasttimer.h | 268 ++++++++++++++++++++------------------ indra/llcommon/lltrace.cpp | 7 +- indra/llcommon/lltrace.h | 194 +++------------------------ indra/llcommon/lltracerecording.h | 18 +-- indra/llcommon/llunit.h | 84 ++++++------ 6 files changed, 322 insertions(+), 435 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp index 66452fd02a..cced0bdfa9 100644 --- a/indra/llcommon/llfasttimer.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -33,8 +33,10 @@ #include "lltreeiterators.h" #include "llsdserialize.h" #include "llunit.h" +#include "llsd.h" #include +#include #if LL_WINDOWS @@ -50,37 +52,40 @@ #error "architecture not supported" #endif +namespace LLTrace +{ + ////////////////////////////////////////////////////////////////////////////// // statics -S32 LLFastTimer::sCurFrameIndex = -1; -S32 LLFastTimer::sLastFrameIndex = -1; -U64 LLFastTimer::sLastFrameTime = LLFastTimer::getCPUClockCount64(); -bool LLFastTimer::sPauseHistory = 0; -bool LLFastTimer::sResetHistory = 0; -LLFastTimer::CurTimerData LLFastTimer::sCurTimerData; -BOOL LLFastTimer::sLog = FALSE; -std::string LLFastTimer::sLogName = ""; -BOOL LLFastTimer::sMetricLog = FALSE; -LLMutex* LLFastTimer::sLogLock = NULL; -std::queue LLFastTimer::sLogQueue; +S32 Time::sCurFrameIndex = -1; +S32 Time::sLastFrameIndex = -1; +U64 Time::sLastFrameTime = Time::getCPUClockCount64(); +bool Time::sPauseHistory = 0; +bool Time::sResetHistory = 0; +LLThreadLocalPointer Time::sCurTimerData; +bool Time::sLog = FALSE; +std::string Time::sLogName = ""; +bool Time::sMetricLog = FALSE; +static LLMutex* sLogLock = NULL; +static std::queue sLogQueue; #if LL_LINUX || LL_SOLARIS -U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution +U64 Time::sClockResolution = 1000000000; // Nanosecond resolution #else -U64 LLFastTimer::sClockResolution = 1000000; // Microsecond resolution +U64 Time::sClockResolution = 1000000; // Microsecond resolution #endif // FIXME: move these declarations to the relevant modules // helper functions -typedef LLTreeDFSPostIter timer_tree_bottom_up_iterator_t; +typedef LLTreeDFSPostIter timer_tree_bottom_up_iterator_t; -static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(LLFastTimer::DeclareTimer& id) +static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(BlockTimer& id) { return timer_tree_bottom_up_iterator_t(&id, - boost::bind(boost::mem_fn(&LLFastTimer::DeclareTimer::beginChildren), _1), - boost::bind(boost::mem_fn(&LLFastTimer::DeclareTimer::endChildren), _1)); + boost::bind(boost::mem_fn(&BlockTimer::beginChildren), _1), + boost::bind(boost::mem_fn(&BlockTimer::endChildren), _1)); } static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up() @@ -88,14 +93,14 @@ static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up() return timer_tree_bottom_up_iterator_t(); } -typedef LLTreeDFSIter timer_tree_dfs_iterator_t; +typedef LLTreeDFSIter timer_tree_dfs_iterator_t; -static timer_tree_dfs_iterator_t begin_timer_tree(LLFastTimer::DeclareTimer& id) +static timer_tree_dfs_iterator_t begin_timer_tree(BlockTimer& id) { return timer_tree_dfs_iterator_t(&id, - boost::bind(boost::mem_fn(&LLFastTimer::DeclareTimer::beginChildren), _1), - boost::bind(boost::mem_fn(&LLFastTimer::DeclareTimer::endChildren), _1)); + boost::bind(boost::mem_fn(&BlockTimer::beginChildren), _1), + boost::bind(boost::mem_fn(&BlockTimer::endChildren), _1)); } static timer_tree_dfs_iterator_t end_timer_tree() @@ -103,21 +108,21 @@ static timer_tree_dfs_iterator_t end_timer_tree() return timer_tree_dfs_iterator_t(); } -LLFastTimer::DeclareTimer& LLFastTimer::DeclareTimer::getRootTimer() +BlockTimer& BlockTimer::getRootTimer() { - static DeclareTimer root_timer("root", true, NULL); + static BlockTimer root_timer("root", true, NULL); return root_timer; } //static #if (LL_DARWIN || LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) -U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer +U64 Time::countsPerSecond() // counts per second for the *32-bit* timer { return sClockResolution >> 8; } #else // windows or x86-mac or x86-linux or x86-solaris -U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer +U64 Time::countsPerSecond() // counts per second for the *32-bit* timer { #if LL_FASTTIMER_USE_RDTSC || !LL_WINDOWS //getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz @@ -140,19 +145,14 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer } #endif -LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open, DeclareTimer* parent) -: mName(name), +BlockTimer::BlockTimer(const char* name, bool open, BlockTimer* parent) +: TraceType(name), mCollapsed(true), mParent(NULL), mTreeTimeCounter(0), mCountAverage(0), mCallAverage(0), - mNeedsSorting(false), - mActiveCount(0), - mCalls(0), - mSelfTimeCounter(0), - mLastCaller(NULL), - mMoveUpTree(false) + mNeedsSorting(false) { setCollapsed(!open); @@ -171,13 +171,13 @@ LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open, Decl memset(mCallHistory, 0, sizeof(U32) * HISTORY_NUM); } -LLFastTimer::DeclareTimer::~DeclareTimer() +BlockTimer::~BlockTimer() { delete[] mCountHistory; delete[] mCallHistory; } -void LLFastTimer::DeclareTimer::setParent(DeclareTimer* parent) +void BlockTimer::setParent(BlockTimer* parent) { llassert_always(parent != this); llassert_always(parent != NULL); @@ -193,8 +193,8 @@ void LLFastTimer::DeclareTimer::setParent(DeclareTimer* parent) // subtract average timing from previous parent mParent->mCountAverage -= mCountAverage; - std::vector& children = mParent->getChildren(); - std::vector::iterator found_it = std::find(children.begin(), children.end(), this); + std::vector& children = mParent->getChildren(); + std::vector::iterator found_it = std::find(children.begin(), children.end(), this); if (found_it != children.end()) { children.erase(found_it); @@ -209,10 +209,10 @@ void LLFastTimer::DeclareTimer::setParent(DeclareTimer* parent) } } -S32 LLFastTimer::DeclareTimer::getDepth() +S32 BlockTimer::getDepth() { S32 depth = 0; - DeclareTimer* timerp = mParent; + BlockTimer* timerp = mParent; while(timerp) { depth++; @@ -223,9 +223,9 @@ S32 LLFastTimer::DeclareTimer::getDepth() } // static -void LLFastTimer::DeclareTimer::processTimes() +void BlockTimer::processTimes() { - if (sCurFrameIndex < 0) return; + if (Time::getCurFrameIndex() < 0) return; buildHierarchy(); accumulateTimings(); @@ -234,27 +234,27 @@ void LLFastTimer::DeclareTimer::processTimes() // sort child timers by name struct SortTimerByName { - bool operator()(const LLFastTimer::DeclareTimer* i1, const LLFastTimer::DeclareTimer* i2) + bool operator()(const BlockTimer* i1, const BlockTimer* i2) { return i1->getName() < i2->getName(); } }; //static -void LLFastTimer::DeclareTimer::buildHierarchy() +void BlockTimer::buildHierarchy() { - if (sCurFrameIndex < 0 ) return; + if (Time::getCurFrameIndex() < 0 ) return; // set up initial tree { - for (instance_iter it = beginInstances(), end_it = endInstances(); it != end_it; ++it) + for (LLInstanceTracker::instance_iter it = LLInstanceTracker::beginInstances(), end_it = LLInstanceTracker::endInstances(); it != end_it; ++it) { - DeclareTimer& timer = *it; - if (&timer == &DeclareTimer::getRootTimer()) continue; + BlockTimer& timer = *it; + if (&timer == &BlockTimer::getRootTimer()) continue; // bootstrap tree construction by attaching to last timer to be on stack // when this timer was called - if (timer.mLastCaller && timer.mParent == &DeclareTimer::getRootTimer()) + if (timer.mLastCaller && timer.mParent == &BlockTimer::getRootTimer()) { timer.setParent(timer.mLastCaller); // no need to push up tree on first use, flag can be set spuriously @@ -266,13 +266,13 @@ void LLFastTimer::DeclareTimer::buildHierarchy() // bump timers up tree if they've been flagged as being in the wrong place // do this in a bottom up order to promote descendants first before promoting ancestors // this preserves partial order derived from current frame's observations - for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(DeclareTimer::getRootTimer()); + for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(BlockTimer::getRootTimer()); it != end_timer_tree_bottom_up(); ++it) { - DeclareTimer* timerp = *it; + BlockTimer* timerp = *it; // skip root timer - if (timerp == &DeclareTimer::getRootTimer()) continue; + if (timerp == &BlockTimer::getRootTimer()) continue; if (timerp->mMoveUpTree) { @@ -289,11 +289,11 @@ void LLFastTimer::DeclareTimer::buildHierarchy() } // sort timers by time last called, so call graph makes sense - for(timer_tree_dfs_iterator_t it = begin_timer_tree(DeclareTimer::getRootTimer()); + for(timer_tree_dfs_iterator_t it = begin_timer_tree(BlockTimer::getRootTimer()); it != end_timer_tree(); ++it) { - DeclareTimer* timerp = (*it); + BlockTimer* timerp = (*it); if (timerp->mNeedsSorting) { std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName()); @@ -303,12 +303,12 @@ void LLFastTimer::DeclareTimer::buildHierarchy() } //static -void LLFastTimer::DeclareTimer::accumulateTimings() +void BlockTimer::accumulateTimings() { U32 cur_time = getCPUClockCount32(); // walk up stack of active timers and accumulate current time while leaving timing structures active - LLFastTimer* cur_timer = sCurTimerData.mCurTimer; + Time* cur_timer = sCurTimerData.mCurTimer; // root defined by parent pointing to self CurTimerData* cur_data = &sCurTimerData; while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) @@ -328,11 +328,11 @@ void LLFastTimer::DeclareTimer::accumulateTimings() } // traverse tree in DFS post order, or bottom up - for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(DeclareTimer::getRootTimer()); + for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(BlockTimer::getRootTimer()); it != end_timer_tree_bottom_up(); ++it) { - DeclareTimer* timerp = (*it); + BlockTimer* timerp = (*it); timerp->mTreeTimeCounter = timerp->mSelfTimeCounter; for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it) { @@ -354,7 +354,7 @@ void LLFastTimer::DeclareTimer::accumulateTimings() } // static -void LLFastTimer::DeclareTimer::resetFrame() +void BlockTimer::resetFrame() { if (sLog) { //output current frame counts to performance log @@ -379,7 +379,7 @@ void LLFastTimer::DeclareTimer::resetFrame() { for (instance_iter it = beginInstances(), end_it = endInstances(); it != end_it; ++it) { - DeclareTimer& timer = *it; + BlockTimer& timer = *it; sd[timer.getName()]["Time"] = (LLSD::Real) (timer.mSelfTimeCounter*iclock_freq); sd[timer.getName()]["Calls"] = (LLSD::Integer) timer.mCalls; @@ -401,7 +401,7 @@ void LLFastTimer::DeclareTimer::resetFrame() // reset for next frame for (instance_iter it = beginInstances(), end_it = endInstances(); it != end_it; ++it) { - DeclareTimer& timer = *it; + BlockTimer& timer = *it; timer.mSelfTimeCounter = 0; timer.mCalls = 0; timer.mLastCaller = NULL; @@ -410,7 +410,7 @@ void LLFastTimer::DeclareTimer::resetFrame() } //static -void LLFastTimer::DeclareTimer::reset() +void BlockTimer::reset() { resetFrame(); // reset frame data @@ -420,7 +420,7 @@ void LLFastTimer::DeclareTimer::reset() // root defined by parent pointing to self CurTimerData* cur_data = &sCurTimerData; - LLFastTimer* cur_timer = cur_data->mCurTimer; + Time* cur_timer = cur_data->mCurTimer; while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) { cur_timer->mStartTime = cur_time; @@ -434,10 +434,10 @@ void LLFastTimer::DeclareTimer::reset() { for (instance_iter it = beginInstances(), end_it = endInstances(); it != end_it; ++it) { - DeclareTimer& timer = *it; - if (&timer != &DeclareTimer::getRootTimer()) + BlockTimer& timer = *it; + if (&timer != &BlockTimer::getRootTimer()) { - timer.setParent(&DeclareTimer::getRootTimer()); + timer.setParent(&BlockTimer::getRootTimer()); } timer.mCountAverage = 0; @@ -451,35 +451,35 @@ void LLFastTimer::DeclareTimer::reset() sCurFrameIndex = 0; } -U32 LLFastTimer::DeclareTimer::getHistoricalCount(S32 history_index) const +U32 BlockTimer::getHistoricalCount(S32 history_index) const { - S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::DeclareTimer::HISTORY_NUM; + S32 history_idx = (getLastFrameIndex() + history_index) % BlockTimer::HISTORY_NUM; return mCountHistory[history_idx]; } -U32 LLFastTimer::DeclareTimer::getHistoricalCalls(S32 history_index ) const +U32 BlockTimer::getHistoricalCalls(S32 history_index ) const { - S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::DeclareTimer::HISTORY_NUM; + S32 history_idx = (getLastFrameIndex() + history_index) % BlockTimer::HISTORY_NUM; return mCallHistory[history_idx]; } -std::vector::const_iterator LLFastTimer::DeclareTimer::beginChildren() +std::vector::const_iterator BlockTimer::beginChildren() { return mChildren.begin(); } -std::vector::const_iterator LLFastTimer::DeclareTimer::endChildren() +std::vector::const_iterator BlockTimer::endChildren() { return mChildren.end(); } -std::vector& LLFastTimer::DeclareTimer::getChildren() +std::vector& BlockTimer::getChildren() { return mChildren; } //static -void LLFastTimer::nextFrame() +void Time::nextFrame() { countsPerSecond(); // good place to calculate clock frequency U64 frame_time = getCPUClockCount64(); @@ -490,30 +490,30 @@ void LLFastTimer::nextFrame() if (!sPauseHistory) { - DeclareTimer::processTimes(); + BlockTimer::processTimes(); sLastFrameIndex = sCurFrameIndex++; } // get ready for next frame - DeclareTimer::resetFrame(); + BlockTimer::resetFrame(); sLastFrameTime = frame_time; } //static -void LLFastTimer::dumpCurTimes() +void Time::dumpCurTimes() { // accumulate timings, etc. - DeclareTimer::processTimes(); + BlockTimer::processTimes(); F64 clock_freq = (F64)countsPerSecond(); F64 iclock_freq = 1000.0 / clock_freq; // clock_ticks -> milliseconds // walk over timers in depth order and output timings - for(timer_tree_dfs_iterator_t it = begin_timer_tree(DeclareTimer::getRootTimer()); + for(timer_tree_dfs_iterator_t it = begin_timer_tree(BlockTimer::getRootTimer()); it != end_timer_tree(); ++it) { - DeclareTimer* timerp = (*it); + BlockTimer* timerp = (*it); F64 total_time_ms = ((F64)timerp->getHistoricalCount(0) * iclock_freq); // Don't bother with really brief times, keep output concise if (total_time_ms < 0.1) continue; @@ -534,14 +534,14 @@ void LLFastTimer::dumpCurTimes() } //static -void LLFastTimer::reset() +void Time::reset() { - DeclareTimer::reset(); + BlockTimer::reset(); } //static -void LLFastTimer::writeLog(std::ostream& os) +void Time::writeLog(std::ostream& os) { while (!sLogQueue.empty()) { @@ -552,3 +552,25 @@ void LLFastTimer::writeLog(std::ostream& os) } } + +void LLTrace::TimerAccumulator::addSamples( const LLTrace::TimerAccumulator& other ) +{ + mSelfTimeCounter += other.mSelfTimeCounter; + mTotalTimeCounter += other.mTotalTimeCounter; + mCalls += other.mCalls; + if (!mLastCaller) + { + mLastCaller = other.mLastCaller; + } + + //mActiveCount stays the same; + mMoveUpTree |= other.mMoveUpTree; +} + +void LLTrace::TimerAccumulator::reset( const LLTrace::TimerAccumulator* other ) +{ + mTotalTimeCounter = 0; + mSelfTimeCounter = 0; + mCalls = 0; +} +} diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 40c2af34e8..f5e6d874a2 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -33,131 +33,26 @@ class LLMutex; -#include -#include "llsd.h" +#include "lltrace.h" #define LL_FASTTIMER_USE_RDTSC 1 -class LL_COMMON_API LLFastTimer +namespace LLTrace { -public: - // stores a "named" timer instance to be reused via multiple LLFastTimer stack instances - class LL_COMMON_API DeclareTimer - : public LLInstanceTracker - { - public: - DeclareTimer(const std::string& name, bool open = false, DeclareTimer* parent = &getRootTimer()); - ~DeclareTimer(); - - enum { HISTORY_NUM = 300 }; - - const std::string& getName() const { return mName; } - DeclareTimer* getParent() const { return mParent; } - void setParent(DeclareTimer* parent); - S32 getDepth(); - - typedef std::vector::const_iterator child_const_iter; - child_const_iter beginChildren(); - child_const_iter endChildren(); - std::vector& getChildren(); - - void setCollapsed(bool collapsed) { mCollapsed = collapsed; } - bool getCollapsed() const { return mCollapsed; } - - U32 getCountAverage() const { return mCountAverage; } - U32 getCallAverage() const { return mCallAverage; } - - U32 getHistoricalCount(S32 history_index = 0) const; - U32 getHistoricalCalls(S32 history_index = 0) const; - - static DeclareTimer& getRootTimer(); - - private: - friend class LLFastTimer; - - // recursive call to gather total time from children - static void accumulateTimings(); - - // updates cumulative times and hierarchy, - // can be called multiple times in a frame, at any point - static void processTimes(); - - static void buildHierarchy(); - static void resetFrame(); - static void reset(); - - // - // members - // - U32 mSelfTimeCounter; - U32 mTotalTimeCounter; - U32 mCalls; - DeclareTimer* mLastCaller; // used to bootstrap tree construction - U16 mActiveCount; // number of timers with this ID active on stack - bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame - - std::string mName; - - // sum of recored self time and tree time of all children timers (might not match actual recorded time of children if topology is incomplete - U32 mTreeTimeCounter; - - U32 mCountAverage; - U32 mCallAverage; - - U32* mCountHistory; - U32* mCallHistory; - - // tree structure - DeclareTimer* mParent; // DeclareTimer of caller(parent) - std::vector mChildren; - bool mCollapsed; // don't show children - bool mNeedsSorting; // sort children whenever child added - }; +class Time +{ public: - LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer) - { -#if FAST_TIMER_ON - mStartTime = getCPUClockCount32(); - - timer.mActiveCount++; - timer.mCalls++; - // keep current parent as long as it is active when we are - timer.mMoveUpTree |= (timer.mParent->mActiveCount == 0); - - LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData; - mLastTimerData = *cur_timer_data; - cur_timer_data->mCurTimer = this; - cur_timer_data->mTimerData = &timer; - cur_timer_data->mChildTime = 0; -#endif - } - - LL_FORCE_INLINE ~LLFastTimer() - { -#if FAST_TIMER_ON - U32 total_time = getCPUClockCount32() - mStartTime; - DeclareTimer* timer_data = LLFastTimer::sCurTimerData.mTimerData; - timer_data->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime; - timer_data->mTotalTimeCounter += total_time; - timer_data->mActiveCount--; - - // store last caller to bootstrap tree creation - // do this in the destructor in case of recursion to get topmost caller - timer_data->mLastCaller = mLastTimerData.mTimerData; - - // we are only tracking self time, so subtract our total time delta from parents - mLastTimerData.mChildTime += total_time; + typedef Time self_t; + typedef class BlockTimer DeclareTimer; - LLFastTimer::sCurTimerData = mLastTimerData; -#endif - } +public: + Time(BlockTimer& timer); + ~Time(); public: - static LLMutex* sLogLock; - static std::queue sLogQueue; - static BOOL sLog; - static BOOL sMetricLog; + static bool sLog; + static bool sMetricLog; static std::string sLogName; static bool sPauseHistory; static bool sResetHistory; @@ -180,11 +75,11 @@ public: struct CurTimerData { - LLFastTimer* mCurTimer; - DeclareTimer* mTimerData; - U32 mChildTime; + Time* mCurTimer; + BlockTimer* mTimerData; + U64 mChildTime; }; - static CurTimerData sCurTimerData; + static LLThreadLocalPointer sCurTimerData; private: @@ -211,14 +106,14 @@ private: //#undef _interlockedbittestandset //#undef _interlockedbittestandreset - //inline U32 LLFastTimer::getCPUClockCount32() + //inline U32 Time::getCPUClockCount32() //{ // U64 time_stamp = __rdtsc(); // return (U32)(time_stamp >> 8); //} // //// return full timer value, *not* shifted by 8 bits - //inline U64 LLFastTimer::getCPUClockCount64() + //inline U64 Time::getCPUClockCount64() //{ // return __rdtsc(); //} @@ -258,7 +153,7 @@ private: } #else - //LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp + //U64 get_clock_count(); // in lltimer.cpp // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures. static U32 getCPUClockCount32() { @@ -330,10 +225,131 @@ private: static S32 sLastFrameIndex; static U64 sLastFrameTime; - U32 mStartTime; - LLFastTimer::CurTimerData mLastTimerData; + U64 mStartTime; + Time::CurTimerData mLastTimerData; +}; + +struct TimerAccumulator +{ + void addSamples(const TimerAccumulator& other); + void reset(const TimerAccumulator* other); + + // + // members + // + U64 mSelfTimeCounter, + mTotalTimeCounter; + U32 mCalls; + BlockTimer* mLastCaller; // used to bootstrap tree construction + U16 mActiveCount; // number of timers with this ID active on stack + bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame +}; + +// stores a "named" timer instance to be reused via multiple Time stack instances +class BlockTimer +: public TraceType, + public LLInstanceTracker +{ +public: + BlockTimer(const char* name, bool open = false, BlockTimer* parent = &getRootTimer()); + ~BlockTimer(); + + enum { HISTORY_NUM = 300 }; + + BlockTimer* getParent() const { return mParent; } + void setParent(BlockTimer* parent); + S32 getDepth(); + + typedef std::vector::const_iterator child_const_iter; + child_const_iter beginChildren(); + child_const_iter endChildren(); + std::vector& getChildren(); + + void setCollapsed(bool collapsed) { mCollapsed = collapsed; } + bool getCollapsed() const { return mCollapsed; } + + U32 getCountAverage() const { return mCountAverage; } + U32 getCallAverage() const { return mCallAverage; } + + U32 getHistoricalCount(S32 history_index = 0) const; + U32 getHistoricalCalls(S32 history_index = 0) const; + + static BlockTimer& getRootTimer(); + +private: + friend class Time; + + // recursive call to gather total time from children + static void accumulateTimings(); + + // updates cumulative times and hierarchy, + // can be called multiple times in a frame, at any point + static void processTimes(); + + static void buildHierarchy(); + static void resetFrame(); + static void reset(); + + // sum of recorded self time and tree time of all children timers (might not match actual recorded time of children if topology is incomplete + U32 mTreeTimeCounter; + + U32 mCountAverage; + U32 mCallAverage; + + U32* mCountHistory; + U32* mCallHistory; + + // tree structure + BlockTimer* mParent; // BlockTimer of caller(parent) + std::vector mChildren; + bool mCollapsed; // don't show children + bool mNeedsSorting; // sort children whenever child added }; -typedef class LLFastTimer LLFastTimer; +LL_FORCE_INLINE Time::Time(BlockTimer& timer) +{ +#if FAST_TIMER_ON + mStartTime = getCPUClockCount64(); + + TimerAccumulator& accumulator = timer.getPrimaryAccumulator(); + accumulator.mActiveCount++; + accumulator.mCalls++; + // keep current parent as long as it is active when we are + accumulator.mMoveUpTree |= (timer.mParent->getPrimaryAccumulator().mActiveCount == 0); + + CurTimerData* cur_timer_data = Time::sCurTimerData.get(); + // store top of stack + mLastTimerData = *cur_timer_data; + // push new information + cur_timer_data->mCurTimer = this; + cur_timer_data->mTimerData = &timer; + cur_timer_data->mChildTime = 0; +#endif +} + +LL_FORCE_INLINE Time::~Time() +{ +#if FAST_TIMER_ON + U64 total_time = getCPUClockCount64() - mStartTime; + CurTimerData* cur_timer_data = Time::sCurTimerData.get(); + TimerAccumulator& accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator(); + accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime; + accumulator.mTotalTimeCounter += total_time; + accumulator.mActiveCount--; + + // store last caller to bootstrap tree creation + // do this in the destructor in case of recursion to get topmost caller + accumulator.mLastCaller = mLastTimerData.mTimerData; + + // we are only tracking self time, so subtract our total time delta from parents + mLastTimerData.mChildTime += total_time; + + *sCurTimerData = mLastTimerData; +#endif +} + +} + +typedef LLTrace::Time LLFastTimer; #endif // LL_LLFASTTIMER_H diff --git a/indra/llcommon/lltrace.cpp b/indra/llcommon/lltrace.cpp index 3f605f2c74..afb175c398 100644 --- a/indra/llcommon/lltrace.cpp +++ b/indra/llcommon/lltrace.cpp @@ -28,6 +28,7 @@ #include "lltrace.h" #include "lltracerecording.h" #include "lltracethreadrecorder.h" +#include "llfasttimer.h" namespace LLTrace { @@ -37,13 +38,15 @@ static MasterThreadRecorder* gMasterThreadRecorder = NULL; void init() { gMasterThreadRecorder = new MasterThreadRecorder(); + Time::sCurTimerData = new Time::CurTimerData(); } void cleanup() { delete gMasterThreadRecorder; - LLUnitStrict seconds; gMasterThreadRecorder = NULL; + delete Time::sCurTimerData.get(); + Time::sCurTimerData = NULL; } MasterThreadRecorder& getMasterThreadRecorder() @@ -59,6 +62,4 @@ LLThreadLocalPointer& get_thread_recorder() } -BlockTimer::Recorder::StackEntry BlockTimer::sCurRecorder; - } diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h index 549e407822..fb9dca5e84 100644 --- a/indra/llcommon/lltrace.h +++ b/indra/llcommon/lltrace.h @@ -32,7 +32,6 @@ #include "llmemory.h" #include "llrefcount.h" -//#include "lltracethreadrecorder.h" #include "llunit.h" #include "llapr.h" @@ -61,8 +60,8 @@ namespace LLTrace typedef LLUnit Microseconds; typedef LLUnit Nanoseconds; - typedef LLUnit Meters; - typedef LLUnit Kilometers; + typedef LLUnit Meters; + typedef LLUnit Kilometers; typedef LLUnit Centimeters; typedef LLUnit Millimeters; @@ -71,11 +70,11 @@ namespace LLTrace LLThreadLocalPointer& get_thread_recorder(); - class LL_COMMON_API MasterThreadRecorder& getMasterThreadRecorder(); + class MasterThreadRecorder& getMasterThreadRecorder(); // one per thread per type template - class LL_COMMON_API AccumulatorBuffer : public LLRefCount + class AccumulatorBuffer : public LLRefCount { static const U32 DEFAULT_ACCUMULATOR_BUFFER_SIZE = 64; private: @@ -199,7 +198,7 @@ namespace LLTrace template LLThreadLocalPointer AccumulatorBuffer::sPrimaryStorage; template - class LL_COMMON_API TraceType + class TraceType : public LLInstanceTracker, std::string> { public: @@ -218,6 +217,9 @@ namespace LLTrace size_t getIndex() const { return mAccumulatorIndex; } + std::string& getName() { return mName; } + const std::string& getName() const { return mName; } + protected: std::string mName; std::string mDescription; @@ -225,7 +227,7 @@ namespace LLTrace }; template - class LL_COMMON_API MeasurementAccumulator + class MeasurementAccumulator { public: typedef T value_t; @@ -339,7 +341,7 @@ namespace LLTrace }; template - class LL_COMMON_API CountAccumulator + class CountAccumulator { public: typedef CountAccumulator self_t; @@ -378,11 +380,8 @@ namespace LLTrace U32 mNumSamples; }; - typedef TraceType > measurement_common_float_t; - typedef TraceType > measurement_common_int_t; - - template - class LL_COMMON_API Measurement + template + class Measurement : public TraceType::type_t> > { public: @@ -392,37 +391,16 @@ namespace LLTrace : TraceType(name, description) {} - void sample(T value) - { - getPrimaryAccumulator().sample((storage_t)value); - } - }; - - template - class LL_COMMON_API Measurement - : public TraceType::type_t> > - { - public: - typedef typename LLUnits::HighestPrecisionType::type_t storage_t; - - Measurement(const char* name, const char* description = NULL) - : TraceType(name, description) - {} - template void sample(UNIT_T value) { - T converted_value; - converted_value = value; - getPrimaryAccumulator().sample((storage_t)converted_value.value()); + T converted_value(value); + getPrimaryAccumulator().sample((storage_t)converted_value); } }; - typedef TraceType > count_common_float_t; - typedef TraceType > count_common_int_t; - - template - class LL_COMMON_API Count + template + class Count : public TraceType::type_t> > { public: @@ -432,148 +410,12 @@ namespace LLTrace : TraceType(name) {} - void add(T value) - { - getPrimaryAccumulator().add((storage_t)value); - } - }; - - template - class LL_COMMON_API Count - : public TraceType::type_t> > - { - public: - typedef typename LLUnits::HighestPrecisionType::type_t storage_t; - - Count(const char* name, const char* description = NULL) - : TraceType(name) - {} - template void add(UNIT_T value) { - T converted_value; - converted_value = value; - getPrimaryAccumulator().add((storage_t)converted_value.value()); - } - }; - - class LL_COMMON_API TimerAccumulator - { - public: - typedef TimerAccumulator self_t; - - U32 mTotalTimeCounter, - mChildTimeCounter, - mCalls; - - TimerAccumulator* mParent; // info for caller timer - TimerAccumulator* mLastCaller; // used to bootstrap tree construction - const class BlockTimer* mTimer; // points to block timer associated with this storage - U8 mActiveCount; // number of timers with this ID active on stack - bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame - std::vector mChildren; // currently assumed child timers - - void addSamples(const TimerAccumulator& other) - { - mTotalTimeCounter += other.mTotalTimeCounter; - mChildTimeCounter += other.mChildTimeCounter; - mCalls += other.mCalls; - } - - void reset(const self_t* other) - { - mTotalTimeCounter = 0; - mChildTimeCounter = 0; - mCalls = 0; + T converted_value(value); + getPrimaryAccumulator().add((storage_t)converted_value); } - - }; - - class LL_COMMON_API BlockTimer : public TraceType - { - public: - BlockTimer(const char* name) - : TraceType(name) - {} - - struct Recorder - { - struct StackEntry - { - Recorder* mRecorder; - TimerAccumulator* mAccumulator; - U32 mChildTime; - }; - - LL_FORCE_INLINE Recorder(BlockTimer& block_timer) - : mLastRecorder(sCurRecorder) - { - mStartTime = getCPUClockCount32(); - TimerAccumulator* accumulator = &block_timer.getPrimaryAccumulator(); // get per-thread accumulator - accumulator->mActiveCount++; - accumulator->mCalls++; - accumulator->mMoveUpTree |= (accumulator->mParent->mActiveCount == 0); - - // push new timer on stack - sCurRecorder.mRecorder = this; - sCurRecorder.mAccumulator = accumulator; - sCurRecorder.mChildTime = 0; - } - - LL_FORCE_INLINE ~Recorder() - { - U32 total_time = getCPUClockCount32() - mStartTime; - - TimerAccumulator* accumulator = sCurRecorder.mAccumulator; - accumulator->mTotalTimeCounter += total_time; - accumulator->mChildTimeCounter += sCurRecorder.mChildTime; - accumulator->mActiveCount--; - - accumulator->mLastCaller = mLastRecorder.mAccumulator; - mLastRecorder.mChildTime += total_time; - - // pop stack - sCurRecorder = mLastRecorder; - } - - StackEntry mLastRecorder; - U32 mStartTime; - }; - - private: - static U32 getCPUClockCount32() - { - U32 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - shr eax,8 - shl edx,24 - or eax, edx - mov dword ptr [ret_val], eax - } - return ret_val; - } - - // return full timer value, *not* shifted by 8 bits - static U64 getCPUClockCount64() - { - U64 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - mov eax,eax - mov edx,edx - mov dword ptr [ret_val+4], edx - mov dword ptr [ret_val], eax - } - return ret_val; - } - - static Recorder::StackEntry sCurRecorder; }; } diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h index 16b80fd1d8..4dcadfdf29 100644 --- a/indra/llcommon/lltracerecording.h +++ b/indra/llcommon/lltracerecording.h @@ -120,7 +120,7 @@ namespace LLTrace F64 getSum(const TraceType >& stat) const; S64 getSum(const TraceType >& stat) const; template - T getSum(const Count& stat) const + T getSum(const Count& stat) const { return (T)getSum(static_cast::type_t> >&> (stat)); } @@ -128,7 +128,7 @@ namespace LLTrace F64 getPerSec(const TraceType >& stat) const; F64 getPerSec(const TraceType >& stat) const; template - T getPerSec(const Count& stat) const + T getPerSec(const Count& stat) const { return (T)getPerSec(static_cast::type_t> >&> (stat)); } @@ -141,7 +141,7 @@ namespace LLTrace F64 getSum(const TraceType >& stat) const; S64 getSum(const TraceType >& stat) const; template - T getSum(const Measurement& stat) const + T getSum(const Measurement& stat) const { return (T)getSum(static_cast::type_t> >&> (stat)); } @@ -149,7 +149,7 @@ namespace LLTrace F64 getPerSec(const TraceType >& stat) const; F64 getPerSec(const TraceType >& stat) const; template - T getPerSec(const Measurement& stat) const + T getPerSec(const Measurement& stat) const { return (T)getPerSec(static_cast::type_t> >&> (stat)); } @@ -157,7 +157,7 @@ namespace LLTrace F64 getMin(const TraceType >& stat) const; S64 getMin(const TraceType >& stat) const; template - T getMin(const Measurement& stat) const + T getMin(const Measurement& stat) const { return (T)getMin(static_cast::type_t> >&> (stat)); } @@ -165,7 +165,7 @@ namespace LLTrace F64 getMax(const TraceType >& stat) const; S64 getMax(const TraceType >& stat) const; template - T getMax(const Measurement& stat) const + T getMax(const Measurement& stat) const { return (T)getMax(static_cast::type_t> >&> (stat)); } @@ -173,7 +173,7 @@ namespace LLTrace F64 getMean(const TraceType >& stat) const; F64 getMean(const TraceType >& stat) const; template - T getMean(Measurement& stat) const + T getMean(Measurement& stat) const { return (T)getMean(static_cast::type_t> >&> (stat)); } @@ -181,7 +181,7 @@ namespace LLTrace F64 getStandardDeviation(const TraceType >& stat) const; F64 getStandardDeviation(const TraceType >& stat) const; template - T getStandardDeviation(const Measurement& stat) const + T getStandardDeviation(const Measurement& stat) const { return (T)getMean(static_cast::type_t> >&> (stat)); } @@ -189,7 +189,7 @@ namespace LLTrace F64 getLastValue(const TraceType >& stat) const; S64 getLastValue(const TraceType >& stat) const; template - T getLastValue(const Measurement& stat) const + T getLastValue(const Measurement& stat) const { return (T)getLastValue(static_cast::type_t> >&> (stat)); } diff --git a/indra/llcommon/llunit.h b/indra/llcommon/llunit.h index 54ba1d67db..4ac2197c11 100644 --- a/indra/llcommon/llunit.h +++ b/indra/llcommon/llunit.h @@ -209,26 +209,26 @@ private: // // operator + // -template -LLUnit operator + (LLUnit first, LLUnit second) +template +LLUnit operator + (LLUnit first, LLUnit second) { - LLUnit result(first); + LLUnit result(first); result += second; return result; } -template -LLUnit operator + (LLUnit first, SCALAR_TYPE second) +template +LLUnit operator + (LLUnit first, SCALAR_TYPE second) { - LLUnit result(first); + LLUnit result(first); result += second; return result; } -template -LLUnit operator + (SCALAR_TYPE first, LLUnit second) +template +LLUnit operator + (SCALAR_TYPE first, LLUnit second) { - LLUnit result(first); + LLUnit result(first); result += second; return result; } @@ -236,27 +236,27 @@ LLUnit operator + (SCALAR_TYPE first, LLUnit -LLUnit operator - (LLUnit first, LLUnit second) +template +LLUnit operator - (LLUnit first, LLUnit second) { - LLUnit result(first); + LLUnit result(first); result -= second; return result; } -template -LLUnit operator - (LLUnit first, SCALAR_TYPE second) +template +LLUnit operator - (LLUnit first, SCALAR_TYPE second) { - LLUnit result(first); + LLUnit result(first); result -= second; return result; } -template -LLUnit operator - (SCALAR_TYPE first, LLUnit second) +template +LLUnit operator - (SCALAR_TYPE first, LLUnit second) { - LLUnit result(first); + LLUnit result(first); result -= second; return result; } @@ -264,20 +264,20 @@ LLUnit operator - (SCALAR_TYPE first, LLUnit -LLUnit operator * (SCALAR_TYPE first, LLUnit second) +template +LLUnit operator * (SCALAR_TYPE first, LLUnit second) { - return LLUnit(first * second.value()); + return LLUnit(first * second.value()); } -template -LLUnit operator * (LLUnit first, SCALAR_TYPE second) +template +LLUnit operator * (LLUnit first, SCALAR_TYPE second) { - return LLUnit(first.value() * second); + return LLUnit(first.value() * second); } -template -void operator * (LLUnit, LLUnit) +template +void operator * (LLUnit, LLUnit) { // spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template llstatic_assert(sizeof(STORAGE_TYPE1) == 0, "Multiplication of unit types results in new unit type - not supported."); @@ -286,40 +286,40 @@ void operator * (LLUnit, LLUnit -SCALAR_TYPE operator / (SCALAR_TYPE first, LLUnit second) +template +SCALAR_TYPE operator / (SCALAR_TYPE first, LLUnit second) { return SCALAR_TYPE(first / second.value()); } -template -LLUnit operator / (LLUnit first, SCALAR_TYPE second) +template +LLUnit operator / (LLUnit first, SCALAR_TYPE second) { - return LLUnit(first.value() / second); + return LLUnit(first.value() / second); } -template -void operator / (LLUnit, LLUnit) +template +void operator / (LLUnit, LLUnit) { // spurious use of dependent type to stop gcc from triggering the static assertion before instantiating the template llstatic_assert(sizeof(STORAGE_TYPE1) == 0, "Multiplication of unit types results in new unit type - not supported."); } #define COMPARISON_OPERATORS(op) \ -template \ -bool operator op (SCALAR_TYPE first, LLUnit second) \ +template \ +bool operator op (SCALAR_TYPE first, LLUnit second) \ { \ return first op second.value(); \ } \ \ -template \ -bool operator op (LLUnit first, SCALAR_TYPE second) \ +template \ +bool operator op (LLUnit first, SCALAR_TYPE second) \ { \ return first.value() op second; \ } \ \ -template \ -bool operator op (LLUnit first, LLUnit second) \ +template \ +bool operator op (LLUnit first, LLUnit second) \ { \ return first.value() op first.convert(second); \ } @@ -333,6 +333,12 @@ COMPARISON_OPERATORS(!=) namespace LLUnits { +template +struct HighestPrecisionType > +{ + typedef typename HighestPrecisionType::type_t type_t; +}; + #define LL_DECLARE_DERIVED_UNIT(base_unit_name, unit_name, conversion_factor) \ struct unit_name \ { \ -- cgit v1.2.3