From 02d503bf8f8890c6d4b57dd09a1fde2973715b75 Mon Sep 17 00:00:00 2001 From: Richard Linden Date: Thu, 29 Nov 2012 00:43:25 -0800 Subject: SH-3406 WIP convert fast timers to lltrace system moved runtime timer tree topology information to separate array instead of recording stack --- indra/llcommon/llfasttimer.cpp | 85 +++++++++++++++++--------------- indra/llcommon/llfasttimer.h | 34 +++++++------ indra/llcommon/lltrace.h | 13 ++++- indra/llcommon/lltracethreadrecorder.cpp | 20 +++++--- indra/llcommon/lltracethreadrecorder.h | 4 +- 5 files changed, 91 insertions(+), 65 deletions(-) diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp index e88a5a9ed1..1b3498cce3 100644 --- a/indra/llcommon/llfasttimer.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -58,24 +58,27 @@ namespace LLTrace ////////////////////////////////////////////////////////////////////////////// // statics -S32 BlockTimer::sCurFrameIndex = -1; -S32 BlockTimer::sLastFrameIndex = -1; -U64 BlockTimer::sLastFrameTime = BlockTimer::getCPUClockCount64(); -bool BlockTimer::sPauseHistory = 0; -bool BlockTimer::sResetHistory = 0; -LLThreadLocalPointer BlockTimer::sCurTimerData; -bool BlockTimer::sLog = false; -std::string BlockTimer::sLogName = ""; -bool BlockTimer::sMetricLog = false; -static LLMutex* sLogLock = NULL; -static std::queue sLogQueue; +S32 BlockTimer::sCurFrameIndex = -1; +S32 BlockTimer::sLastFrameIndex = -1; +U64 BlockTimer::sLastFrameTime = BlockTimer::getCPUClockCount64(); +bool BlockTimer::sPauseHistory = 0; +bool BlockTimer::sResetHistory = 0; +bool BlockTimer::sLog = false; +std::string BlockTimer::sLogName = ""; +bool BlockTimer::sMetricLog = false; #if LL_LINUX || LL_SOLARIS -U64 BlockTimer::sClockResolution = 1000000000; // Nanosecond resolution +U64 BlockTimer::sClockResolution = 1000000000; // Nanosecond resolution #else -U64 BlockTimer::sClockResolution = 1000000; // Microsecond resolution +U64 BlockTimer::sClockResolution = 1000000; // Microsecond resolution #endif +LLThreadLocalPointer BlockTimer::sCurTimerData; + +static LLMutex* sLogLock = NULL; +static std::queue sLogQueue; + + // FIXME: move these declarations to the relevant modules // helper functions @@ -108,6 +111,7 @@ static timer_tree_dfs_iterator_t end_timer_tree() return timer_tree_dfs_iterator_t(); } + BlockTimer& BlockTimer::getRootTimer() { static BlockTimer root_timer("root", true, NULL); @@ -142,7 +146,7 @@ U64 BlockTimer::countsPerSecond() // counts per second for the *32-bit* timer // we drop the low-order byte in our timers, so report a lower frequency #else - // If we're not using RDTSC, each fasttimer tick is just a performance counter tick. + // If we're not using RDTSC, each fast timer tick is just a performance counter tick. // Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency()) // since that would change displayed MHz stats for CPUs static bool firstcall = true; @@ -268,17 +272,19 @@ void BlockTimer::buildHierarchy() // when this timer was called if (timer.mParent == &BlockTimer::getRootTimer()) { - if (timer.getPrimaryAccumulator().mLastCaller) - { - timer.setParent(timer.getPrimaryAccumulator().mLastCaller); + TimerTreeNode& tree_node = sCurTimerData->mTimerTreeData[timer.getIndex()]; + + if (tree_node.mLastCaller) + { + timer.setParent(tree_node.mLastCaller); } // no need to push up tree on first use, flag can be set spuriously - timer.getPrimaryAccumulator().mMoveUpTree = false; + tree_node.mMoveUpTree = false; } } } - // bump timers up tree if they've been flagged as being in the wrong place + // bump timers up tree if they have been flagged as being in the wrong place // do this in a bottom up order to promote descendants first before promoting ancestors // this preserves partial order derived from current frame's observations for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(BlockTimer::getRootTimer()); @@ -288,15 +294,16 @@ void BlockTimer::buildHierarchy() BlockTimer* timerp = *it; // skip root timer if (timerp == &BlockTimer::getRootTimer()) continue; + TimerTreeNode& tree_node = sCurTimerData->mTimerTreeData[timerp->getIndex()]; - if (timerp->getPrimaryAccumulator().mMoveUpTree) + if (tree_node.mMoveUpTree) { // since ancestors have already been visited, re-parenting won't affect tree traversal //step up tree, bringing our descendants with us LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() << " to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL; timerp->setParent(timerp->getParent()->getParent()); - timerp->getPrimaryAccumulator().mMoveUpTree = false; + tree_node.mMoveUpTree = false; // don't bubble up any ancestors until descendants are done bubbling up it.skipAncestors(); @@ -322,11 +329,11 @@ void BlockTimer::accumulateTimings() { U64 cur_time = getCPUClockCount64(); - // walk up stack of active timers and accumulate current time while leaving timing structures active - Time* cur_timer = sCurTimerData->mCurTimer; // root defined by parent pointing to self CurTimerData* cur_data = sCurTimerData.get(); - TimerAccumulator& accumulator = sCurTimerData->mTimerData->getPrimaryAccumulator(); + // walk up stack of active timers and accumulate current time while leaving timing structures active + Time* cur_timer = cur_data->mCurTimer; + TimerAccumulator& accumulator = cur_data->mTimerData->getPrimaryAccumulator(); while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) { U64 cumulative_time_delta = cur_time - cur_timer->mStartTime; @@ -431,11 +438,13 @@ void BlockTimer::resetFrame() { BlockTimer& timer = *it; TimerAccumulator& accumulator = timer.getPrimaryAccumulator(); + TimerTreeNode& tree_node = sCurTimerData->mTimerTreeData[timer.getIndex()]; + accumulator.mSelfTimeCounter = 0; accumulator.mTotalTimeCounter = 0; accumulator.mCalls = 0; - accumulator.mLastCaller = NULL; - accumulator.mMoveUpTree = false; + tree_node.mLastCaller = NULL; + tree_node.mMoveUpTree = false; } } @@ -579,32 +588,28 @@ void Time::writeLog(std::ostream& os) } -LLTrace::TimerAccumulator::TimerAccumulator() : mSelfTimeCounter(0), +TimerAccumulator::TimerAccumulator() : mSelfTimeCounter(0), mTotalTimeCounter(0), - mCalls(0), - mLastCaller(NULL), - mActiveCount(0), - mMoveUpTree(false) + mCalls(0) {} -void LLTrace::TimerAccumulator::addSamples( const LLTrace::TimerAccumulator& other ) +void TimerAccumulator::addSamples( const TimerAccumulator& other ) { mSelfTimeCounter += other.mSelfTimeCounter; mTotalTimeCounter += other.mTotalTimeCounter; mCalls += other.mCalls; - if (!mLastCaller) - { - mLastCaller = other.mLastCaller; - } - - //mActiveCount stays the same; - mMoveUpTree |= other.mMoveUpTree; } -void LLTrace::TimerAccumulator::reset( const LLTrace::TimerAccumulator* other ) +void TimerAccumulator::reset( const TimerAccumulator* other ) { mTotalTimeCounter = 0; mSelfTimeCounter = 0; mCalls = 0; } + +TimerTreeNode::TimerTreeNode() +: mLastCaller(NULL), + mActiveCount(0), + mMoveUpTree(false) +{} } diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index cfe2cf5371..9f981480f2 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -37,11 +37,13 @@ class LLMutex; namespace LLTrace { + struct CurTimerData { class Time* mCurTimer; class BlockTimer* mTimerData; U64 mChildTime; + TimerTreeNode* mTimerTreeData; }; class Time @@ -269,20 +271,20 @@ public: // tree structure BlockTimer* mParent; // BlockTimer of caller(parent) std::vector mChildren; - bool mCollapsed; // don't show children - bool mNeedsSorting; // sort children whenever child added + bool mCollapsed, // don't show children + mNeedsSorting; // sort children whenever child added // statics static std::string sLogName; - static bool sMetricLog; - static bool sLog; + static bool sMetricLog, + sLog; static LLThreadLocalPointer sCurTimerData; static U64 sClockResolution; - static S32 sCurFrameIndex; - static S32 sLastFrameIndex; + static S32 sCurFrameIndex, + sLastFrameIndex; static U64 sLastFrameTime; - static bool sPauseHistory; - static bool sResetHistory; + static bool sPauseHistory, + sResetHistory; }; @@ -291,13 +293,12 @@ LL_FORCE_INLINE Time::Time(BlockTimer& timer) #if FAST_TIMER_ON mStartTime = BlockTimer::getCPUClockCount64(); - TimerAccumulator& accumulator = timer.getPrimaryAccumulator(); - accumulator.mActiveCount++; - accumulator.mCalls++; + CurTimerData* cur_timer_data = BlockTimer::sCurTimerData.get(); + TimerTreeNode& tree_node = cur_timer_data->mTimerTreeData[timer.getIndex()]; + tree_node.mActiveCount++; // keep current parent as long as it is active when we are - accumulator.mMoveUpTree |= (timer.mParent->getPrimaryAccumulator().mActiveCount == 0); + tree_node.mMoveUpTree |= (cur_timer_data->mTimerTreeData[timer.mParent->getIndex()].mActiveCount == 0); - CurTimerData* cur_timer_data = BlockTimer::sCurTimerData.get(); // store top of stack mLastTimerData = *cur_timer_data; // push new information @@ -313,13 +314,16 @@ LL_FORCE_INLINE Time::~Time() U64 total_time = BlockTimer::getCPUClockCount64() - mStartTime; CurTimerData* cur_timer_data = BlockTimer::sCurTimerData.get(); TimerAccumulator& accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator(); + TimerTreeNode& tree_node = cur_timer_data->mTimerTreeData[cur_timer_data->mTimerData->getIndex()]; + + accumulator.mCalls++; accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime; accumulator.mTotalTimeCounter += total_time; - accumulator.mActiveCount--; + tree_node.mActiveCount--; // store last caller to bootstrap tree creation // do this in the destructor in case of recursion to get topmost caller - accumulator.mLastCaller = mLastTimerData.mTimerData; + tree_node.mLastCaller = mLastTimerData.mTimerData; // we are only tracking self time, so subtract our total time delta from parents mLastTimerData.mChildTime += total_time; diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h index 11651ef953..ad9f170aae 100644 --- a/indra/llcommon/lltrace.h +++ b/indra/llcommon/lltrace.h @@ -189,6 +189,11 @@ namespace LLTrace delete[] old_storage; } + size_t size() + { + return mNextStorageSlot; + } + static AccumulatorBuffer& getDefaultBuffer() { static AccumulatorBuffer sBuffer(STATIC_ALLOC); @@ -216,7 +221,7 @@ namespace LLTrace mAccumulatorIndex = AccumulatorBuffer::getDefaultBuffer().reserveSlot(); } - LL_FORCE_INLINE ACCUMULATOR& getPrimaryAccumulator() + LL_FORCE_INLINE ACCUMULATOR& getPrimaryAccumulator() const { return AccumulatorBuffer::getPrimaryStorage()[mAccumulatorIndex]; } @@ -399,6 +404,12 @@ namespace LLTrace U64 mSelfTimeCounter, mTotalTimeCounter; U32 mCalls; + }; + + class TimerTreeNode + { + public: + TimerTreeNode(); class BlockTimer* mLastCaller; // used to bootstrap tree construction U16 mActiveCount; // number of timers with this ID active on stack bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp index faaab4c8e7..0a2d79cf3a 100644 --- a/indra/llcommon/lltracethreadrecorder.cpp +++ b/indra/llcommon/lltracethreadrecorder.cpp @@ -41,25 +41,29 @@ ThreadRecorder::ThreadRecorder() get_thread_recorder() = this; mFullRecording.start(); - BlockTimer::sCurTimerData = new CurTimerData(); -} + mRootTimerData = new CurTimerData(); + mRootTimerData->mTimerData = &BlockTimer::getRootTimer(); + mRootTimerData->mTimerTreeData = new TimerTreeNode[AccumulatorBuffer::getDefaultBuffer().size()]; + BlockTimer::sCurTimerData = mRootTimerData; -ThreadRecorder::ThreadRecorder( const ThreadRecorder& other ) -: mFullRecording(other.mFullRecording) -{ - get_thread_recorder() = this; - mFullRecording.start(); + mRootTimer = new Time(BlockTimer::getRootTimer()); + mRootTimerData->mCurTimer = mRootTimer; + + mRootTimerData->mTimerTreeData[BlockTimer::getRootTimer().getIndex()].mActiveCount = 1; } ThreadRecorder::~ThreadRecorder() { + delete mRootTimer; + while(mActiveRecordings.size()) { mActiveRecordings.front().mTargetRecording->stop(); } get_thread_recorder() = NULL; - delete BlockTimer::sCurTimerData.get(); BlockTimer::sCurTimerData = NULL; + delete [] mRootTimerData->mTimerTreeData; + delete mRootTimerData; } void ThreadRecorder::activate( Recording* recording ) diff --git a/indra/llcommon/lltracethreadrecorder.h b/indra/llcommon/lltracethreadrecorder.h index 44fe67384b..277a468a40 100644 --- a/indra/llcommon/lltracethreadrecorder.h +++ b/indra/llcommon/lltracethreadrecorder.h @@ -41,7 +41,6 @@ namespace LLTrace struct ActiveRecording; public: ThreadRecorder(); - ThreadRecorder(const ThreadRecorder& other); virtual ~ThreadRecorder(); @@ -63,6 +62,9 @@ namespace LLTrace }; Recording mFullRecording; std::list mActiveRecordings; + + struct CurTimerData* mRootTimerData; + class Time* mRootTimer; }; class LL_COMMON_API MasterThreadRecorder : public ThreadRecorder -- cgit v1.2.3