diff options
author | Richard Linden <none@none> | 2012-12-18 20:07:25 -0800 |
---|---|---|
committer | Richard Linden <none@none> | 2012-12-18 20:07:25 -0800 |
commit | c219282f5de753a044cecb53bd806145f68add9a (patch) | |
tree | 9bea36fd9fba8f41fed4c04ba413bf5d41993ca0 /indra | |
parent | 1f56e57008f5a50c9e75fc0b4512c483ac359a52 (diff) |
SH-3406 WIP convert fast timers to lltrace system
removed some potential data races
got memory stats recording in trace system
Diffstat (limited to 'indra')
-rw-r--r-- | indra/llcommon/llapr.h | 51 | ||||
-rw-r--r-- | indra/llcommon/llfasttimer.cpp | 326 | ||||
-rw-r--r-- | indra/llcommon/llfasttimer.h | 27 | ||||
-rw-r--r-- | indra/llcommon/lltrace.h | 56 | ||||
-rw-r--r-- | indra/llcommon/lltracerecording.cpp | 21 | ||||
-rw-r--r-- | indra/llcommon/lltracerecording.h | 11 | ||||
-rw-r--r-- | indra/llcommon/lltracethreadrecorder.cpp | 14 | ||||
-rw-r--r-- | indra/llui/llviewmodel.h | 1 | ||||
-rw-r--r-- | indra/newview/llappviewer.cpp | 2 | ||||
-rw-r--r-- | indra/newview/llfasttimerview.cpp | 52 | ||||
-rw-r--r-- | indra/newview/llfasttimerview.h | 1 |
11 files changed, 274 insertions, 288 deletions
diff --git a/indra/llcommon/llapr.h b/indra/llcommon/llapr.h index 199b5291dd..f8f94263e4 100644 --- a/indra/llcommon/llapr.h +++ b/indra/llcommon/llapr.h @@ -48,6 +48,19 @@ extern LL_COMMON_API apr_thread_mutex_t* gLogMutexp; extern apr_thread_mutex_t* gCallStacksLogMutexp; struct apr_dso_handle_t; +/** + * @brief Function which appropriately logs error or remains quiet on + * APR_SUCCESS. + * @return Returns <code>true</code> if status is an error condition. + */ +bool LL_COMMON_API ll_apr_warn_status(apr_status_t status); +/// There's a whole other APR error-message function if you pass a DSO handle. +bool LL_COMMON_API ll_apr_warn_status(apr_status_t status, apr_dso_handle_t* handle); + +void LL_COMMON_API ll_apr_assert_status(apr_status_t status); +void LL_COMMON_API ll_apr_assert_status(apr_status_t status, apr_dso_handle_t* handle); + +extern "C" LL_COMMON_API apr_pool_t* gAPRPoolp; // Global APR memory pool /** * @brief initialize the common apr constructs -- apr itself, the @@ -285,26 +298,26 @@ protected: { llassert(sInitialized); void* ptr; - //apr_status_t result = + apr_status_t result = apr_threadkey_private_get(&ptr, mThreadKey); - //if (result != APR_SUCCESS) - //{ - // ll_apr_warn_status(s); - // llerrs << "Failed to get thread local data" << llendl; - //} + if (result != APR_SUCCESS) + { + ll_apr_warn_status(result); + llerrs << "Failed to get thread local data" << llendl; + } return ptr; } LL_FORCE_INLINE const void* get() const { void* ptr; - //apr_status_t result = + apr_status_t result = apr_threadkey_private_get(&ptr, mThreadKey); - //if (result != APR_SUCCESS) - //{ - // ll_apr_warn_status(s); - // llerrs << "Failed to get thread local data" << llendl; - //} + if (result != APR_SUCCESS) + { + ll_apr_warn_status(result); + llerrs << "Failed to get thread local data" << llendl; + } return ptr; } @@ -379,18 +392,4 @@ public: } }; -/** - * @brief Function which appropriately logs error or remains quiet on - * APR_SUCCESS. - * @return Returns <code>true</code> if status is an error condition. - */ -bool LL_COMMON_API ll_apr_warn_status(apr_status_t status); -/// There's a whole other APR error-message function if you pass a DSO handle. -bool LL_COMMON_API ll_apr_warn_status(apr_status_t status, apr_dso_handle_t* handle); - -void LL_COMMON_API ll_apr_assert_status(apr_status_t status); -void LL_COMMON_API ll_apr_assert_status(apr_status_t status, apr_dso_handle_t* handle); - -extern "C" LL_COMMON_API apr_pool_t* gAPRPoolp; // Global APR memory pool - #endif // LL_LLAPR_H diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp index 37e0fbac0a..f4fa8f1e61 100644 --- a/indra/llcommon/llfasttimer.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -59,11 +59,6 @@ namespace LLTrace ////////////////////////////////////////////////////////////////////////////// // statics -S32 TimeBlock::sCurFrameIndex = -1; -S32 TimeBlock::sLastFrameIndex = -1; -U64 TimeBlock::sLastFrameTime = TimeBlock::getCPUClockCount64(); -bool TimeBlock::sPauseHistory = 0; -bool TimeBlock::sResetHistory = 0; bool TimeBlock::sLog = false; std::string TimeBlock::sLogName = ""; bool TimeBlock::sMetricLog = false; @@ -220,143 +215,126 @@ void TimeBlock::setParent(TimeBlock* parent) } } -S32 TimeBlock::getDepth() -{ - S32 depth = 0; - TimeBlock* timerp = mParent; - while(timerp) - { - depth++; - if (timerp->getParent() == timerp) break; - timerp = timerp->mParent; - } - return depth; -} - // static void TimeBlock::processTimes() { - //void TimeBlock::buildHierarchy() + get_clock_count(); // good place to calculate clock frequency + U64 cur_time = getCPUClockCount64(); + CurTimerData* cur_data = sCurTimerData.get(); + + // set up initial tree + for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); + it != end_it; + ++it) { - // set up initial tree + TimeBlock& timer = *it; + if (&timer == &TimeBlock::getRootTimer()) continue; + + // bootstrap tree construction by attaching to last timer to be on stack + // when this timer was called + if (timer.mParent == &TimeBlock::getRootTimer()) { - for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); it != end_it; ++it) + TimeBlockAccumulator& accumulator = timer.getPrimaryAccumulator(); + + if (accumulator.mLastCaller) { - TimeBlock& timer = *it; - if (&timer == &TimeBlock::getRootTimer()) continue; - - // bootstrap tree construction by attaching to last timer to be on stack - // when this timer was called - if (timer.mParent == &TimeBlock::getRootTimer()) - { - TimeBlockTreeNode& tree_node = sCurTimerData->mTimerTreeData[timer.getIndex()]; - - if (tree_node.mLastCaller) - { - timer.setParent(tree_node.mLastCaller); - } - // no need to push up tree on first use, flag can be set spuriously - tree_node.mMoveUpTree = false; - } + timer.setParent(accumulator.mLastCaller); + accumulator.mParent = accumulator.mLastCaller; } + // no need to push up tree on first use, flag can be set spuriously + accumulator.mMoveUpTree = false; } + } - // bump timers up tree if they have been flagged as being in the wrong place - // do this in a bottom up order to promote descendants first before promoting ancestors - // this preserves partial order derived from current frame's observations - for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer()); - it != end_timer_tree_bottom_up(); - ++it) + // bump timers up tree if they have been flagged as being in the wrong place + // do this in a bottom up order to promote descendants first before promoting ancestors + // this preserves partial order derived from current frame's observations + for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer()); + it != end_timer_tree_bottom_up(); + ++it) + { + TimeBlock* timerp = *it; + + // sort timers by time last called, so call graph makes sense + if (timerp->mNeedsSorting) { - TimeBlock* timerp = *it; - // skip root timer - if (timerp == &TimeBlock::getRootTimer()) continue; - TimeBlockTreeNode& tree_node = sCurTimerData->mTimerTreeData[timerp->getIndex()]; + std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName()); + } + + // skip root timer + if (timerp != &TimeBlock::getRootTimer()) + { + TimeBlockAccumulator& accumulator = timerp->getPrimaryAccumulator(); - if (tree_node.mMoveUpTree) + if (accumulator.mMoveUpTree) { // since ancestors have already been visited, re-parenting won't affect tree traversal //step up tree, bringing our descendants with us LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() << " to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL; timerp->setParent(timerp->getParent()->getParent()); - tree_node.mMoveUpTree = false; + accumulator.mParent = timerp->mParent; + accumulator.mMoveUpTree = false; // don't bubble up any ancestors until descendants are done bubbling up + // as ancestors may call this timer only on certain paths, so we want to resolve + // child-most block locations before their parents it.skipAncestors(); } } - - // sort timers by time last called, so call graph makes sense - for(timer_tree_dfs_iterator_t it = begin_timer_tree(TimeBlock::getRootTimer()); - it != end_timer_tree(); - ++it) - { - TimeBlock* timerp = (*it); - if (timerp->mNeedsSorting) - { - std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName()); - } - timerp->mNeedsSorting = false; - } } - - //void TimeBlock::accumulateTimings() - { - U64 cur_time = getCPUClockCount64(); - - // root defined by parent pointing to self - CurTimerData* cur_data = sCurTimerData.get(); - // walk up stack of active timers and accumulate current time while leaving timing structures active - BlockTimer* cur_timer = cur_data->mCurTimer; - TimeBlockAccumulator& accumulator = cur_data->mTimerData->getPrimaryAccumulator(); - while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) - { - U64 cumulative_time_delta = cur_time - cur_timer->mStartTime; - U64 self_time_delta = cumulative_time_delta - cur_data->mChildTime; - cur_data->mChildTime = 0; - accumulator.mSelfTimeCounter += self_time_delta; - accumulator.mTotalTimeCounter += cumulative_time_delta; - cur_timer->mStartTime = cur_time; + // walk up stack of active timers and accumulate current time while leaving timing structures active + BlockTimer* cur_timer = cur_data->mCurTimer; + TimeBlockAccumulator& accumulator = cur_data->mTimerData->getPrimaryAccumulator(); + // root defined by parent pointing to self + while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) + { + U64 cumulative_time_delta = cur_time - cur_timer->mStartTime; + U64 self_time_delta = cumulative_time_delta - cur_data->mChildTime; + cur_data->mChildTime = 0; + accumulator.mSelfTimeCounter += self_time_delta; + accumulator.mTotalTimeCounter += cumulative_time_delta; - cur_data = &cur_timer->mLastTimerData; - cur_data->mChildTime += cumulative_time_delta; - if (cur_data->mTimerData) - { - accumulator = cur_data->mTimerData->getPrimaryAccumulator(); - } + cur_timer->mStartTime = cur_time; - cur_timer = cur_timer->mLastTimerData.mCurTimer; + cur_data = &cur_timer->mLastTimerData; + cur_data->mChildTime += cumulative_time_delta; + if (cur_data->mTimerData) + { + accumulator = cur_data->mTimerData->getPrimaryAccumulator(); } - // traverse tree in DFS post order, or bottom up - //for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer()); - // it != end_timer_tree_bottom_up(); - // ++it) - //{ - // TimeBlock* timerp = (*it); - // TimeBlockAccumulator& accumulator = timerp->getPrimaryAccumulator(); - // timerp->mTreeTimeCounter = accumulator.mSelfTimeCounter; - // for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it) - // { - // timerp->mTreeTimeCounter += (*child_it)->mTreeTimeCounter; - // } - - //S32 cur_frame = getCurFrameIndex(); - //if (cur_frame >= 0) - //{ - // // update timer history + cur_timer = cur_timer->mLastTimerData.mCurTimer; + } - // int hidx = getCurFrameIndex() % HISTORY_NUM; - // timerp->mCountHistory[hidx] = timerp->mTreeTimeCounter; - // timerp->mCountAverage = ((U64)timerp->mCountAverage * cur_frame + timerp->mTreeTimeCounter) / (cur_frame+1); - // timerp->mCallHistory[hidx] = accumulator.mCalls; - // timerp->mCallAverage = ((U64)timerp->mCallAverage * cur_frame + accumulator.mCalls) / (cur_frame+1); - //} - //} + // reset for next frame + for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), + end_it = LLInstanceTracker<TimeBlock>::endInstances(); + it != end_it; + ++it) + { + TimeBlock& timer = *it; + TimeBlockAccumulator& accumulator = timer.getPrimaryAccumulator(); + + accumulator.mLastCaller = NULL; + accumulator.mMoveUpTree = false; } + + // traverse tree in DFS post order, or bottom up + //for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer()); + // it != end_timer_tree_bottom_up(); + // ++it) + //{ + // TimeBlock* timerp = (*it); + // TimeBlockAccumulator& accumulator = timerp->getPrimaryAccumulator(); + // timerp->mTreeTimeCounter = accumulator.mSelfTimeCounter; + // for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it) + // { + // timerp->mTreeTimeCounter += (*child_it)->mTreeTimeCounter; + // } + //} } @@ -376,106 +354,78 @@ std::vector<TimeBlock*>& TimeBlock::getChildren() } //static -void TimeBlock::nextFrame() +void TimeBlock::logStats() { - get_clock_count(); // good place to calculate clock frequency - U64 frame_time = TimeBlock::getCPUClockCount64(); - if ((frame_time - sLastFrameTime) >> 8 > 0xffffffff) - { - llinfos << "Slow frame, fast timers inaccurate" << llendl; - } - - if (!sPauseHistory) - { - TimeBlock::processTimes(); - sLastFrameIndex = sCurFrameIndex++; - } - // get ready for next frame - //void TimeBlock::resetFrame() - { - if (sLog) - { //output current frame counts to performance log + if (sLog) + { //output current frame counts to performance log - static S32 call_count = 0; - if (call_count % 100 == 0) - { - LL_DEBUGS("FastTimers") << "countsPerSecond: " << countsPerSecond() << LL_ENDL; - LL_DEBUGS("FastTimers") << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << LL_ENDL; - LL_DEBUGS("FastTimers") << "getCPUClockCount32() " << getCPUClockCount32() << LL_ENDL; - LL_DEBUGS("FastTimers") << "getCPUClockCount64() " << getCPUClockCount64() << LL_ENDL; - LL_DEBUGS("FastTimers") << "elapsed sec " << ((F64)getCPUClockCount64()) / (LLUnit<LLUnits::Hertz, F64>(LLProcessorInfo().getCPUFrequency())) << LL_ENDL; - } - call_count++; + static S32 call_count = 0; + if (call_count % 100 == 0) + { + LL_DEBUGS("FastTimers") << "countsPerSecond: " << countsPerSecond() << LL_ENDL; + LL_DEBUGS("FastTimers") << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << LL_ENDL; + LL_DEBUGS("FastTimers") << "getCPUClockCount32() " << getCPUClockCount32() << LL_ENDL; + LL_DEBUGS("FastTimers") << "getCPUClockCount64() " << getCPUClockCount64() << LL_ENDL; + LL_DEBUGS("FastTimers") << "elapsed sec " << ((F64)getCPUClockCount64()) / (LLUnit<LLUnits::Hertz, F64>(LLProcessorInfo().getCPUFrequency())) << LL_ENDL; + } + call_count++; - LLUnit<LLUnits::Seconds, F64> total_time(0); - LLSD sd; + LLUnit<LLUnits::Seconds, F64> total_time(0); + LLSD sd; + { + for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), + end_it = LLInstanceTracker<TimeBlock>::endInstances(); + it != end_it; + ++it) { - for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), - end_it = LLInstanceTracker<TimeBlock>::endInstances(); - it != end_it; - ++it) - { - TimeBlock& timer = *it; - LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording(); - sd[timer.getName()]["Time"] = (LLSD::Real) (frame_recording.getLastRecordingPeriod().getSum(timer).value()); - sd[timer.getName()]["Calls"] = (LLSD::Integer) (frame_recording.getLastRecordingPeriod().getSum(timer.callCount())); - - // computing total time here because getting the root timer's getCountHistory - // doesn't work correctly on the first frame - total_time += frame_recording.getLastRecordingPeriod().getSum(timer); - } - } - - sd["Total"]["Time"] = (LLSD::Real) total_time.value(); - sd["Total"]["Calls"] = (LLSD::Integer) 1; + TimeBlock& timer = *it; + LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording(); + sd[timer.getName()]["Time"] = (LLSD::Real) (frame_recording.getLastRecordingPeriod().getSum(timer).value()); + sd[timer.getName()]["Calls"] = (LLSD::Integer) (frame_recording.getLastRecordingPeriod().getSum(timer.callCount())); - { - LLMutexLock lock(sLogLock); - sLogQueue.push(sd); + // computing total time here because getting the root timer's getCountHistory + // doesn't work correctly on the first frame + total_time += frame_recording.getLastRecordingPeriod().getSum(timer); } } - // reset for next frame - for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), - end_it = LLInstanceTracker<TimeBlock>::endInstances(); - it != end_it; - ++it) - { - TimeBlock& timer = *it; - TimeBlockTreeNode& tree_node = sCurTimerData->mTimerTreeData[timer.getIndex()]; + sd["Total"]["Time"] = (LLSD::Real) total_time.value(); + sd["Total"]["Calls"] = (LLSD::Integer) 1; - tree_node.mLastCaller = NULL; - tree_node.mMoveUpTree = false; + { + LLMutexLock lock(sLogLock); + sLogQueue.push(sd); } } - sLastFrameTime = frame_time; + } //static void TimeBlock::dumpCurTimes() { - // accumulate timings, etc. - processTimes(); - + LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording(); + LLTrace::Recording& last_frame_recording = frame_recording.getLastRecordingPeriod(); + // walk over timers in depth order and output timings for(timer_tree_dfs_iterator_t it = begin_timer_tree(TimeBlock::getRootTimer()); it != end_timer_tree(); ++it) { - LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording(); TimeBlock* timerp = (*it); - LLUnit<LLUnits::Seconds, F64> total_time_ms = frame_recording.getLastRecordingPeriod().getSum(*timerp); - U32 num_calls = frame_recording.getLastRecordingPeriod().getSum(timerp->callCount()); + LLUnit<LLUnits::Seconds, F64> total_time_ms = last_frame_recording.getSum(*timerp); + U32 num_calls = last_frame_recording.getSum(timerp->callCount()); // Don't bother with really brief times, keep output concise if (total_time_ms < 0.1) continue; std::ostringstream out_str; - for (S32 i = 0; i < timerp->getDepth(); i++) + TimeBlock* parent_timerp = timerp; + while(parent_timerp && parent_timerp != parent_timerp->getParent()) { out_str << "\t"; + parent_timerp = parent_timerp->getParent(); } out_str << timerp->getName() << " " @@ -505,7 +455,11 @@ void TimeBlock::writeLog(std::ostream& os) TimeBlockAccumulator::TimeBlockAccumulator() : mSelfTimeCounter(0), mTotalTimeCounter(0), - mCalls(0) + mCalls(0), + mLastCaller(NULL), + mActiveCount(0), + mMoveUpTree(false), + mParent(NULL) {} void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other ) @@ -513,6 +467,10 @@ void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other ) mSelfTimeCounter += other.mSelfTimeCounter; mTotalTimeCounter += other.mTotalTimeCounter; mCalls += other.mCalls; + mLastCaller = other.mLastCaller; + mActiveCount = other.mActiveCount; + mMoveUpTree = other.mMoveUpTree; + mParent = other.mParent; } void TimeBlockAccumulator::reset( const TimeBlockAccumulator* other ) @@ -522,10 +480,4 @@ void TimeBlockAccumulator::reset( const TimeBlockAccumulator* other ) mCalls = 0; } -TimeBlockTreeNode::TimeBlockTreeNode() -: mLastCaller(NULL), - mActiveCount(0), - mMoveUpTree(false) -{} - } // namespace LLTrace diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index b5b4a8d0b4..1e2e4b590f 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -43,7 +43,6 @@ struct CurTimerData class BlockTimer* mCurTimer; class TimeBlock* mTimerData; U64 mChildTime; - TimeBlockTreeNode* mTimerTreeData; }; class BlockTimer @@ -70,11 +69,8 @@ class TimeBlock public: TimeBlock(const char* name, bool open = false, TimeBlock* parent = &getRootTimer()); - enum { HISTORY_NUM = 300 }; - TimeBlock* getParent() const { return mParent; } void setParent(TimeBlock* parent); - S32 getDepth(); typedef std::vector<TimeBlock*>::const_iterator child_const_iter; child_const_iter beginChildren(); @@ -245,12 +241,12 @@ public: // can be called multiple times in a frame, at any point static void processTimes(); - // call this once a frame to reset timers - static void nextFrame(); + // call this once a frame to periodically log timers + static void logStats(); // tree structure, only updated from master trace thread TimeBlock* mParent; // TimeBlock of caller(parent) - std::vector<TimeBlock*> mChildren; + std::vector<TimeBlock*> mChildren; // TimeBlock of callees bool mCollapsed, // don't show children mNeedsSorting; // sort children whenever child added @@ -260,12 +256,6 @@ public: sLog; static LLThreadLocalPointer<CurTimerData> sCurTimerData; static U64 sClockResolution; - static S32 sCurFrameIndex, - sLastFrameIndex; - static U64 sLastFrameTime; - static bool sPauseHistory, - sResetHistory; - }; LL_FORCE_INLINE BlockTimer::BlockTimer(TimeBlock& timer) @@ -274,10 +264,10 @@ LL_FORCE_INLINE BlockTimer::BlockTimer(TimeBlock& timer) mStartTime = TimeBlock::getCPUClockCount64(); CurTimerData* cur_timer_data = TimeBlock::sCurTimerData.get(); - TimeBlockTreeNode& tree_node = cur_timer_data->mTimerTreeData[timer.getIndex()]; - tree_node.mActiveCount++; + TimeBlockAccumulator& accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator(); + accumulator.mActiveCount++; // keep current parent as long as it is active when we are - tree_node.mMoveUpTree |= (cur_timer_data->mTimerTreeData[timer.mParent->getIndex()].mActiveCount == 0); + accumulator.mMoveUpTree |= (accumulator.mParent->getPrimaryAccumulator().mActiveCount == 0); // store top of stack mLastTimerData = *cur_timer_data; @@ -294,16 +284,15 @@ LL_FORCE_INLINE BlockTimer::~BlockTimer() U64 total_time = TimeBlock::getCPUClockCount64() - mStartTime; CurTimerData* cur_timer_data = TimeBlock::sCurTimerData.get(); TimeBlockAccumulator& accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator(); - TimeBlockTreeNode& tree_node = cur_timer_data->mTimerTreeData[cur_timer_data->mTimerData->getIndex()]; accumulator.mCalls++; accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime; accumulator.mTotalTimeCounter += total_time; - tree_node.mActiveCount--; + accumulator.mActiveCount--; // store last caller to bootstrap tree creation // do this in the destructor in case of recursion to get topmost caller - tree_node.mLastCaller = mLastTimerData.mTimerData; + accumulator.mLastCaller = mLastTimerData.mTimerData; // we are only tracking self time, so subtract our total time delta from parents mLastTimerData.mChildTime += total_time; diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h index d4fc93342d..5b5e2b7879 100644 --- a/indra/llcommon/lltrace.h +++ b/indra/llcommon/lltrace.h @@ -82,18 +82,21 @@ namespace LLTrace enum StaticAllocationMarker { STATIC_ALLOC }; AccumulatorBuffer(StaticAllocationMarker m) - : mStorageSize(64), - mNextStorageSlot(0), - mStorage(new ACCUMULATOR[DEFAULT_ACCUMULATOR_BUFFER_SIZE]) - {} + : mStorageSize(0), + mStorage(NULL), + mNextStorageSlot(0) + { + resize(DEFAULT_ACCUMULATOR_BUFFER_SIZE); + } public: AccumulatorBuffer(const AccumulatorBuffer& other = getDefaultBuffer()) - : mStorageSize(other.mStorageSize), - mStorage(new ACCUMULATOR[other.mStorageSize]), + : mStorageSize(0), + mStorage(NULL), mNextStorageSlot(other.mNextStorageSlot) { + resize(other.mStorageSize); for (S32 i = 0; i < mNextStorageSlot; i++) { mStorage[i] = other.mStorage[i]; @@ -181,9 +184,12 @@ namespace LLTrace { ACCUMULATOR* old_storage = mStorage; mStorage = new ACCUMULATOR[new_size]; - for (S32 i = 0; i < mStorageSize; i++) + if (old_storage) { - mStorage[i] = old_storage[i]; + for (S32 i = 0; i < mStorageSize; i++) + { + mStorage[i] = old_storage[i]; + } } mStorageSize = new_size; delete[] old_storage; @@ -300,14 +306,14 @@ namespace LLTrace mNumSamples += other.mNumSamples; mMean = mMean * weight + other.mMean * (1.f - weight); + // combine variance (and hence standard deviation) of 2 different sized sample groups using + // the following formula: http://www.mrc-bsu.cam.ac.uk/cochrane/handbook/chapter_7/7_7_3_8_combining_groups.htm F64 n_1 = (F64)mNumSamples, n_2 = (F64)other.mNumSamples; F64 m_1 = mMean, m_2 = other.mMean; F64 sd_1 = getStandardDeviation(), sd_2 = other.getStandardDeviation(); - // combine variance (and hence standard deviation) of 2 different sized sample groups using - // the following formula: http://www.mrc-bsu.cam.ac.uk/cochrane/handbook/chapter_7/7_7_3_8_combining_groups.htm if (n_1 == 0) { mVarianceSum = other.mVarianceSum; @@ -405,6 +411,7 @@ namespace LLTrace { public: typedef LLUnit<LLUnits::Seconds, F64> value_t; + typedef TimeBlockAccumulator self_t; // fake class that allows us to view call count aspect of timeblock accumulator struct CallCountAspect @@ -418,15 +425,20 @@ namespace LLTrace }; TimeBlockAccumulator(); - void addSamples(const TimeBlockAccumulator& other); - void reset(const TimeBlockAccumulator* other); + void addSamples(const self_t& other); + void reset(const self_t* other); // // members // - U64 mSelfTimeCounter, - mTotalTimeCounter; - U32 mCalls; + U64 mSelfTimeCounter, + mTotalTimeCounter; + U32 mCalls; + class TimeBlock* mParent; // last acknowledged parent of this time block + class TimeBlock* mLastCaller; // used to bootstrap tree construction + U16 mActiveCount; // number of timers with this ID active on stack + bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame + }; @@ -460,16 +472,6 @@ namespace LLTrace {} }; - class TimeBlockTreeNode - { - public: - TimeBlockTreeNode(); - class TimeBlock* mLastCaller; // used to bootstrap tree construction - U16 mActiveCount; // number of timers with this ID active on stack - bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame - }; - - template <typename T = F64> class Measurement : public TraceType<MeasurementAccumulator<typename LLUnits::HighestPrecisionType<T>::type_t> > @@ -604,7 +606,7 @@ struct MemFootprint<std::vector<T> > { static size_t measure(const std::vector<T>& value) { - return value.capacity() * MemFootPrint<T>::measure(); + return value.capacity() * MemFootprint<T>::measure(); } static size_t measure() @@ -618,7 +620,7 @@ struct MemFootprint<std::list<T> > { static size_t measure(const std::list<T>& value) { - return value.size() * (MemFootPrint<T>::measure() + sizeof(void*) * 2); + return value.size() * (MemFootprint<T>::measure() + sizeof(void*) * 2); } static size_t measure() diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp index 3ea511ff3c..40079f40f1 100644 --- a/indra/llcommon/lltracerecording.cpp +++ b/indra/llcommon/lltracerecording.cpp @@ -44,7 +44,8 @@ Recording::Recording() mMeasurementsFloat(new AccumulatorBuffer<MeasurementAccumulator<F64> >()), mCounts(new AccumulatorBuffer<CountAccumulator<S64> >()), mMeasurements(new AccumulatorBuffer<MeasurementAccumulator<S64> >()), - mStackTimers(new AccumulatorBuffer<TimeBlockAccumulator>()) + mStackTimers(new AccumulatorBuffer<TimeBlockAccumulator>()), + mMemStats(new AccumulatorBuffer<MemStatAccumulator>()) {} Recording::Recording( const Recording& other ) @@ -57,6 +58,7 @@ Recording::Recording( const Recording& other ) mCounts = other.mCounts; mMeasurements = other.mMeasurements; mStackTimers = other.mStackTimers; + mMemStats = other.mMemStats; LLStopWatchControlsMixin::initTo(other.getPlayState()); } @@ -84,6 +86,7 @@ void Recording::handleReset() mCounts.write()->reset(); mMeasurements.write()->reset(); mStackTimers.write()->reset(); + mMemStats.write()->reset(); mElapsedSeconds = 0.0; mSamplingTimer.reset(); @@ -98,6 +101,7 @@ void Recording::handleStart() void Recording::handleStop() { mElapsedSeconds += mSamplingTimer.getElapsedTimeF64(); + LLTrace::TimeBlock::processTimes(); LLTrace::get_thread_recorder()->deactivate(this); } @@ -107,9 +111,9 @@ void Recording::handleSplitTo(Recording& other) other.restart(); other.mMeasurementsFloat.write()->reset(mMeasurementsFloat); other.mMeasurements.write()->reset(mMeasurements); + //TODO: figure out how to get seamless handoff of timing stats } - void Recording::makePrimary() { mCountsFloat.write()->makePrimary(); @@ -117,6 +121,7 @@ void Recording::makePrimary() mCounts.write()->makePrimary(); mMeasurements.write()->makePrimary(); mStackTimers.write()->makePrimary(); + mMemStats.write()->makePrimary(); } bool Recording::isPrimary() const @@ -131,6 +136,7 @@ void Recording::makeUnique() mCounts.makeUnique(); mMeasurements.makeUnique(); mStackTimers.makeUnique(); + mMemStats.makeUnique(); } void Recording::appendRecording( const Recording& other ) @@ -140,6 +146,7 @@ void Recording::appendRecording( const Recording& other ) mCounts.write()->addSamples(*other.mCounts); mMeasurements.write()->addSamples(*other.mMeasurements); mStackTimers.write()->addSamples(*other.mStackTimers); + mMemStats.write()->addSamples(*other.mMemStats); mElapsedSeconds += other.mElapsedSeconds; } @@ -174,6 +181,16 @@ F32 Recording::getPerSec(const TraceType<TimeBlockAccumulator::CallCountAspect>& return (F32)(*mStackTimers)[stat.getIndex()].mCalls / mElapsedSeconds; } +LLUnit<LLUnits::Bytes, U32> Recording::getSum(const TraceType<MemStatAccumulator>& stat) const +{ + return (*mMemStats)[stat.getIndex()].mAllocatedCount; +} + +LLUnit<LLUnits::Bytes, F32> Recording::getPerSec(const TraceType<MemStatAccumulator>& stat) const +{ + return (F32)(*mMemStats)[stat.getIndex()].mAllocatedCount / mElapsedSeconds; +} + F64 Recording::getSum( const TraceType<CountAccumulator<F64> >& stat ) const { diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h index f4841214e8..8e5f1125ec 100644 --- a/indra/llcommon/lltracerecording.h +++ b/indra/llcommon/lltracerecording.h @@ -34,7 +34,7 @@ #include "lltimer.h" #include "lltrace.h" -class LL_COMMON_API LLStopWatchControlsMixinCommon +class LLStopWatchControlsMixinCommon { public: virtual ~LLStopWatchControlsMixinCommon() {} @@ -101,7 +101,7 @@ private: namespace LLTrace { - class LL_COMMON_API Recording : public LLStopWatchControlsMixin<Recording> + class Recording : public LLStopWatchControlsMixin<Recording> { public: Recording(); @@ -127,6 +127,10 @@ namespace LLTrace LLUnit<LLUnits::Seconds, F64> getPerSec(const TraceType<TimeBlockAccumulator::SelfTimeAspect>& stat) const; F32 getPerSec(const TraceType<TimeBlockAccumulator::CallCountAspect>& stat) const; + // Memory accessors + LLUnit<LLUnits::Bytes, U32> getSum(const TraceType<MemStatAccumulator>& stat) const; + LLUnit<LLUnits::Bytes, F32> getPerSec(const TraceType<MemStatAccumulator>& stat) const; + // Count accessors F64 getSum(const TraceType<CountAccumulator<F64> >& stat) const; S64 getSum(const TraceType<CountAccumulator<S64> >& stat) const; @@ -226,7 +230,8 @@ namespace LLTrace LLCopyOnWritePointer<AccumulatorBuffer<MeasurementAccumulator<F64> > > mMeasurementsFloat; LLCopyOnWritePointer<AccumulatorBuffer<CountAccumulator<S64> > > mCounts; LLCopyOnWritePointer<AccumulatorBuffer<MeasurementAccumulator<S64> > > mMeasurements; - LLCopyOnWritePointer<AccumulatorBuffer<TimeBlockAccumulator> > mStackTimers; + LLCopyOnWritePointer<AccumulatorBuffer<TimeBlockAccumulator> > mStackTimers; + LLCopyOnWritePointer<AccumulatorBuffer<MemStatAccumulator> > mMemStats; LLTimer mSamplingTimer; F64 mElapsedSeconds; diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp index 16235473ee..c4144b4999 100644 --- a/indra/llcommon/lltracethreadrecorder.cpp +++ b/indra/llcommon/lltracethreadrecorder.cpp @@ -43,13 +43,20 @@ ThreadRecorder::ThreadRecorder() mRootTimerData = new CurTimerData(); mRootTimerData->mTimerData = &TimeBlock::getRootTimer(); - mRootTimerData->mTimerTreeData = new TimeBlockTreeNode[AccumulatorBuffer<TimeBlockAccumulator>::getDefaultBuffer().size()]; + TimeBlock::sCurTimerData = mRootTimerData; + TimeBlock::getRootTimer().getPrimaryAccumulator().mActiveCount = 1; + + // initialize parent pointers in time blocks + for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); + it != end_it; + ++it) + { + it->getPrimaryAccumulator().mParent = it->mParent; + } mRootTimer = new BlockTimer(TimeBlock::getRootTimer()); mRootTimerData->mCurTimer = mRootTimer; - - mRootTimerData->mTimerTreeData[TimeBlock::getRootTimer().getIndex()].mActiveCount = 1; } ThreadRecorder::~ThreadRecorder() @@ -62,7 +69,6 @@ ThreadRecorder::~ThreadRecorder() } get_thread_recorder() = NULL; TimeBlock::sCurTimerData = NULL; - delete [] mRootTimerData->mTimerTreeData; delete mRootTimerData; } diff --git a/indra/llui/llviewmodel.h b/indra/llui/llviewmodel.h index cec0368460..a2ca20c739 100644 --- a/indra/llui/llviewmodel.h +++ b/indra/llui/llviewmodel.h @@ -39,6 +39,7 @@ #include "llrefcount.h" #include "stdenums.h" #include "llstring.h" +#include "lltrace.h" #include <string> class LLScrollListItem; diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index c986954867..4209e6612e 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -1233,8 +1233,8 @@ bool LLAppViewer::mainLoop() while (!LLApp::isExiting()) { LLFastTimer _(FTM_FRAME); - LLTrace::TimeBlock::nextFrame(); LLTrace::get_frame_recording().nextPeriod(); + LLTrace::TimeBlock::logStats(); LLTrace::getMasterThreadRecorder().pullFromSlaveThreads(); diff --git a/indra/newview/llfasttimerview.cpp b/indra/newview/llfasttimerview.cpp index 8014b5712b..2cf7939e5c 100644 --- a/indra/newview/llfasttimerview.cpp +++ b/indra/newview/llfasttimerview.cpp @@ -58,6 +58,7 @@ static const S32 MAX_VISIBLE_HISTORY = 10; static const S32 LINE_GRAPH_HEIGHT = 240; const S32 FTV_MAX_DEPTH = 8; +const S32 HISTORY_NUM = 300; std::vector<LLTrace::TimeBlock*> ft_display_idx; // line of table entry for display purposes (for collapse) @@ -77,6 +78,19 @@ static timer_tree_iterator_t end_timer_tree() return timer_tree_iterator_t(); } +S32 get_depth(const TimeBlock* blockp) +{ + S32 depth = 0; + TimeBlock* timerp = blockp->mParent; + while(timerp) + { + depth++; + if (timerp->getParent() == timerp) break; + timerp = timerp->mParent; + } + return depth; +} + LLFastTimerView::LLFastTimerView(const LLSD& key) : LLFloater(key), mHoverTimer(NULL), @@ -88,7 +102,8 @@ LLFastTimerView::LLFastTimerView(const LLSD& key) mHoverID(NULL), mHoverBarIndex(-1), mPrintStats(-1), - mRecording(&LLTrace::get_frame_recording()) + mRecording(&LLTrace::get_frame_recording()), + mPauseHistory(false) {} LLFastTimerView::~LLFastTimerView() @@ -102,13 +117,12 @@ LLFastTimerView::~LLFastTimerView() void LLFastTimerView::onPause() { - LLTrace::TimeBlock::sPauseHistory = !LLTrace::TimeBlock::sPauseHistory; + mPauseHistory = !mPauseHistory; // reset scroll to bottom when unpausing - if (!LLTrace::TimeBlock::sPauseHistory) + if (!mPauseHistory) { mRecording = new LLTrace::PeriodicRecording(LLTrace::get_frame_recording()); mScrollIndex = 0; - LLTrace::TimeBlock::sResetHistory = true; getChild<LLButton>("pause_btn")->setLabel(getString("pause")); } else @@ -239,14 +253,14 @@ BOOL LLFastTimerView::handleHover(S32 x, S32 y, MASK mask) if (hasMouseCapture()) { F32 lerp = llclamp(1.f - (F32) (x - mGraphRect.mLeft) / (F32) mGraphRect.getWidth(), 0.f, 1.f); - mScrollIndex = llround( lerp * (F32)(LLTrace::TimeBlock::HISTORY_NUM - MAX_VISIBLE_HISTORY)); + mScrollIndex = llround( lerp * (F32)(HISTORY_NUM - MAX_VISIBLE_HISTORY)); mScrollIndex = llclamp( mScrollIndex, 0, frame_recording.getNumPeriods()); return TRUE; } mHoverTimer = NULL; mHoverID = NULL; - if(LLTrace::TimeBlock::sPauseHistory && mBarRect.pointInRect(x, y)) + if(mPauseHistory && mBarRect.pointInRect(x, y)) { mHoverBarIndex = llmin(mRecording->getNumPeriods() - 1, MAX_VISIBLE_HISTORY - ((y - mBarRect.mBottom) * (MAX_VISIBLE_HISTORY + 2) / mBarRect.getHeight())); @@ -322,7 +336,7 @@ static std::string get_tooltip(LLTrace::TimeBlock& timer, S32 history_index, LLT BOOL LLFastTimerView::handleToolTip(S32 x, S32 y, MASK mask) { - if(LLTrace::TimeBlock::sPauseHistory && mBarRect.pointInRect(x, y)) + if(mPauseHistory && mBarRect.pointInRect(x, y)) { // tooltips for timer bars if (mHoverTimer) @@ -362,10 +376,10 @@ BOOL LLFastTimerView::handleScrollWheel(S32 x, S32 y, S32 clicks) { LLTrace::PeriodicRecording& frame_recording = *mRecording; - LLTrace::TimeBlock::sPauseHistory = TRUE; + mPauseHistory = true; mScrollIndex = llclamp( mScrollIndex + clicks, 0, - llmin(frame_recording.getNumPeriods(), (S32)LLTrace::TimeBlock::HISTORY_NUM - MAX_VISIBLE_HISTORY)); + llmin(frame_recording.getNumPeriods(), (S32)HISTORY_NUM - MAX_VISIBLE_HISTORY)); return TRUE; } @@ -448,9 +462,9 @@ void LLFastTimerView::draw() const F32 HUE_INCREMENT = 0.23f; hue = fmodf(hue + HUE_INCREMENT, 1.f); // saturation increases with depth - F32 saturation = clamp_rescale((F32)idp->getDepth(), 0.f, 3.f, 0.f, 1.f); + F32 saturation = clamp_rescale((F32)get_depth(idp), 0.f, 3.f, 0.f, 1.f); // lightness alternates with depth - F32 lightness = idp->getDepth() % 2 ? 0.5f : 0.6f; + F32 lightness = get_depth(idp) % 2 ? 0.5f : 0.6f; LLColor4 child_color; child_color.setHSL(hue, saturation, lightness); @@ -509,15 +523,15 @@ void LLFastTimerView::draw() { tdesc = llformat("%s [%.1f]",idp->getName().c_str(),ms.value()); } - dx = (texth+4) + idp->getDepth()*8; + dx = (texth+4) + get_depth(idp)*8; LLColor4 color = LLColor4::white; - if (idp->getDepth() > 0) + if (get_depth(idp) > 0) { S32 line_start_y = (top + bottom) / 2; S32 line_end_y = line_start_y + ((texth + 2) * (cur_line - display_line[idp->getParent()])) - texth; gl_line_2d(x + dx - 8, line_start_y, x + dx, line_start_y, color); - S32 line_x = x + (texth + 4) + ((idp->getDepth() - 1) * 8); + S32 line_x = x + (texth + 4) + ((get_depth(idp) - 1) * 8); gl_line_2d(line_x, line_start_y, line_x, line_end_y, color); if (idp->getCollapsed() && !idp->getChildren().empty()) { @@ -696,7 +710,7 @@ void LLFastTimerView::draw() S32 prev_delta_x = deltax.empty() ? 0 : deltax.back(); deltax.push_back(dx); - int level = idp->getDepth() - 1; + int level = get_depth(idp) - 1; while ((S32)xpos.size() > level + 1) { @@ -710,7 +724,7 @@ void LLFastTimerView::draw() sublevel_dx[level] = dx; sublevel_right[level] = sublevel_left[level] + sublevel_dx[level]; } - else if (prev_id && prev_id->getDepth() < idp->getDepth()) + else if (prev_id && prev_id->getDepth() < get_depth(idp)) { U64 sublevelticks = 0; @@ -821,10 +835,10 @@ void LLFastTimerView::draw() //highlight visible range { - S32 first_frame = LLTrace::TimeBlock::HISTORY_NUM - mScrollIndex; + S32 first_frame = HISTORY_NUM - mScrollIndex; S32 last_frame = first_frame - MAX_VISIBLE_HISTORY; - F32 frame_delta = ((F32) (mGraphRect.getWidth()))/(LLTrace::TimeBlock::HISTORY_NUM-1); + F32 frame_delta = ((F32) (mGraphRect.getWidth()))/(HISTORY_NUM-1); F32 right = (F32) mGraphRect.mLeft + frame_delta*first_frame; F32 left = (F32) mGraphRect.mLeft + frame_delta*last_frame; @@ -889,7 +903,7 @@ void LLFastTimerView::draw() cur_max = llmax(cur_max, time); cur_max_calls = llmax(cur_max_calls, calls); } - F32 x = mGraphRect.mRight - j * (F32)(mGraphRect.getWidth())/(LLTrace::TimeBlock::HISTORY_NUM-1); + F32 x = mGraphRect.mRight - j * (F32)(mGraphRect.getWidth())/(HISTORY_NUM-1); F32 y = mDisplayHz ? mGraphRect.mBottom + (1.f / time.value()) * ((F32) mGraphRect.getHeight() / (1.f / max_time.value())) : mGraphRect.mBottom + time / max_time * (F32)mGraphRect.getHeight(); diff --git a/indra/newview/llfasttimerview.h b/indra/newview/llfasttimerview.h index 6474c2f524..07662bb795 100644 --- a/indra/newview/llfasttimerview.h +++ b/indra/newview/llfasttimerview.h @@ -95,6 +95,7 @@ private: S32 mPrintStats; LLRect mGraphRect; LLTrace::PeriodicRecording* mRecording; + bool mPauseHistory; }; #endif |