diff options
author | Richard Linden <none@none> | 2012-12-18 20:07:25 -0800 |
---|---|---|
committer | Richard Linden <none@none> | 2012-12-18 20:07:25 -0800 |
commit | c219282f5de753a044cecb53bd806145f68add9a (patch) | |
tree | 9bea36fd9fba8f41fed4c04ba413bf5d41993ca0 /indra/llcommon | |
parent | 1f56e57008f5a50c9e75fc0b4512c483ac359a52 (diff) |
SH-3406 WIP convert fast timers to lltrace system
removed some potential data races
got memory stats recording in trace system
Diffstat (limited to 'indra/llcommon')
-rw-r--r-- | indra/llcommon/llapr.h | 51 | ||||
-rw-r--r-- | indra/llcommon/llfasttimer.cpp | 326 | ||||
-rw-r--r-- | indra/llcommon/llfasttimer.h | 27 | ||||
-rw-r--r-- | indra/llcommon/lltrace.h | 56 | ||||
-rw-r--r-- | indra/llcommon/lltracerecording.cpp | 21 | ||||
-rw-r--r-- | indra/llcommon/lltracerecording.h | 11 | ||||
-rw-r--r-- | indra/llcommon/lltracethreadrecorder.cpp | 14 |
7 files changed, 238 insertions, 268 deletions
diff --git a/indra/llcommon/llapr.h b/indra/llcommon/llapr.h index 199b5291dd..f8f94263e4 100644 --- a/indra/llcommon/llapr.h +++ b/indra/llcommon/llapr.h @@ -48,6 +48,19 @@ extern LL_COMMON_API apr_thread_mutex_t* gLogMutexp; extern apr_thread_mutex_t* gCallStacksLogMutexp; struct apr_dso_handle_t; +/** + * @brief Function which appropriately logs error or remains quiet on + * APR_SUCCESS. + * @return Returns <code>true</code> if status is an error condition. + */ +bool LL_COMMON_API ll_apr_warn_status(apr_status_t status); +/// There's a whole other APR error-message function if you pass a DSO handle. +bool LL_COMMON_API ll_apr_warn_status(apr_status_t status, apr_dso_handle_t* handle); + +void LL_COMMON_API ll_apr_assert_status(apr_status_t status); +void LL_COMMON_API ll_apr_assert_status(apr_status_t status, apr_dso_handle_t* handle); + +extern "C" LL_COMMON_API apr_pool_t* gAPRPoolp; // Global APR memory pool /** * @brief initialize the common apr constructs -- apr itself, the @@ -285,26 +298,26 @@ protected: { llassert(sInitialized); void* ptr; - //apr_status_t result = + apr_status_t result = apr_threadkey_private_get(&ptr, mThreadKey); - //if (result != APR_SUCCESS) - //{ - // ll_apr_warn_status(s); - // llerrs << "Failed to get thread local data" << llendl; - //} + if (result != APR_SUCCESS) + { + ll_apr_warn_status(result); + llerrs << "Failed to get thread local data" << llendl; + } return ptr; } LL_FORCE_INLINE const void* get() const { void* ptr; - //apr_status_t result = + apr_status_t result = apr_threadkey_private_get(&ptr, mThreadKey); - //if (result != APR_SUCCESS) - //{ - // ll_apr_warn_status(s); - // llerrs << "Failed to get thread local data" << llendl; - //} + if (result != APR_SUCCESS) + { + ll_apr_warn_status(result); + llerrs << "Failed to get thread local data" << llendl; + } return ptr; } @@ -379,18 +392,4 @@ public: } }; -/** - * @brief Function which appropriately logs error or remains quiet on - * APR_SUCCESS. - * @return Returns <code>true</code> if status is an error condition. - */ -bool LL_COMMON_API ll_apr_warn_status(apr_status_t status); -/// There's a whole other APR error-message function if you pass a DSO handle. -bool LL_COMMON_API ll_apr_warn_status(apr_status_t status, apr_dso_handle_t* handle); - -void LL_COMMON_API ll_apr_assert_status(apr_status_t status); -void LL_COMMON_API ll_apr_assert_status(apr_status_t status, apr_dso_handle_t* handle); - -extern "C" LL_COMMON_API apr_pool_t* gAPRPoolp; // Global APR memory pool - #endif // LL_LLAPR_H diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp index 37e0fbac0a..f4fa8f1e61 100644 --- a/indra/llcommon/llfasttimer.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -59,11 +59,6 @@ namespace LLTrace ////////////////////////////////////////////////////////////////////////////// // statics -S32 TimeBlock::sCurFrameIndex = -1; -S32 TimeBlock::sLastFrameIndex = -1; -U64 TimeBlock::sLastFrameTime = TimeBlock::getCPUClockCount64(); -bool TimeBlock::sPauseHistory = 0; -bool TimeBlock::sResetHistory = 0; bool TimeBlock::sLog = false; std::string TimeBlock::sLogName = ""; bool TimeBlock::sMetricLog = false; @@ -220,143 +215,126 @@ void TimeBlock::setParent(TimeBlock* parent) } } -S32 TimeBlock::getDepth() -{ - S32 depth = 0; - TimeBlock* timerp = mParent; - while(timerp) - { - depth++; - if (timerp->getParent() == timerp) break; - timerp = timerp->mParent; - } - return depth; -} - // static void TimeBlock::processTimes() { - //void TimeBlock::buildHierarchy() + get_clock_count(); // good place to calculate clock frequency + U64 cur_time = getCPUClockCount64(); + CurTimerData* cur_data = sCurTimerData.get(); + + // set up initial tree + for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); + it != end_it; + ++it) { - // set up initial tree + TimeBlock& timer = *it; + if (&timer == &TimeBlock::getRootTimer()) continue; + + // bootstrap tree construction by attaching to last timer to be on stack + // when this timer was called + if (timer.mParent == &TimeBlock::getRootTimer()) { - for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); it != end_it; ++it) + TimeBlockAccumulator& accumulator = timer.getPrimaryAccumulator(); + + if (accumulator.mLastCaller) { - TimeBlock& timer = *it; - if (&timer == &TimeBlock::getRootTimer()) continue; - - // bootstrap tree construction by attaching to last timer to be on stack - // when this timer was called - if (timer.mParent == &TimeBlock::getRootTimer()) - { - TimeBlockTreeNode& tree_node = sCurTimerData->mTimerTreeData[timer.getIndex()]; - - if (tree_node.mLastCaller) - { - timer.setParent(tree_node.mLastCaller); - } - // no need to push up tree on first use, flag can be set spuriously - tree_node.mMoveUpTree = false; - } + timer.setParent(accumulator.mLastCaller); + accumulator.mParent = accumulator.mLastCaller; } + // no need to push up tree on first use, flag can be set spuriously + accumulator.mMoveUpTree = false; } + } - // bump timers up tree if they have been flagged as being in the wrong place - // do this in a bottom up order to promote descendants first before promoting ancestors - // this preserves partial order derived from current frame's observations - for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer()); - it != end_timer_tree_bottom_up(); - ++it) + // bump timers up tree if they have been flagged as being in the wrong place + // do this in a bottom up order to promote descendants first before promoting ancestors + // this preserves partial order derived from current frame's observations + for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer()); + it != end_timer_tree_bottom_up(); + ++it) + { + TimeBlock* timerp = *it; + + // sort timers by time last called, so call graph makes sense + if (timerp->mNeedsSorting) { - TimeBlock* timerp = *it; - // skip root timer - if (timerp == &TimeBlock::getRootTimer()) continue; - TimeBlockTreeNode& tree_node = sCurTimerData->mTimerTreeData[timerp->getIndex()]; + std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName()); + } + + // skip root timer + if (timerp != &TimeBlock::getRootTimer()) + { + TimeBlockAccumulator& accumulator = timerp->getPrimaryAccumulator(); - if (tree_node.mMoveUpTree) + if (accumulator.mMoveUpTree) { // since ancestors have already been visited, re-parenting won't affect tree traversal //step up tree, bringing our descendants with us LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() << " to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL; timerp->setParent(timerp->getParent()->getParent()); - tree_node.mMoveUpTree = false; + accumulator.mParent = timerp->mParent; + accumulator.mMoveUpTree = false; // don't bubble up any ancestors until descendants are done bubbling up + // as ancestors may call this timer only on certain paths, so we want to resolve + // child-most block locations before their parents it.skipAncestors(); } } - - // sort timers by time last called, so call graph makes sense - for(timer_tree_dfs_iterator_t it = begin_timer_tree(TimeBlock::getRootTimer()); - it != end_timer_tree(); - ++it) - { - TimeBlock* timerp = (*it); - if (timerp->mNeedsSorting) - { - std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName()); - } - timerp->mNeedsSorting = false; - } } - - //void TimeBlock::accumulateTimings() - { - U64 cur_time = getCPUClockCount64(); - - // root defined by parent pointing to self - CurTimerData* cur_data = sCurTimerData.get(); - // walk up stack of active timers and accumulate current time while leaving timing structures active - BlockTimer* cur_timer = cur_data->mCurTimer; - TimeBlockAccumulator& accumulator = cur_data->mTimerData->getPrimaryAccumulator(); - while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) - { - U64 cumulative_time_delta = cur_time - cur_timer->mStartTime; - U64 self_time_delta = cumulative_time_delta - cur_data->mChildTime; - cur_data->mChildTime = 0; - accumulator.mSelfTimeCounter += self_time_delta; - accumulator.mTotalTimeCounter += cumulative_time_delta; - cur_timer->mStartTime = cur_time; + // walk up stack of active timers and accumulate current time while leaving timing structures active + BlockTimer* cur_timer = cur_data->mCurTimer; + TimeBlockAccumulator& accumulator = cur_data->mTimerData->getPrimaryAccumulator(); + // root defined by parent pointing to self + while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) + { + U64 cumulative_time_delta = cur_time - cur_timer->mStartTime; + U64 self_time_delta = cumulative_time_delta - cur_data->mChildTime; + cur_data->mChildTime = 0; + accumulator.mSelfTimeCounter += self_time_delta; + accumulator.mTotalTimeCounter += cumulative_time_delta; - cur_data = &cur_timer->mLastTimerData; - cur_data->mChildTime += cumulative_time_delta; - if (cur_data->mTimerData) - { - accumulator = cur_data->mTimerData->getPrimaryAccumulator(); - } + cur_timer->mStartTime = cur_time; - cur_timer = cur_timer->mLastTimerData.mCurTimer; + cur_data = &cur_timer->mLastTimerData; + cur_data->mChildTime += cumulative_time_delta; + if (cur_data->mTimerData) + { + accumulator = cur_data->mTimerData->getPrimaryAccumulator(); } - // traverse tree in DFS post order, or bottom up - //for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer()); - // it != end_timer_tree_bottom_up(); - // ++it) - //{ - // TimeBlock* timerp = (*it); - // TimeBlockAccumulator& accumulator = timerp->getPrimaryAccumulator(); - // timerp->mTreeTimeCounter = accumulator.mSelfTimeCounter; - // for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it) - // { - // timerp->mTreeTimeCounter += (*child_it)->mTreeTimeCounter; - // } - - //S32 cur_frame = getCurFrameIndex(); - //if (cur_frame >= 0) - //{ - // // update timer history + cur_timer = cur_timer->mLastTimerData.mCurTimer; + } - // int hidx = getCurFrameIndex() % HISTORY_NUM; - // timerp->mCountHistory[hidx] = timerp->mTreeTimeCounter; - // timerp->mCountAverage = ((U64)timerp->mCountAverage * cur_frame + timerp->mTreeTimeCounter) / (cur_frame+1); - // timerp->mCallHistory[hidx] = accumulator.mCalls; - // timerp->mCallAverage = ((U64)timerp->mCallAverage * cur_frame + accumulator.mCalls) / (cur_frame+1); - //} - //} + // reset for next frame + for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), + end_it = LLInstanceTracker<TimeBlock>::endInstances(); + it != end_it; + ++it) + { + TimeBlock& timer = *it; + TimeBlockAccumulator& accumulator = timer.getPrimaryAccumulator(); + + accumulator.mLastCaller = NULL; + accumulator.mMoveUpTree = false; } + + // traverse tree in DFS post order, or bottom up + //for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimer()); + // it != end_timer_tree_bottom_up(); + // ++it) + //{ + // TimeBlock* timerp = (*it); + // TimeBlockAccumulator& accumulator = timerp->getPrimaryAccumulator(); + // timerp->mTreeTimeCounter = accumulator.mSelfTimeCounter; + // for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it) + // { + // timerp->mTreeTimeCounter += (*child_it)->mTreeTimeCounter; + // } + //} } @@ -376,106 +354,78 @@ std::vector<TimeBlock*>& TimeBlock::getChildren() } //static -void TimeBlock::nextFrame() +void TimeBlock::logStats() { - get_clock_count(); // good place to calculate clock frequency - U64 frame_time = TimeBlock::getCPUClockCount64(); - if ((frame_time - sLastFrameTime) >> 8 > 0xffffffff) - { - llinfos << "Slow frame, fast timers inaccurate" << llendl; - } - - if (!sPauseHistory) - { - TimeBlock::processTimes(); - sLastFrameIndex = sCurFrameIndex++; - } - // get ready for next frame - //void TimeBlock::resetFrame() - { - if (sLog) - { //output current frame counts to performance log + if (sLog) + { //output current frame counts to performance log - static S32 call_count = 0; - if (call_count % 100 == 0) - { - LL_DEBUGS("FastTimers") << "countsPerSecond: " << countsPerSecond() << LL_ENDL; - LL_DEBUGS("FastTimers") << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << LL_ENDL; - LL_DEBUGS("FastTimers") << "getCPUClockCount32() " << getCPUClockCount32() << LL_ENDL; - LL_DEBUGS("FastTimers") << "getCPUClockCount64() " << getCPUClockCount64() << LL_ENDL; - LL_DEBUGS("FastTimers") << "elapsed sec " << ((F64)getCPUClockCount64()) / (LLUnit<LLUnits::Hertz, F64>(LLProcessorInfo().getCPUFrequency())) << LL_ENDL; - } - call_count++; + static S32 call_count = 0; + if (call_count % 100 == 0) + { + LL_DEBUGS("FastTimers") << "countsPerSecond: " << countsPerSecond() << LL_ENDL; + LL_DEBUGS("FastTimers") << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << LL_ENDL; + LL_DEBUGS("FastTimers") << "getCPUClockCount32() " << getCPUClockCount32() << LL_ENDL; + LL_DEBUGS("FastTimers") << "getCPUClockCount64() " << getCPUClockCount64() << LL_ENDL; + LL_DEBUGS("FastTimers") << "elapsed sec " << ((F64)getCPUClockCount64()) / (LLUnit<LLUnits::Hertz, F64>(LLProcessorInfo().getCPUFrequency())) << LL_ENDL; + } + call_count++; - LLUnit<LLUnits::Seconds, F64> total_time(0); - LLSD sd; + LLUnit<LLUnits::Seconds, F64> total_time(0); + LLSD sd; + { + for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), + end_it = LLInstanceTracker<TimeBlock>::endInstances(); + it != end_it; + ++it) { - for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), - end_it = LLInstanceTracker<TimeBlock>::endInstances(); - it != end_it; - ++it) - { - TimeBlock& timer = *it; - LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording(); - sd[timer.getName()]["Time"] = (LLSD::Real) (frame_recording.getLastRecordingPeriod().getSum(timer).value()); - sd[timer.getName()]["Calls"] = (LLSD::Integer) (frame_recording.getLastRecordingPeriod().getSum(timer.callCount())); - - // computing total time here because getting the root timer's getCountHistory - // doesn't work correctly on the first frame - total_time += frame_recording.getLastRecordingPeriod().getSum(timer); - } - } - - sd["Total"]["Time"] = (LLSD::Real) total_time.value(); - sd["Total"]["Calls"] = (LLSD::Integer) 1; + TimeBlock& timer = *it; + LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording(); + sd[timer.getName()]["Time"] = (LLSD::Real) (frame_recording.getLastRecordingPeriod().getSum(timer).value()); + sd[timer.getName()]["Calls"] = (LLSD::Integer) (frame_recording.getLastRecordingPeriod().getSum(timer.callCount())); - { - LLMutexLock lock(sLogLock); - sLogQueue.push(sd); + // computing total time here because getting the root timer's getCountHistory + // doesn't work correctly on the first frame + total_time += frame_recording.getLastRecordingPeriod().getSum(timer); } } - // reset for next frame - for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), - end_it = LLInstanceTracker<TimeBlock>::endInstances(); - it != end_it; - ++it) - { - TimeBlock& timer = *it; - TimeBlockTreeNode& tree_node = sCurTimerData->mTimerTreeData[timer.getIndex()]; + sd["Total"]["Time"] = (LLSD::Real) total_time.value(); + sd["Total"]["Calls"] = (LLSD::Integer) 1; - tree_node.mLastCaller = NULL; - tree_node.mMoveUpTree = false; + { + LLMutexLock lock(sLogLock); + sLogQueue.push(sd); } } - sLastFrameTime = frame_time; + } //static void TimeBlock::dumpCurTimes() { - // accumulate timings, etc. - processTimes(); - + LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording(); + LLTrace::Recording& last_frame_recording = frame_recording.getLastRecordingPeriod(); + // walk over timers in depth order and output timings for(timer_tree_dfs_iterator_t it = begin_timer_tree(TimeBlock::getRootTimer()); it != end_timer_tree(); ++it) { - LLTrace::PeriodicRecording& frame_recording = LLTrace::get_frame_recording(); TimeBlock* timerp = (*it); - LLUnit<LLUnits::Seconds, F64> total_time_ms = frame_recording.getLastRecordingPeriod().getSum(*timerp); - U32 num_calls = frame_recording.getLastRecordingPeriod().getSum(timerp->callCount()); + LLUnit<LLUnits::Seconds, F64> total_time_ms = last_frame_recording.getSum(*timerp); + U32 num_calls = last_frame_recording.getSum(timerp->callCount()); // Don't bother with really brief times, keep output concise if (total_time_ms < 0.1) continue; std::ostringstream out_str; - for (S32 i = 0; i < timerp->getDepth(); i++) + TimeBlock* parent_timerp = timerp; + while(parent_timerp && parent_timerp != parent_timerp->getParent()) { out_str << "\t"; + parent_timerp = parent_timerp->getParent(); } out_str << timerp->getName() << " " @@ -505,7 +455,11 @@ void TimeBlock::writeLog(std::ostream& os) TimeBlockAccumulator::TimeBlockAccumulator() : mSelfTimeCounter(0), mTotalTimeCounter(0), - mCalls(0) + mCalls(0), + mLastCaller(NULL), + mActiveCount(0), + mMoveUpTree(false), + mParent(NULL) {} void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other ) @@ -513,6 +467,10 @@ void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other ) mSelfTimeCounter += other.mSelfTimeCounter; mTotalTimeCounter += other.mTotalTimeCounter; mCalls += other.mCalls; + mLastCaller = other.mLastCaller; + mActiveCount = other.mActiveCount; + mMoveUpTree = other.mMoveUpTree; + mParent = other.mParent; } void TimeBlockAccumulator::reset( const TimeBlockAccumulator* other ) @@ -522,10 +480,4 @@ void TimeBlockAccumulator::reset( const TimeBlockAccumulator* other ) mCalls = 0; } -TimeBlockTreeNode::TimeBlockTreeNode() -: mLastCaller(NULL), - mActiveCount(0), - mMoveUpTree(false) -{} - } // namespace LLTrace diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index b5b4a8d0b4..1e2e4b590f 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -43,7 +43,6 @@ struct CurTimerData class BlockTimer* mCurTimer; class TimeBlock* mTimerData; U64 mChildTime; - TimeBlockTreeNode* mTimerTreeData; }; class BlockTimer @@ -70,11 +69,8 @@ class TimeBlock public: TimeBlock(const char* name, bool open = false, TimeBlock* parent = &getRootTimer()); - enum { HISTORY_NUM = 300 }; - TimeBlock* getParent() const { return mParent; } void setParent(TimeBlock* parent); - S32 getDepth(); typedef std::vector<TimeBlock*>::const_iterator child_const_iter; child_const_iter beginChildren(); @@ -245,12 +241,12 @@ public: // can be called multiple times in a frame, at any point static void processTimes(); - // call this once a frame to reset timers - static void nextFrame(); + // call this once a frame to periodically log timers + static void logStats(); // tree structure, only updated from master trace thread TimeBlock* mParent; // TimeBlock of caller(parent) - std::vector<TimeBlock*> mChildren; + std::vector<TimeBlock*> mChildren; // TimeBlock of callees bool mCollapsed, // don't show children mNeedsSorting; // sort children whenever child added @@ -260,12 +256,6 @@ public: sLog; static LLThreadLocalPointer<CurTimerData> sCurTimerData; static U64 sClockResolution; - static S32 sCurFrameIndex, - sLastFrameIndex; - static U64 sLastFrameTime; - static bool sPauseHistory, - sResetHistory; - }; LL_FORCE_INLINE BlockTimer::BlockTimer(TimeBlock& timer) @@ -274,10 +264,10 @@ LL_FORCE_INLINE BlockTimer::BlockTimer(TimeBlock& timer) mStartTime = TimeBlock::getCPUClockCount64(); CurTimerData* cur_timer_data = TimeBlock::sCurTimerData.get(); - TimeBlockTreeNode& tree_node = cur_timer_data->mTimerTreeData[timer.getIndex()]; - tree_node.mActiveCount++; + TimeBlockAccumulator& accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator(); + accumulator.mActiveCount++; // keep current parent as long as it is active when we are - tree_node.mMoveUpTree |= (cur_timer_data->mTimerTreeData[timer.mParent->getIndex()].mActiveCount == 0); + accumulator.mMoveUpTree |= (accumulator.mParent->getPrimaryAccumulator().mActiveCount == 0); // store top of stack mLastTimerData = *cur_timer_data; @@ -294,16 +284,15 @@ LL_FORCE_INLINE BlockTimer::~BlockTimer() U64 total_time = TimeBlock::getCPUClockCount64() - mStartTime; CurTimerData* cur_timer_data = TimeBlock::sCurTimerData.get(); TimeBlockAccumulator& accumulator = cur_timer_data->mTimerData->getPrimaryAccumulator(); - TimeBlockTreeNode& tree_node = cur_timer_data->mTimerTreeData[cur_timer_data->mTimerData->getIndex()]; accumulator.mCalls++; accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime; accumulator.mTotalTimeCounter += total_time; - tree_node.mActiveCount--; + accumulator.mActiveCount--; // store last caller to bootstrap tree creation // do this in the destructor in case of recursion to get topmost caller - tree_node.mLastCaller = mLastTimerData.mTimerData; + accumulator.mLastCaller = mLastTimerData.mTimerData; // we are only tracking self time, so subtract our total time delta from parents mLastTimerData.mChildTime += total_time; diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h index d4fc93342d..5b5e2b7879 100644 --- a/indra/llcommon/lltrace.h +++ b/indra/llcommon/lltrace.h @@ -82,18 +82,21 @@ namespace LLTrace enum StaticAllocationMarker { STATIC_ALLOC }; AccumulatorBuffer(StaticAllocationMarker m) - : mStorageSize(64), - mNextStorageSlot(0), - mStorage(new ACCUMULATOR[DEFAULT_ACCUMULATOR_BUFFER_SIZE]) - {} + : mStorageSize(0), + mStorage(NULL), + mNextStorageSlot(0) + { + resize(DEFAULT_ACCUMULATOR_BUFFER_SIZE); + } public: AccumulatorBuffer(const AccumulatorBuffer& other = getDefaultBuffer()) - : mStorageSize(other.mStorageSize), - mStorage(new ACCUMULATOR[other.mStorageSize]), + : mStorageSize(0), + mStorage(NULL), mNextStorageSlot(other.mNextStorageSlot) { + resize(other.mStorageSize); for (S32 i = 0; i < mNextStorageSlot; i++) { mStorage[i] = other.mStorage[i]; @@ -181,9 +184,12 @@ namespace LLTrace { ACCUMULATOR* old_storage = mStorage; mStorage = new ACCUMULATOR[new_size]; - for (S32 i = 0; i < mStorageSize; i++) + if (old_storage) { - mStorage[i] = old_storage[i]; + for (S32 i = 0; i < mStorageSize; i++) + { + mStorage[i] = old_storage[i]; + } } mStorageSize = new_size; delete[] old_storage; @@ -300,14 +306,14 @@ namespace LLTrace mNumSamples += other.mNumSamples; mMean = mMean * weight + other.mMean * (1.f - weight); + // combine variance (and hence standard deviation) of 2 different sized sample groups using + // the following formula: http://www.mrc-bsu.cam.ac.uk/cochrane/handbook/chapter_7/7_7_3_8_combining_groups.htm F64 n_1 = (F64)mNumSamples, n_2 = (F64)other.mNumSamples; F64 m_1 = mMean, m_2 = other.mMean; F64 sd_1 = getStandardDeviation(), sd_2 = other.getStandardDeviation(); - // combine variance (and hence standard deviation) of 2 different sized sample groups using - // the following formula: http://www.mrc-bsu.cam.ac.uk/cochrane/handbook/chapter_7/7_7_3_8_combining_groups.htm if (n_1 == 0) { mVarianceSum = other.mVarianceSum; @@ -405,6 +411,7 @@ namespace LLTrace { public: typedef LLUnit<LLUnits::Seconds, F64> value_t; + typedef TimeBlockAccumulator self_t; // fake class that allows us to view call count aspect of timeblock accumulator struct CallCountAspect @@ -418,15 +425,20 @@ namespace LLTrace }; TimeBlockAccumulator(); - void addSamples(const TimeBlockAccumulator& other); - void reset(const TimeBlockAccumulator* other); + void addSamples(const self_t& other); + void reset(const self_t* other); // // members // - U64 mSelfTimeCounter, - mTotalTimeCounter; - U32 mCalls; + U64 mSelfTimeCounter, + mTotalTimeCounter; + U32 mCalls; + class TimeBlock* mParent; // last acknowledged parent of this time block + class TimeBlock* mLastCaller; // used to bootstrap tree construction + U16 mActiveCount; // number of timers with this ID active on stack + bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame + }; @@ -460,16 +472,6 @@ namespace LLTrace {} }; - class TimeBlockTreeNode - { - public: - TimeBlockTreeNode(); - class TimeBlock* mLastCaller; // used to bootstrap tree construction - U16 mActiveCount; // number of timers with this ID active on stack - bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame - }; - - template <typename T = F64> class Measurement : public TraceType<MeasurementAccumulator<typename LLUnits::HighestPrecisionType<T>::type_t> > @@ -604,7 +606,7 @@ struct MemFootprint<std::vector<T> > { static size_t measure(const std::vector<T>& value) { - return value.capacity() * MemFootPrint<T>::measure(); + return value.capacity() * MemFootprint<T>::measure(); } static size_t measure() @@ -618,7 +620,7 @@ struct MemFootprint<std::list<T> > { static size_t measure(const std::list<T>& value) { - return value.size() * (MemFootPrint<T>::measure() + sizeof(void*) * 2); + return value.size() * (MemFootprint<T>::measure() + sizeof(void*) * 2); } static size_t measure() diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp index 3ea511ff3c..40079f40f1 100644 --- a/indra/llcommon/lltracerecording.cpp +++ b/indra/llcommon/lltracerecording.cpp @@ -44,7 +44,8 @@ Recording::Recording() mMeasurementsFloat(new AccumulatorBuffer<MeasurementAccumulator<F64> >()), mCounts(new AccumulatorBuffer<CountAccumulator<S64> >()), mMeasurements(new AccumulatorBuffer<MeasurementAccumulator<S64> >()), - mStackTimers(new AccumulatorBuffer<TimeBlockAccumulator>()) + mStackTimers(new AccumulatorBuffer<TimeBlockAccumulator>()), + mMemStats(new AccumulatorBuffer<MemStatAccumulator>()) {} Recording::Recording( const Recording& other ) @@ -57,6 +58,7 @@ Recording::Recording( const Recording& other ) mCounts = other.mCounts; mMeasurements = other.mMeasurements; mStackTimers = other.mStackTimers; + mMemStats = other.mMemStats; LLStopWatchControlsMixin::initTo(other.getPlayState()); } @@ -84,6 +86,7 @@ void Recording::handleReset() mCounts.write()->reset(); mMeasurements.write()->reset(); mStackTimers.write()->reset(); + mMemStats.write()->reset(); mElapsedSeconds = 0.0; mSamplingTimer.reset(); @@ -98,6 +101,7 @@ void Recording::handleStart() void Recording::handleStop() { mElapsedSeconds += mSamplingTimer.getElapsedTimeF64(); + LLTrace::TimeBlock::processTimes(); LLTrace::get_thread_recorder()->deactivate(this); } @@ -107,9 +111,9 @@ void Recording::handleSplitTo(Recording& other) other.restart(); other.mMeasurementsFloat.write()->reset(mMeasurementsFloat); other.mMeasurements.write()->reset(mMeasurements); + //TODO: figure out how to get seamless handoff of timing stats } - void Recording::makePrimary() { mCountsFloat.write()->makePrimary(); @@ -117,6 +121,7 @@ void Recording::makePrimary() mCounts.write()->makePrimary(); mMeasurements.write()->makePrimary(); mStackTimers.write()->makePrimary(); + mMemStats.write()->makePrimary(); } bool Recording::isPrimary() const @@ -131,6 +136,7 @@ void Recording::makeUnique() mCounts.makeUnique(); mMeasurements.makeUnique(); mStackTimers.makeUnique(); + mMemStats.makeUnique(); } void Recording::appendRecording( const Recording& other ) @@ -140,6 +146,7 @@ void Recording::appendRecording( const Recording& other ) mCounts.write()->addSamples(*other.mCounts); mMeasurements.write()->addSamples(*other.mMeasurements); mStackTimers.write()->addSamples(*other.mStackTimers); + mMemStats.write()->addSamples(*other.mMemStats); mElapsedSeconds += other.mElapsedSeconds; } @@ -174,6 +181,16 @@ F32 Recording::getPerSec(const TraceType<TimeBlockAccumulator::CallCountAspect>& return (F32)(*mStackTimers)[stat.getIndex()].mCalls / mElapsedSeconds; } +LLUnit<LLUnits::Bytes, U32> Recording::getSum(const TraceType<MemStatAccumulator>& stat) const +{ + return (*mMemStats)[stat.getIndex()].mAllocatedCount; +} + +LLUnit<LLUnits::Bytes, F32> Recording::getPerSec(const TraceType<MemStatAccumulator>& stat) const +{ + return (F32)(*mMemStats)[stat.getIndex()].mAllocatedCount / mElapsedSeconds; +} + F64 Recording::getSum( const TraceType<CountAccumulator<F64> >& stat ) const { diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h index f4841214e8..8e5f1125ec 100644 --- a/indra/llcommon/lltracerecording.h +++ b/indra/llcommon/lltracerecording.h @@ -34,7 +34,7 @@ #include "lltimer.h" #include "lltrace.h" -class LL_COMMON_API LLStopWatchControlsMixinCommon +class LLStopWatchControlsMixinCommon { public: virtual ~LLStopWatchControlsMixinCommon() {} @@ -101,7 +101,7 @@ private: namespace LLTrace { - class LL_COMMON_API Recording : public LLStopWatchControlsMixin<Recording> + class Recording : public LLStopWatchControlsMixin<Recording> { public: Recording(); @@ -127,6 +127,10 @@ namespace LLTrace LLUnit<LLUnits::Seconds, F64> getPerSec(const TraceType<TimeBlockAccumulator::SelfTimeAspect>& stat) const; F32 getPerSec(const TraceType<TimeBlockAccumulator::CallCountAspect>& stat) const; + // Memory accessors + LLUnit<LLUnits::Bytes, U32> getSum(const TraceType<MemStatAccumulator>& stat) const; + LLUnit<LLUnits::Bytes, F32> getPerSec(const TraceType<MemStatAccumulator>& stat) const; + // Count accessors F64 getSum(const TraceType<CountAccumulator<F64> >& stat) const; S64 getSum(const TraceType<CountAccumulator<S64> >& stat) const; @@ -226,7 +230,8 @@ namespace LLTrace LLCopyOnWritePointer<AccumulatorBuffer<MeasurementAccumulator<F64> > > mMeasurementsFloat; LLCopyOnWritePointer<AccumulatorBuffer<CountAccumulator<S64> > > mCounts; LLCopyOnWritePointer<AccumulatorBuffer<MeasurementAccumulator<S64> > > mMeasurements; - LLCopyOnWritePointer<AccumulatorBuffer<TimeBlockAccumulator> > mStackTimers; + LLCopyOnWritePointer<AccumulatorBuffer<TimeBlockAccumulator> > mStackTimers; + LLCopyOnWritePointer<AccumulatorBuffer<MemStatAccumulator> > mMemStats; LLTimer mSamplingTimer; F64 mElapsedSeconds; diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp index 16235473ee..c4144b4999 100644 --- a/indra/llcommon/lltracethreadrecorder.cpp +++ b/indra/llcommon/lltracethreadrecorder.cpp @@ -43,13 +43,20 @@ ThreadRecorder::ThreadRecorder() mRootTimerData = new CurTimerData(); mRootTimerData->mTimerData = &TimeBlock::getRootTimer(); - mRootTimerData->mTimerTreeData = new TimeBlockTreeNode[AccumulatorBuffer<TimeBlockAccumulator>::getDefaultBuffer().size()]; + TimeBlock::sCurTimerData = mRootTimerData; + TimeBlock::getRootTimer().getPrimaryAccumulator().mActiveCount = 1; + + // initialize parent pointers in time blocks + for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(); + it != end_it; + ++it) + { + it->getPrimaryAccumulator().mParent = it->mParent; + } mRootTimer = new BlockTimer(TimeBlock::getRootTimer()); mRootTimerData->mCurTimer = mRootTimer; - - mRootTimerData->mTimerTreeData[TimeBlock::getRootTimer().getIndex()].mActiveCount = 1; } ThreadRecorder::~ThreadRecorder() @@ -62,7 +69,6 @@ ThreadRecorder::~ThreadRecorder() } get_thread_recorder() = NULL; TimeBlock::sCurTimerData = NULL; - delete [] mRootTimerData->mTimerTreeData; delete mRootTimerData; } |