diff options
author | Richard Linden <none@none> | 2013-06-18 23:41:53 -0700 |
---|---|---|
committer | Richard Linden <none@none> | 2013-06-18 23:41:53 -0700 |
commit | d136c4c29686c565b5a46503aa67a9c958b4145d (patch) | |
tree | 6eb0a18df5fc07172332777cf694faad7f8a4796 /indra/llcommon | |
parent | 1cb0c974ae410479a1f72dab556ef7c9371970ee (diff) |
SH-4246 FIX interesting: fast timers significantly decreases framerate
removed implicit flushes on reads from recorders for better performance
made sure stack timers were updated on recorder deactivate
faster rendering and better ui for fast timer view
Diffstat (limited to 'indra/llcommon')
-rwxr-xr-x | indra/llcommon/llfasttimer.cpp | 77 | ||||
-rwxr-xr-x | indra/llcommon/llfasttimer.h | 4 | ||||
-rw-r--r-- | indra/llcommon/lltrace.h | 19 | ||||
-rw-r--r-- | indra/llcommon/lltracerecording.cpp | 38 | ||||
-rw-r--r-- | indra/llcommon/lltracethreadrecorder.cpp | 1 | ||||
-rw-r--r-- | indra/llcommon/tests/llunits_test.cpp | 3 |
6 files changed, 68 insertions, 74 deletions
diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp index d9670891f8..4da9c3fd6c 100755 --- a/indra/llcommon/llfasttimer.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -178,43 +178,38 @@ TimeBlockTreeNode& TimeBlock::getTreeNode() const return *nodep; } -static LLFastTimer::DeclareTimer FTM_PROCESS_TIMES("Process FastTimer Times"); -// not thread safe, so only call on main thread -//static -void TimeBlock::processTimes() +void TimeBlock::bootstrapTimerTree() { - LLFastTimer _(FTM_PROCESS_TIMES); - get_clock_count(); // good place to calculate clock frequency - U64 cur_time = getCPUClockCount64(); - - // set up initial tree for (LLInstanceTracker<TimeBlock>::instance_iter begin_it = LLInstanceTracker<TimeBlock>::beginInstances(), end_it = LLInstanceTracker<TimeBlock>::endInstances(), it = begin_it; it != end_it; ++it) { TimeBlock& timer = *it; if (&timer == &TimeBlock::getRootTimeBlock()) continue; - + // bootstrap tree construction by attaching to last timer to be on stack // when this timer was called if (timer.getParent() == &TimeBlock::getRootTimeBlock()) { TimeBlockAccumulator* accumulator = timer.getPrimaryAccumulator(); - + if (accumulator->mLastCaller) { timer.setParent(accumulator->mLastCaller); accumulator->mParent = accumulator->mLastCaller; } - // no need to push up tree on first use, flag can be set spuriously + // no need to push up tree on first use, flag can be set spuriously accumulator->mMoveUpTree = false; } } +} - // bump timers up tree if they have been flagged as being in the wrong place - // do this in a bottom up order to promote descendants first before promoting ancestors - // this preserves partial order derived from current frame's observations +// bump timers up tree if they have been flagged as being in the wrong place +// do this in a bottom up order to promote descendants first before promoting ancestors +// this preserves partial order derived from current frame's observations +void TimeBlock::incrementalUpdateTimerTree() +{ for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(TimeBlock::getRootTimeBlock()); it != end_timer_tree_bottom_up(); ++it) @@ -240,27 +235,35 @@ void TimeBlock::processTimes() LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() << " to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL; timerp->setParent(timerp->getParent()->getParent()); - accumulator->mParent = timerp->getParent(); - accumulator->mMoveUpTree = false; + accumulator->mParent = timerp->getParent(); + accumulator->mMoveUpTree = false; // don't bubble up any ancestors until descendants are done bubbling up - // as ancestors may call this timer only on certain paths, so we want to resolve - // child-most block locations before their parents + // as ancestors may call this timer only on certain paths, so we want to resolve + // child-most block locations before their parents it.skipAncestors(); } } } +} + + +void TimeBlock::updateTimes() +{ + U64 cur_time = getCPUClockCount64(); // walk up stack of active timers and accumulate current time while leaving timing structures active - BlockTimerStackRecord* stack_record = ThreadTimerStack::getInstance(); - BlockTimer* cur_timer = stack_record->mActiveTimer; - TimeBlockAccumulator* accumulator = stack_record->mTimeBlock->getPrimaryAccumulator(); + BlockTimerStackRecord* stack_record = ThreadTimerStack::getInstance(); + BlockTimer* cur_timer = stack_record->mActiveTimer; + TimeBlockAccumulator* accumulator = stack_record->mTimeBlock->getPrimaryAccumulator(); while(cur_timer && cur_timer->mParentTimerData.mActiveTimer != cur_timer) // root defined by parent pointing to self { U64 cumulative_time_delta = cur_time - cur_timer->mStartTime; - accumulator->mTotalTimeCounter += cumulative_time_delta - (accumulator->mTotalTimeCounter - cur_timer->mBlockStartTotalTimeCounter); + accumulator->mTotalTimeCounter += cumulative_time_delta + - (accumulator->mTotalTimeCounter + - cur_timer->mBlockStartTotalTimeCounter); accumulator->mSelfTimeCounter += cumulative_time_delta - stack_record->mChildTime; stack_record->mChildTime = 0; @@ -268,11 +271,28 @@ void TimeBlock::processTimes() cur_timer->mBlockStartTotalTimeCounter = accumulator->mTotalTimeCounter; stack_record = &cur_timer->mParentTimerData; - accumulator = stack_record->mTimeBlock->getPrimaryAccumulator(); - cur_timer = stack_record->mActiveTimer; + accumulator = stack_record->mTimeBlock->getPrimaryAccumulator(); + cur_timer = stack_record->mActiveTimer; stack_record->mChildTime += cumulative_time_delta; } +} + +static LLFastTimer::DeclareTimer FTM_PROCESS_TIMES("Process FastTimer Times"); + +// not thread safe, so only call on main thread +//static +void TimeBlock::processTimes() +{ + LLFastTimer _(FTM_PROCESS_TIMES); + get_clock_count(); // good place to calculate clock frequency + + // set up initial tree + bootstrapTimerTree(); + + incrementalUpdateTimerTree(); + + updateTimes(); // reset for next frame for (LLInstanceTracker<TimeBlock>::instance_iter it = LLInstanceTracker<TimeBlock>::beginInstances(), @@ -288,14 +308,13 @@ void TimeBlock::processTimes() } } - std::vector<TimeBlock*>::iterator TimeBlock::beginChildren() - { +{ return getTreeNode().mChildren.begin(); - } +} std::vector<TimeBlock*>::iterator TimeBlock::endChildren() - { +{ return getTreeNode().mChildren.end(); } diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index fdc6997d45..e800befd9f 100755 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -115,6 +115,7 @@ public: static void pushLog(LLSD sd); static void setLogLock(LLMutex* mutex); static void writeLog(std::ostream& os); + static void updateTimes(); // dumps current cumulative frame stats to log // call nextFrame() to reset timers @@ -262,6 +263,9 @@ public: // can be called multiple times in a frame, at any point static void processTimes(); + static void bootstrapTimerTree(); + static void incrementalUpdateTimerTree(); + // call this once a frame to periodically log timers static void logStats(); diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h index cd377531e8..6292534a03 100644 --- a/indra/llcommon/lltrace.h +++ b/indra/llcommon/lltrace.h @@ -157,12 +157,12 @@ public: } } - void flush() + void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp) { llassert(mStorageSize >= sNextStorageSlot); for (size_t i = 0; i < sNextStorageSlot; i++) { - mStorage[i].flush(); + mStorage[i].flush(time_stamp); } } @@ -380,7 +380,7 @@ public: mLastValue = other ? other->mLastValue : 0; } - void flush() {} + void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {} F64 getSum() const { return mSum; } F64 getMin() const { return mMin; } @@ -512,9 +512,8 @@ public: mHasValue = other ? other->mHasValue : false; } - void flush() + void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp) { - LLUnitImplicit<F64, LLUnits::Seconds> time_stamp = LLTimer::getTotalSeconds(); LLUnitImplicit<F64, LLUnits::Seconds> delta_time = time_stamp - mLastSampleTimeStamp; if (mHasValue) @@ -579,7 +578,7 @@ public: mSum = 0; } - void flush() {} + void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {} F64 getSum() const { return mSum; } @@ -614,7 +613,7 @@ public: TimeBlockAccumulator(); void addSamples(const self_t& other, bool /*append*/); void reset(const self_t* other); - void flush() {} + void flush(LLUnitImplicit<F64, LLUnits::Seconds>) {} // // members @@ -780,10 +779,10 @@ struct MemStatAccumulator mDeallocatedCount = 0; } - void flush() + void flush(LLUnitImplicit<F64, LLUnits::Seconds> time_stamp) { - mSize.flush(); - mChildSize.flush(); + mSize.flush(time_stamp); + mChildSize.flush(time_stamp); } SampleAccumulator mSize, diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp index f2c5941011..33002929ea 100644 --- a/indra/llcommon/lltracerecording.cpp +++ b/indra/llcommon/lltracerecording.cpp @@ -107,7 +107,9 @@ void RecordingBuffers::reset(RecordingBuffers* other) void RecordingBuffers::flush() { - mSamples.flush(); + LLUnitImplicit<F64, LLUnits::Seconds> time_stamp = LLTimer::getTotalSeconds(); + + mSamples.flush(time_stamp); } /////////////////////////////////////////////////////////////////////// @@ -205,7 +207,6 @@ void Recording::mergeRecording( const Recording& other) LLUnit<F64, LLUnits::Seconds> Recording::getSum(const TraceType<TimeBlockAccumulator>& stat) { const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()]; - update(); return (F64)(accumulator.mTotalTimeCounter - accumulator.mStartTotalTimeCounter) / (F64)LLTrace::TimeBlock::countsPerSecond(); } @@ -213,14 +214,12 @@ LLUnit<F64, LLUnits::Seconds> Recording::getSum(const TraceType<TimeBlockAccumul LLUnit<F64, LLUnits::Seconds> Recording::getSum(const TraceType<TimeBlockAccumulator::SelfTimeFacet>& stat) { const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()]; - update(); return (F64)(accumulator.mSelfTimeCounter) / (F64)LLTrace::TimeBlock::countsPerSecond(); } U32 Recording::getSum(const TraceType<TimeBlockAccumulator::CallCountFacet>& stat) { - update(); return mBuffers->mStackTimers[stat.getIndex()].mCalls; } @@ -228,7 +227,6 @@ LLUnit<F64, LLUnits::Seconds> Recording::getPerSec(const TraceType<TimeBlockAccu { const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()]; - update(); return (F64)(accumulator.mTotalTimeCounter - accumulator.mStartTotalTimeCounter) / ((F64)LLTrace::TimeBlock::countsPerSecond() * mElapsedSeconds.value()); } @@ -237,105 +235,88 @@ LLUnit<F64, LLUnits::Seconds> Recording::getPerSec(const TraceType<TimeBlockAccu { const TimeBlockAccumulator& accumulator = mBuffers->mStackTimers[stat.getIndex()]; - update(); return (F64)(accumulator.mSelfTimeCounter) / ((F64)LLTrace::TimeBlock::countsPerSecond() * mElapsedSeconds.value()); } F32 Recording::getPerSec(const TraceType<TimeBlockAccumulator::CallCountFacet>& stat) { - update(); return (F32)mBuffers->mStackTimers[stat.getIndex()].mCalls / mElapsedSeconds.value(); } LLUnit<F64, LLUnits::Bytes> Recording::getMin(const TraceType<MemStatAccumulator>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mSize.getMin(); } LLUnit<F64, LLUnits::Bytes> Recording::getMean(const TraceType<MemStatAccumulator>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mSize.getMean(); } LLUnit<F64, LLUnits::Bytes> Recording::getMax(const TraceType<MemStatAccumulator>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mSize.getMax(); } LLUnit<F64, LLUnits::Bytes> Recording::getStandardDeviation(const TraceType<MemStatAccumulator>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mSize.getStandardDeviation(); } LLUnit<F64, LLUnits::Bytes> Recording::getLastValue(const TraceType<MemStatAccumulator>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mSize.getLastValue(); } LLUnit<F64, LLUnits::Bytes> Recording::getMin(const TraceType<MemStatAccumulator::ChildMemFacet>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMin(); } LLUnit<F64, LLUnits::Bytes> Recording::getMean(const TraceType<MemStatAccumulator::ChildMemFacet>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMean(); } LLUnit<F64, LLUnits::Bytes> Recording::getMax(const TraceType<MemStatAccumulator::ChildMemFacet>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mChildSize.getMax(); } LLUnit<F64, LLUnits::Bytes> Recording::getStandardDeviation(const TraceType<MemStatAccumulator::ChildMemFacet>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mChildSize.getStandardDeviation(); } LLUnit<F64, LLUnits::Bytes> Recording::getLastValue(const TraceType<MemStatAccumulator::ChildMemFacet>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mChildSize.getLastValue(); } U32 Recording::getSum(const TraceType<MemStatAccumulator::AllocationCountFacet>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mAllocatedCount; } U32 Recording::getSum(const TraceType<MemStatAccumulator::DeallocationCountFacet>& stat) { - update(); return mBuffers->mMemStats[stat.getIndex()].mAllocatedCount; } F64 Recording::getSum( const TraceType<CountAccumulator>& stat ) { - update(); return mBuffers->mCounts[stat.getIndex()].getSum(); } F64 Recording::getSum( const TraceType<EventAccumulator>& stat ) { - update(); return (F64)mBuffers->mEvents[stat.getIndex()].getSum(); } F64 Recording::getPerSec( const TraceType<CountAccumulator>& stat ) { - update(); F64 sum = mBuffers->mCounts[stat.getIndex()].getSum(); return (sum != 0.0) ? (sum / mElapsedSeconds.value()) @@ -344,79 +325,66 @@ F64 Recording::getPerSec( const TraceType<CountAccumulator>& stat ) U32 Recording::getSampleCount( const TraceType<CountAccumulator>& stat ) { - update(); return mBuffers->mCounts[stat.getIndex()].getSampleCount(); } F64 Recording::getMin( const TraceType<SampleAccumulator>& stat ) { - update(); return mBuffers->mSamples[stat.getIndex()].getMin(); } F64 Recording::getMax( const TraceType<SampleAccumulator>& stat ) { - update(); return mBuffers->mSamples[stat.getIndex()].getMax(); } F64 Recording::getMean( const TraceType<SampleAccumulator>& stat ) { - update(); return mBuffers->mSamples[stat.getIndex()].getMean(); } F64 Recording::getStandardDeviation( const TraceType<SampleAccumulator>& stat ) { - update(); return mBuffers->mSamples[stat.getIndex()].getStandardDeviation(); } F64 Recording::getLastValue( const TraceType<SampleAccumulator>& stat ) { - update(); return mBuffers->mSamples[stat.getIndex()].getLastValue(); } U32 Recording::getSampleCount( const TraceType<SampleAccumulator>& stat ) { - update(); return mBuffers->mSamples[stat.getIndex()].getSampleCount(); } F64 Recording::getMin( const TraceType<EventAccumulator>& stat ) { - update(); return mBuffers->mEvents[stat.getIndex()].getMin(); } F64 Recording::getMax( const TraceType<EventAccumulator>& stat ) { - update(); return mBuffers->mEvents[stat.getIndex()].getMax(); } F64 Recording::getMean( const TraceType<EventAccumulator>& stat ) { - update(); return mBuffers->mEvents[stat.getIndex()].getMean(); } F64 Recording::getStandardDeviation( const TraceType<EventAccumulator>& stat ) { - update(); return mBuffers->mEvents[stat.getIndex()].getStandardDeviation(); } F64 Recording::getLastValue( const TraceType<EventAccumulator>& stat ) { - update(); return mBuffers->mEvents[stat.getIndex()].getLastValue(); } U32 Recording::getSampleCount( const TraceType<EventAccumulator>& stat ) { - update(); return mBuffers->mEvents[stat.getIndex()].getSampleCount(); } diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp index c1a0700eff..54006f4e5b 100644 --- a/indra/llcommon/lltracethreadrecorder.cpp +++ b/indra/llcommon/lltracethreadrecorder.cpp @@ -111,6 +111,7 @@ ThreadRecorder::active_recording_list_t::reverse_iterator ThreadRecorder::bringU if (mActiveRecordings.empty()) return mActiveRecordings.rend(); mActiveRecordings.back()->mPartialRecording.flush(); + TimeBlock::updateTimes(); active_recording_list_t::reverse_iterator it, end_it; for (it = mActiveRecordings.rbegin(), end_it = mActiveRecordings.rend(); diff --git a/indra/llcommon/tests/llunits_test.cpp b/indra/llcommon/tests/llunits_test.cpp index 04764f6c2f..a5df51f6de 100644 --- a/indra/llcommon/tests/llunits_test.cpp +++ b/indra/llcommon/tests/llunits_test.cpp @@ -56,6 +56,9 @@ namespace tut LLUnit<F32, Quatloos> float_quatloos; ensure(float_quatloos == 0.f); + LLUnit<F32, Quatloos> float_initialize_quatloos(1); + ensure(float_initialize_quatloos == 1.f); + LLUnit<S32, Quatloos> int_quatloos; ensure(int_quatloos == 0); |