diff options
-rw-r--r-- | indra/llcommon/llfasttimer.cpp | 40 | ||||
-rw-r--r-- | indra/llcommon/llthreadlocalstorage.h | 8 | ||||
-rw-r--r-- | indra/llcommon/lltrace.h | 48 | ||||
-rw-r--r-- | indra/llcommon/lltracerecording.cpp | 22 | ||||
-rw-r--r-- | indra/llcommon/lltracerecording.h | 6 | ||||
-rw-r--r-- | indra/llcommon/lltracethreadrecorder.h | 5 | ||||
-rw-r--r-- | indra/newview/llappviewer.cpp | 1 |
7 files changed, 71 insertions, 59 deletions
diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp index 5dc5fdd5be..3fdd33959d 100644 --- a/indra/llcommon/llfasttimer.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -180,6 +180,7 @@ TimeBlockTreeNode& TimeBlock::getTreeNode() const static LLFastTimer::DeclareTimer FTM_PROCESS_TIMES("Process FastTimer Times"); +// not thread safe, so only call on main thread //static void TimeBlock::processTimes() { @@ -195,8 +196,8 @@ void TimeBlock::processTimes() TimeBlock& timer = *it; if (&timer == &TimeBlock::getRootTimeBlock()) continue; - // bootstrap tree construction by attaching to last timer to be on stack - // when this timer was called + // bootstrap tree construction by attaching to last timer to be on stack + // when this timer was called if (timer.getParent() == &TimeBlock::getRootTimeBlock()) { TimeBlockAccumulator* accumulator = timer.getPrimaryAccumulator(); @@ -233,30 +234,30 @@ void TimeBlock::processTimes() TimeBlockAccumulator* accumulator = timerp->getPrimaryAccumulator(); if (accumulator->mMoveUpTree) - { + { // since ancestors have already been visited, re-parenting won't affect tree traversal - //step up tree, bringing our descendants with us - LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() << - " to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL; - timerp->setParent(timerp->getParent()->getParent()); - accumulator->mParent = timerp->getParent(); - accumulator->mMoveUpTree = false; - - // don't bubble up any ancestors until descendants are done bubbling up - // as ancestors may call this timer only on certain paths, so we want to resolve - // child-most block locations before their parents - it.skipAncestors(); + //step up tree, bringing our descendants with us + LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() << + " to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL; + timerp->setParent(timerp->getParent()->getParent()); + accumulator->mParent = timerp->getParent(); + accumulator->mMoveUpTree = false; + + // don't bubble up any ancestors until descendants are done bubbling up + // as ancestors may call this timer only on certain paths, so we want to resolve + // child-most block locations before their parents + it.skipAncestors(); + } } } -} // walk up stack of active timers and accumulate current time while leaving timing structures active BlockTimerStackRecord* stack_record = ThreadTimerStack::getInstance(); BlockTimer* cur_timer = stack_record->mActiveTimer; TimeBlockAccumulator* accumulator = stack_record->mTimeBlock->getPrimaryAccumulator(); - // root defined by parent pointing to self - while(cur_timer && cur_timer->mParentTimerData.mActiveTimer != cur_timer) + while(cur_timer + && cur_timer->mParentTimerData.mActiveTimer != cur_timer) // root defined by parent pointing to self { U64 cumulative_time_delta = cur_time - cur_timer->mStartTime; accumulator->mTotalTimeCounter += cumulative_time_delta - (accumulator->mTotalTimeCounter - cur_timer->mBlockStartTotalTimeCounter); @@ -413,8 +414,11 @@ TimeBlockAccumulator::TimeBlockAccumulator() mParent(NULL) {} -void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other ) +void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other, bool append ) { + // we can't merge two unrelated time block samples, as that will screw with the nested timings + // due to the call hierarchy of each thread + llassert(append); mTotalTimeCounter += other.mTotalTimeCounter - other.mStartTotalTimeCounter; mSelfTimeCounter += other.mSelfTimeCounter; mCalls += other.mCalls; diff --git a/indra/llcommon/llthreadlocalstorage.h b/indra/llcommon/llthreadlocalstorage.h index 471784749b..cc67248124 100644 --- a/indra/llcommon/llthreadlocalstorage.h +++ b/indra/llcommon/llthreadlocalstorage.h @@ -313,11 +313,6 @@ template<typename DERIVED_TYPE> class LLThreadLocalSingletonPointer { public: - void operator =(DERIVED_TYPE* value) - { - setInstance(value); - } - LL_FORCE_INLINE static DERIVED_TYPE* getInstance() { #if LL_DARWIN @@ -328,7 +323,7 @@ public: #endif } - LL_FORCE_INLINE static void setInstance(DERIVED_TYPE* instance) + static void setInstance(DERIVED_TYPE* instance) { #if LL_DARWIN createTLSKey(); @@ -339,6 +334,7 @@ public: } private: + #if LL_WINDOWS static __declspec(thread) DERIVED_TYPE* sInstance; #elif LL_LINUX diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h index e950a119d3..6dfe9e4b4e 100644 --- a/indra/llcommon/lltrace.h +++ b/indra/llcommon/lltrace.h @@ -35,6 +35,7 @@ #include "llunit.h" #include "llapr.h" #include "llthreadlocalstorage.h" +#include "lltimer.h" #include <list> @@ -75,7 +76,6 @@ void set_thread_recorder(class ThreadRecorder*); class MasterThreadRecorder& getUIThreadRecorder(); -// one per thread per type template<typename ACCUMULATOR> class AccumulatorBuffer : public LLRefCount { @@ -104,9 +104,9 @@ public: ~AccumulatorBuffer() { - if (LLThreadLocalSingletonPointer<ACCUMULATOR>::getInstance() == mStorage) + if (isPrimary()) { - LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(getDefaultBuffer()->mStorage); + LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL); } delete[] mStorage; } @@ -121,12 +121,12 @@ public: return mStorage[index]; } - void addSamples(const AccumulatorBuffer<ACCUMULATOR>& other) + void addSamples(const AccumulatorBuffer<ACCUMULATOR>& other, bool append = true) { llassert(mStorageSize >= sNextStorageSlot && other.mStorageSize > sNextStorageSlot); for (size_t i = 0; i < sNextStorageSlot; i++) { - mStorage[i].addSamples(other.mStorage[i]); + mStorage[i].addSamples(other.mStorage[i], append); } } @@ -169,7 +169,8 @@ public: LL_FORCE_INLINE static ACCUMULATOR* getPrimaryStorage() { - return LLThreadLocalSingletonPointer<ACCUMULATOR>::getInstance(); + ACCUMULATOR* accumulator = LLThreadLocalSingletonPointer<ACCUMULATOR>::getInstance(); + return accumulator ? accumulator : sDefaultBuffer->mStorage; } // NOTE: this is not thread-safe. We assume that slots are reserved in the main thread before any child threads are spawned @@ -222,25 +223,27 @@ public: static self_t* getDefaultBuffer() { - // this buffer is allowed to leak so that trace calls from global destructors have somewhere to put their data - // so as not to trigger an access violation - static self_t* sBuffer = new AccumulatorBuffer(StaticAllocationMarker()); static bool sInitialized = false; if (!sInitialized) { - sBuffer->resize(DEFAULT_ACCUMULATOR_BUFFER_SIZE); + // this buffer is allowed to leak so that trace calls from global destructors have somewhere to put their data + // so as not to trigger an access violation + sDefaultBuffer = new AccumulatorBuffer(StaticAllocationMarker()); sInitialized = true; + sDefaultBuffer->resize(DEFAULT_ACCUMULATOR_BUFFER_SIZE); } - return sBuffer; + return sDefaultBuffer; } private: ACCUMULATOR* mStorage; size_t mStorageSize; static size_t sNextStorageSlot; + static self_t* sDefaultBuffer; }; template<typename ACCUMULATOR> size_t AccumulatorBuffer<ACCUMULATOR>::sNextStorageSlot = 0; +template<typename ACCUMULATOR> AccumulatorBuffer<ACCUMULATOR>* AccumulatorBuffer<ACCUMULATOR>::sDefaultBuffer = NULL; template<typename ACCUMULATOR> class TraceType @@ -307,7 +310,7 @@ public: mLastValue = value; } - void addSamples(const self_t& other) + void addSamples(const self_t& other, bool append) { if (other.mNumSamples) { @@ -347,7 +350,7 @@ public: F64 weight = (F64)mNumSamples / (F64)(mNumSamples + other.mNumSamples); mNumSamples += other.mNumSamples; mMean = mMean * weight + other.mMean * (1.f - weight); - mLastValue = other.mLastValue; + if (append) mLastValue = other.mLastValue; } } @@ -431,7 +434,7 @@ public: mHasValue = true; } - void addSamples(const self_t& other) + void addSamples(const self_t& other, bool append) { if (other.mTotalSamplingTime) { @@ -473,9 +476,12 @@ public: mNumSamples += other.mNumSamples; mTotalSamplingTime += other.mTotalSamplingTime; mMean = (mMean * weight) + (other.mMean * (1.0 - weight)); - mLastValue = other.mLastValue; - mLastSampleTimeStamp = other.mLastSampleTimeStamp; - mHasValue |= other.mHasValue; + if (append) + { + mLastValue = other.mLastValue; + mLastSampleTimeStamp = other.mLastSampleTimeStamp; + mHasValue |= other.mHasValue; + } } } @@ -548,7 +554,7 @@ public: mSum += value; } - void addSamples(const CountAccumulator<T>& other) + void addSamples(const CountAccumulator<T>& other, bool /*append*/) { mSum += other.mSum; mNumSamples += other.mNumSamples; @@ -593,7 +599,7 @@ public: }; TimeBlockAccumulator(); - void addSamples(const self_t& other); + void addSamples(const self_t& other, bool /*append*/); void reset(const self_t* other); void flush() {} @@ -713,6 +719,8 @@ void add(CountStatHandle<T>& count, VALUE_T value) struct MemStatAccumulator { + typedef MemStatAccumulator self_t; + MemStatAccumulator() : mSize(0), mChildSize(0), @@ -720,7 +728,7 @@ struct MemStatAccumulator mDeallocatedCount(0) {} - void addSamples(const MemStatAccumulator& other) + void addSamples(const MemStatAccumulator& other, bool /*append*/) { mSize += other.mSize; mChildSize += other.mChildSize; diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp index 5b0b74524f..3994e4f521 100644 --- a/indra/llcommon/lltracerecording.cpp +++ b/indra/llcommon/lltracerecording.cpp @@ -97,13 +97,15 @@ void RecordingBuffers::append( const RecordingBuffers& other ) void RecordingBuffers::merge( const RecordingBuffers& other) { - mCountsFloat.addSamples(other.mCountsFloat); - mCounts.addSamples(other.mCounts); - mSamplesFloat.addSamples(other.mSamplesFloat); - mSamples.addSamples(other.mSamples); - mEventsFloat.addSamples(other.mEventsFloat); - mEvents.addSamples(other.mEvents); - mMemStats.addSamples(other.mMemStats); + mCountsFloat.addSamples(other.mCountsFloat, false); + mCounts.addSamples(other.mCounts, false); + mSamplesFloat.addSamples(other.mSamplesFloat, false); + mSamples.addSamples(other.mSamples, false); + mEventsFloat.addSamples(other.mEventsFloat, false); + mEvents.addSamples(other.mEvents, false); + mMemStats.addSamples(other.mMemStats, false); + // for now, hold out timers from merge, need to be displayed per thread + //mStackTimers.addSamples(other.mStackTimers, false); } void RecordingBuffers::reset(RecordingBuffers* other) @@ -190,7 +192,6 @@ void Recording::handleStop() { mElapsedSeconds += mSamplingTimer.getElapsedTimeF64(); mBuffers.write()->flush(); - LLTrace::TimeBlock::processTimes(); LLTrace::get_thread_recorder()->deactivate(this); } @@ -490,8 +491,9 @@ void PeriodicRecording::appendPeriodicRecording( PeriodicRecording& other ) { if (mAutoResize) { - for (S32 other_index = (other.mCurPeriod + 2) % other_recording_count; - other_index != other.mCurPeriod; + for (S32 other_index = (other.mCurPeriod + 2) % other_recording_count, + end_index = (other.mCurPeriod + 1) % other_recording_count; + other_index != end_index; other_index = (other_index + 1) % other_recording_count) { llassert(other.mRecordingPeriods[other_index].getDuration() != 0.f diff --git a/indra/llcommon/lltracerecording.h b/indra/llcommon/lltracerecording.h index 19a4fae737..aaeb32e891 100644 --- a/indra/llcommon/lltracerecording.h +++ b/indra/llcommon/lltracerecording.h @@ -336,12 +336,12 @@ namespace LLTrace } template <typename T> - typename T getPeriodMin(const TraceType<SampleAccumulator<T> >& stat, size_t num_periods = U32_MAX) const + T getPeriodMin(const TraceType<SampleAccumulator<T> >& stat, size_t num_periods = U32_MAX) const { size_t total_periods = mRecordingPeriods.size(); num_periods = llmin(num_periods, total_periods); - typename T min_val = std::numeric_limits<T>::max(); + T min_val = std::numeric_limits<T>::max(); for (S32 i = 1; i <= num_periods; i++) { S32 index = (mCurPeriod + total_periods - i) % total_periods; @@ -351,7 +351,7 @@ namespace LLTrace } template <typename T> - typename T getPeriodMin(const TraceType<EventAccumulator<T> >& stat, size_t num_periods = U32_MAX) const + T getPeriodMin(const TraceType<EventAccumulator<T> >& stat, size_t num_periods = U32_MAX) const { size_t total_periods = mRecordingPeriods.size(); num_periods = llmin(num_periods, total_periods); diff --git a/indra/llcommon/lltracethreadrecorder.h b/indra/llcommon/lltracethreadrecorder.h index 17a2d4a9a9..bf3701304f 100644 --- a/indra/llcommon/lltracethreadrecorder.h +++ b/indra/llcommon/lltracethreadrecorder.h @@ -87,12 +87,13 @@ namespace LLTrace LLMutex* getSlaveListMutex() { return &mSlaveListMutex; } + private: typedef std::list<class SlaveThreadRecorder*> slave_thread_recorder_list_t; - slave_thread_recorder_list_t mSlaveThreadRecorders; - LLMutex mSlaveListMutex; + slave_thread_recorder_list_t mSlaveThreadRecorders; // list of slave thread recorders associated with this master + LLMutex mSlaveListMutex; // protects access to slave list }; class LL_COMMON_API SlaveThreadRecorder : public ThreadRecorder diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index a049aabe74..ff481d6278 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -1241,6 +1241,7 @@ bool LLAppViewer::mainLoop() while (!LLApp::isExiting()) { LLFastTimer _(FTM_FRAME); + LLTrace::TimeBlock::processTimes(); LLTrace::get_frame_recording().nextPeriod(); LLTrace::TimeBlock::logStats(); |