From e50e6004082223fdc0bfd78bc697d48a7f45b379 Mon Sep 17 00:00:00 2001
From: Richard Linden <none@none>
Date: Thu, 30 May 2013 20:15:48 -0700
Subject: SH-3931 WIP Interesting: Add graphs to visualize scene load metrics;
 reverted SlaveThreadRecorder update gating; moved processTimes() outside of
 Recording, so it is called only once per frame; refined sample merge logic so
 that multi-threaded samples do not stomp on linear history of a stat

---
 indra/llcommon/llfasttimer.cpp           | 40 ++++++++++++++++++--------------
 indra/llcommon/lltrace.h                 | 27 ++++++++++++---------
 indra/llcommon/lltracerecording.cpp      | 17 +++++++-------
 indra/llcommon/lltracethreadrecorder.cpp | 25 ++++++--------------
 indra/llcommon/lltracethreadrecorder.h   |  9 +++----
 indra/newview/llappviewer.cpp            |  1 +
 6 files changed, 58 insertions(+), 61 deletions(-)

(limited to 'indra')

diff --git a/indra/llcommon/llfasttimer.cpp b/indra/llcommon/llfasttimer.cpp
index 5dc5fdd5be..3fdd33959d 100644
--- a/indra/llcommon/llfasttimer.cpp
+++ b/indra/llcommon/llfasttimer.cpp
@@ -180,6 +180,7 @@ TimeBlockTreeNode& TimeBlock::getTreeNode() const
 
 static LLFastTimer::DeclareTimer FTM_PROCESS_TIMES("Process FastTimer Times");
 
+// not thread safe, so only call on main thread
 //static
 void TimeBlock::processTimes()
 {
@@ -195,8 +196,8 @@ void TimeBlock::processTimes()
 		TimeBlock& timer = *it;
 		if (&timer == &TimeBlock::getRootTimeBlock()) continue;
 			
-			// bootstrap tree construction by attaching to last timer to be on stack
-			// when this timer was called
+		// bootstrap tree construction by attaching to last timer to be on stack
+		// when this timer was called
 		if (timer.getParent() == &TimeBlock::getRootTimeBlock())
 		{
 			TimeBlockAccumulator* accumulator = timer.getPrimaryAccumulator();
@@ -233,30 +234,30 @@ void TimeBlock::processTimes()
 			TimeBlockAccumulator* accumulator = timerp->getPrimaryAccumulator();
 
 			if (accumulator->mMoveUpTree)
-		{
+			{
 				// since ancestors have already been visited, re-parenting won't affect tree traversal
-			//step up tree, bringing our descendants with us
-			LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
-				" to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL;
-			timerp->setParent(timerp->getParent()->getParent());
-				accumulator->mParent = timerp->getParent();
-				accumulator->mMoveUpTree = false;
-
-			// don't bubble up any ancestors until descendants are done bubbling up
-				// as ancestors may call this timer only on certain paths, so we want to resolve
-				// child-most block locations before their parents
-			it.skipAncestors();
+				//step up tree, bringing our descendants with us
+				LL_DEBUGS("FastTimers") << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
+					" to child of " << timerp->getParent()->getParent()->getName() << LL_ENDL;
+				timerp->setParent(timerp->getParent()->getParent());
+					accumulator->mParent = timerp->getParent();
+					accumulator->mMoveUpTree = false;
+
+				// don't bubble up any ancestors until descendants are done bubbling up
+					// as ancestors may call this timer only on certain paths, so we want to resolve
+					// child-most block locations before their parents
+				it.skipAncestors();
+			}
 		}
 	}
-}
 
 	// walk up stack of active timers and accumulate current time while leaving timing structures active
 	BlockTimerStackRecord* stack_record			= ThreadTimerStack::getInstance();
 	BlockTimer* cur_timer						= stack_record->mActiveTimer;
 	TimeBlockAccumulator* accumulator = stack_record->mTimeBlock->getPrimaryAccumulator();
 
-	// root defined by parent pointing to self
-	while(cur_timer && cur_timer->mParentTimerData.mActiveTimer != cur_timer)
+	while(cur_timer 
+		&& cur_timer->mParentTimerData.mActiveTimer != cur_timer) // root defined by parent pointing to self
 	{
 		U64 cumulative_time_delta = cur_time - cur_timer->mStartTime;
 		accumulator->mTotalTimeCounter += cumulative_time_delta - (accumulator->mTotalTimeCounter - cur_timer->mBlockStartTotalTimeCounter);
@@ -413,8 +414,11 @@ TimeBlockAccumulator::TimeBlockAccumulator()
 	mParent(NULL)
 {}
 
-void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other )
+void TimeBlockAccumulator::addSamples( const TimeBlockAccumulator& other, bool append )
 {
+	// we can't merge two unrelated time block samples, as that will screw with the nested timings
+	// due to the call hierarchy of each thread
+	llassert(append);
 	mTotalTimeCounter += other.mTotalTimeCounter - other.mStartTotalTimeCounter;
 	mSelfTimeCounter += other.mSelfTimeCounter;
 	mCalls += other.mCalls;
diff --git a/indra/llcommon/lltrace.h b/indra/llcommon/lltrace.h
index 00bab536ff..6dfe9e4b4e 100644
--- a/indra/llcommon/lltrace.h
+++ b/indra/llcommon/lltrace.h
@@ -121,12 +121,12 @@ public:
 		return mStorage[index]; 
 	}
 
-	void addSamples(const AccumulatorBuffer<ACCUMULATOR>& other)
+	void addSamples(const AccumulatorBuffer<ACCUMULATOR>& other, bool append = true)
 	{
 		llassert(mStorageSize >= sNextStorageSlot && other.mStorageSize > sNextStorageSlot);
 		for (size_t i = 0; i < sNextStorageSlot; i++)
 		{
-			mStorage[i].addSamples(other.mStorage[i]);
+			mStorage[i].addSamples(other.mStorage[i], append);
 		}
 	}
 
@@ -310,7 +310,7 @@ public:
 		mLastValue = value;
 	}
 
-	void addSamples(const self_t& other)
+	void addSamples(const self_t& other, bool append)
 	{
 		if (other.mNumSamples)
 		{
@@ -350,7 +350,7 @@ public:
 			F64 weight = (F64)mNumSamples / (F64)(mNumSamples + other.mNumSamples);
 			mNumSamples += other.mNumSamples;
 			mMean = mMean * weight + other.mMean * (1.f - weight);
-			mLastValue = other.mLastValue;
+			if (append) mLastValue = other.mLastValue;
 		}
 	}
 
@@ -434,7 +434,7 @@ public:
 		mHasValue = true;
 	}
 
-	void addSamples(const self_t& other)
+	void addSamples(const self_t& other, bool append)
 	{
 		if (other.mTotalSamplingTime)
 		{
@@ -476,9 +476,12 @@ public:
 			mNumSamples += other.mNumSamples;
 			mTotalSamplingTime += other.mTotalSamplingTime;
 			mMean = (mMean * weight) + (other.mMean * (1.0 - weight));
-			mLastValue = other.mLastValue;
-			mLastSampleTimeStamp = other.mLastSampleTimeStamp;
-			mHasValue |= other.mHasValue;
+			if (append)
+			{
+				mLastValue = other.mLastValue;
+				mLastSampleTimeStamp = other.mLastSampleTimeStamp;
+				mHasValue |= other.mHasValue;
+			}
 		}
 	}
 
@@ -551,7 +554,7 @@ public:
 		mSum += value;
 	}
 
-	void addSamples(const CountAccumulator<T>& other)
+	void addSamples(const CountAccumulator<T>& other, bool /*append*/)
 	{
 		mSum += other.mSum;
 		mNumSamples += other.mNumSamples;
@@ -596,7 +599,7 @@ public:
 	};
 
 	TimeBlockAccumulator();
-	void addSamples(const self_t& other);
+	void addSamples(const self_t& other, bool /*append*/);
 	void reset(const self_t* other);
 	void flush() {}
 
@@ -716,6 +719,8 @@ void add(CountStatHandle<T>& count, VALUE_T value)
 
 struct MemStatAccumulator
 {
+	typedef MemStatAccumulator self_t;
+
 	MemStatAccumulator()
 	:	mSize(0),
 		mChildSize(0),
@@ -723,7 +728,7 @@ struct MemStatAccumulator
 		mDeallocatedCount(0)
 	{}
 
-	void addSamples(const MemStatAccumulator& other)
+	void addSamples(const MemStatAccumulator& other, bool /*append*/)
 	{
 		mSize += other.mSize;
 		mChildSize += other.mChildSize;
diff --git a/indra/llcommon/lltracerecording.cpp b/indra/llcommon/lltracerecording.cpp
index 86cdca3e10..3994e4f521 100644
--- a/indra/llcommon/lltracerecording.cpp
+++ b/indra/llcommon/lltracerecording.cpp
@@ -97,13 +97,15 @@ void RecordingBuffers::append( const RecordingBuffers& other )
 
 void RecordingBuffers::merge( const RecordingBuffers& other)
 {
-	mCountsFloat.addSamples(other.mCountsFloat);
-	mCounts.addSamples(other.mCounts);
-	mSamplesFloat.addSamples(other.mSamplesFloat);
-	mSamples.addSamples(other.mSamples);
-	mEventsFloat.addSamples(other.mEventsFloat);
-	mEvents.addSamples(other.mEvents);
-	mMemStats.addSamples(other.mMemStats);
+	mCountsFloat.addSamples(other.mCountsFloat, false);
+	mCounts.addSamples(other.mCounts, false);
+	mSamplesFloat.addSamples(other.mSamplesFloat, false);
+	mSamples.addSamples(other.mSamples, false);
+	mEventsFloat.addSamples(other.mEventsFloat, false);
+	mEvents.addSamples(other.mEvents, false);
+	mMemStats.addSamples(other.mMemStats, false);
+	// for now, hold out timers from merge, need to be displayed per thread
+	//mStackTimers.addSamples(other.mStackTimers, false);
 }
 
 void RecordingBuffers::reset(RecordingBuffers* other)
@@ -190,7 +192,6 @@ void Recording::handleStop()
 {
 	mElapsedSeconds += mSamplingTimer.getElapsedTimeF64();
 	mBuffers.write()->flush();
-	LLTrace::TimeBlock::processTimes();
 	LLTrace::get_thread_recorder()->deactivate(this);
 }
 
diff --git a/indra/llcommon/lltracethreadrecorder.cpp b/indra/llcommon/lltracethreadrecorder.cpp
index 89b5df1f94..75c7cb2ff1 100644
--- a/indra/llcommon/lltracethreadrecorder.cpp
+++ b/indra/llcommon/lltracethreadrecorder.cpp
@@ -202,21 +202,14 @@ SlaveThreadRecorder::~SlaveThreadRecorder()
 	mMasterRecorder.removeSlaveThread(this);
 }
 
-bool SlaveThreadRecorder::pushToMaster()
+void SlaveThreadRecorder::pushToMaster()
 {
-	if (mPushCount != mMasterRecorder.getPullCount())
+	mThreadRecording.stop();
 	{
-		mThreadRecording.stop();
-		{
-			LLMutexLock(mMasterRecorder.getSlaveListMutex());
-			mSharedData.appendFrom(mThreadRecording);
-		}
-		mThreadRecording.start();
-
-		mPushCount = mMasterRecorder.getPullCount();
-		return true;
+		LLMutexLock(mMasterRecorder.getSlaveListMutex());
+		mSharedData.appendFrom(mThreadRecording);
 	}
-	return false;
+	mThreadRecording.start();
 }
 
 void SlaveThreadRecorder::SharedData::appendFrom( const Recording& source )
@@ -271,8 +264,6 @@ void MasterThreadRecorder::pullFromSlaveThreads()
 		(*it)->mSharedData.mergeTo(target_recording_buffers);
 		(*it)->mSharedData.reset();
 	}
-
-	mPullCount++;
 }
 
 void MasterThreadRecorder::addSlaveThread( class SlaveThreadRecorder* child )
@@ -298,10 +289,8 @@ void MasterThreadRecorder::removeSlaveThread( class SlaveThreadRecorder* child )
 	}
 }
 
-bool MasterThreadRecorder::pushToMaster()
-{
-	return false;
-}
+void MasterThreadRecorder::pushToMaster()
+{}
 
 MasterThreadRecorder::MasterThreadRecorder()
 {}
diff --git a/indra/llcommon/lltracethreadrecorder.h b/indra/llcommon/lltracethreadrecorder.h
index a044757e62..bf3701304f 100644
--- a/indra/llcommon/lltracethreadrecorder.h
+++ b/indra/llcommon/lltracethreadrecorder.h
@@ -49,7 +49,7 @@ namespace LLTrace
 		void deactivate(Recording* recording);
 		active_recording_list_t::reverse_iterator bringUpToDate(Recording* recording);
 
-		virtual bool pushToMaster() = 0;
+		virtual void pushToMaster() = 0;
 
 		TimeBlockTreeNode* getTimeBlockTreeNode(S32 index);
 
@@ -80,14 +80,13 @@ namespace LLTrace
 		void addSlaveThread(class SlaveThreadRecorder* child);
 		void removeSlaveThread(class SlaveThreadRecorder* child);
 
-		/*virtual */ bool pushToMaster();
+		/*virtual */ void pushToMaster();
 
 		// call this periodically to gather stats data from slave threads
 		void pullFromSlaveThreads();
 
 		LLMutex* getSlaveListMutex() { return &mSlaveListMutex; }
 
-		U32	getPullCount() { return mPullCount; }
 
 	private:
 
@@ -95,7 +94,6 @@ namespace LLTrace
 
 		slave_thread_recorder_list_t	mSlaveThreadRecorders;	// list of slave thread recorders associated with this master
 		LLMutex							mSlaveListMutex;		// protects access to slave list
-		LLAtomicU32						mPullCount;				// number of times data has been pulled from slaves
 	};
 
 	class LL_COMMON_API SlaveThreadRecorder : public ThreadRecorder
@@ -105,7 +103,7 @@ namespace LLTrace
 		~SlaveThreadRecorder();
 
 		// call this periodically to gather stats data for master thread to consume
-		/*virtual*/ bool pushToMaster();
+		/*virtual*/ void pushToMaster();
 
 		MasterThreadRecorder* 	mMaster;
 
@@ -122,7 +120,6 @@ namespace LLTrace
 		};
 		SharedData				mSharedData;
 		MasterThreadRecorder&	mMasterRecorder;
-		U32						mPushCount;
 	};
 }
 
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index a049aabe74..ff481d6278 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -1241,6 +1241,7 @@ bool LLAppViewer::mainLoop()
 	while (!LLApp::isExiting())
 	{
 		LLFastTimer _(FTM_FRAME);
+		LLTrace::TimeBlock::processTimes();
 		LLTrace::get_frame_recording().nextPeriod();
 		LLTrace::TimeBlock::logStats();
 
-- 
cgit v1.2.3