From 563daa96a873ef627ca42d24e0f6de6bb359326d Mon Sep 17 00:00:00 2001
From: Tofu Linden <tofu.linden@lindenlab.com>
Date: Mon, 25 Jan 2010 12:07:25 -0800
Subject: pull in the linux+solaris fast-timers impl from snowglobe, fit it
 into viewer2, start moving headers around.

---
 doc/contributions.txt              |   1 +
 indra/llcommon/llfasttimer.h       | 364 -------------------------------------
 indra/llcommon/llfasttimer_class.h | 271 +++++++++++++++++++++++++++
 3 files changed, 272 insertions(+), 364 deletions(-)
 delete mode 100644 indra/llcommon/llfasttimer.h
 create mode 100644 indra/llcommon/llfasttimer_class.h

diff --git a/doc/contributions.txt b/doc/contributions.txt
index cf10ecccfb..2e4d803252 100644
--- a/doc/contributions.txt
+++ b/doc/contributions.txt
@@ -500,6 +500,7 @@ Ringo Tuxing
 	CT-231
 	CT-321
 Robin Cornelius
+	SNOW-108
 	SNOW-204
 	VWR-2488
 	VWR-9557
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
deleted file mode 100644
index 8af79c90fd..0000000000
--- a/indra/llcommon/llfasttimer.h
+++ /dev/null
@@ -1,364 +0,0 @@
-/**
- * @file llfasttimer.h
- * @brief Declaration of a fast timer.
- *
- * $LicenseInfo:firstyear=2004&license=viewergpl$
- *
- * Copyright (c) 2004-2009, Linden Research, Inc.
- *
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab.  Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- *
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- *
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- *
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
-
-#ifndef LL_FASTTIMER_H
-#define LL_FASTTIMER_H
-
-#include "llinstancetracker.h"
-
-#define FAST_TIMER_ON 1
-#define TIME_FAST_TIMERS 0
-
-#if LL_WINDOWS
-#define LL_INLINE __forceinline
-
-//
-// NOTE: put back in when we aren't using platform sdk anymore
-//
-// because MS has different signatures for these functions in winnt.h
-// need to rename them to avoid conflicts
-//#define _interlockedbittestandset _renamed_interlockedbittestandset
-//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
-//#include <intrin.h>
-//#undef _interlockedbittestandset
-//#undef _interlockedbittestandreset
-
-//inline U32 get_cpu_clock_count_32()
-//{
-//	U64 time_stamp = __rdtsc();
-//	return (U32)(time_stamp >> 8);
-//}
-//
-//// return full timer value, *not* shifted by 8 bits
-//inline U64 get_cpu_clock_count_64()
-//{
-//	return __rdtsc();
-//}
-
-// shift off lower 8 bits for lower resolution but longer term timing
-// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
-inline U32 get_cpu_clock_count_32()
-{
-	U32 ret_val;
-	__asm
-	{
-        _emit   0x0f
-        _emit   0x31
-		shr eax,8
-		shl edx,24
-		or eax, edx
-		mov dword ptr [ret_val], eax
-	}
-    return ret_val;
-}
-
-// return full timer value, *not* shifted by 8 bits
-inline U64 get_cpu_clock_count_64()
-{
-	U64 ret_val;
-	__asm
-	{
-        _emit   0x0f
-        _emit   0x31
-		mov eax,eax
-		mov edx,edx
-		mov dword ptr [ret_val+4], edx
-		mov dword ptr [ret_val], eax
-	}
-    return ret_val;
-}
-#else
-#define LL_INLINE
-#endif
-
-#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
-inline U32 get_cpu_clock_count_32()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
-	return (U32)x >> 8;
-}
-
-inline U32 get_cpu_clock_count_64()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
-	return x >> 8;
-}
-#endif
-
-#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) || (LL_SOLARIS && defined(__sparc__))
-//
-// Mac PPC (deprecated) & Solaris SPARC implementation of CPU clock
-//
-// Just use gettimeofday implementation for now
-
-inline U32 get_cpu_clock_count_32()
-{
-	return (U32)get_clock_count();
-}
-
-inline U32 get_cpu_clock_count_64()
-{
-	return get_clock_count();
-}
-#endif
-
-class LLMutex;
-
-#include <queue>
-#include "llsd.h"
-
-class LL_COMMON_API LLFastTimer
-{
-public:
-
-	class NamedTimer;
-
-	struct LL_COMMON_API FrameState
-	{
-		FrameState(NamedTimer* timerp);
-
-		U32 				mSelfTimeCounter;
-		U32 				mCalls;
-		FrameState*			mParent;		// info for caller timer
-		FrameState*			mLastCaller;	// used to bootstrap tree construction
-		NamedTimer*			mTimer;
-		U16					mActiveCount;	// number of timers with this ID active on stack
-		bool				mMoveUpTree;	// needs to be moved up the tree of timers at the end of frame
-	};
-
-	// stores a "named" timer instance to be reused via multiple LLFastTimer stack instances
-	class LL_COMMON_API NamedTimer
-	:	public LLInstanceTracker<NamedTimer>
-	{
-		friend class DeclareTimer;
-	public:
-		~NamedTimer();
-
-		enum { HISTORY_NUM = 60 };
-
-		const std::string& getName() const { return mName; }
-		NamedTimer* getParent() const { return mParent; }
-		void setParent(NamedTimer* parent);
-		S32 getDepth();
-		std::string getToolTip(S32 history_index = -1);
-
-		typedef std::vector<NamedTimer*>::const_iterator child_const_iter;
-		child_const_iter beginChildren();
-		child_const_iter endChildren();
-		std::vector<NamedTimer*>& getChildren();
-
-		void setCollapsed(bool collapsed) { mCollapsed = collapsed; }
-		bool getCollapsed() const { return mCollapsed; }
-
-		U32 getCountAverage() const { return mCountAverage; }
-		U32 getCallAverage() const { return mCallAverage; }
-
-		U32 getHistoricalCount(S32 history_index = 0) const;
-		U32 getHistoricalCalls(S32 history_index = 0) const;
-
-		static NamedTimer& getRootNamedTimer();
-
-		S32 getFrameStateIndex() const { return mFrameStateIndex; }
-
-		FrameState& getFrameState() const;
-
-	private:
-		friend class LLFastTimer;
-		friend class NamedTimerFactory;
-
-		//
-		// methods
-		//
-		NamedTimer(const std::string& name);
-		// recursive call to gather total time from children
-		static void accumulateTimings();
-
-		// updates cumulative times and hierarchy,
-		// can be called multiple times in a frame, at any point
-		static void processTimes();
-
-		static void buildHierarchy();
-		static void resetFrame();
-		static void reset();
-
-		//
-		// members
-		//
-		S32			mFrameStateIndex;
-
-		std::string	mName;
-
-		U32 		mTotalTimeCounter;
-
-		U32 		mCountAverage;
-		U32			mCallAverage;
-
-		U32*		mCountHistory;
-		U32*		mCallHistory;
-
-		// tree structure
-		NamedTimer*					mParent;				// NamedTimer of caller(parent)
-		std::vector<NamedTimer*>	mChildren;
-		bool						mCollapsed;				// don't show children
-		bool						mNeedsSorting;			// sort children whenever child added
-	};
-
-	// used to statically declare a new named timer
-	class LL_COMMON_API DeclareTimer
-	:	public LLInstanceTracker<DeclareTimer>
-	{
-		friend class LLFastTimer;
-	public:
-		DeclareTimer(const std::string& name, bool open);
-		DeclareTimer(const std::string& name);
-
-		static void updateCachedPointers();
-
-	private:
-		NamedTimer&		mTimer;
-		FrameState*		mFrameState;
-	};
-
-public:
-	LLFastTimer(LLFastTimer::FrameState* state);
-
-	LL_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer)
-	:	mFrameState(timer.mFrameState)
-	{
-#if TIME_FAST_TIMERS
-		U64 timer_start = get_cpu_clock_count_64();
-#endif
-#if FAST_TIMER_ON
-		LLFastTimer::FrameState* frame_state = mFrameState;
-		mStartTime = get_cpu_clock_count_32();
-
-		frame_state->mActiveCount++;
-		frame_state->mCalls++;
-		// keep current parent as long as it is active when we are
-		frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0);
-
-		LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData;
-		mLastTimerData = *cur_timer_data;
-		cur_timer_data->mCurTimer = this;
-		cur_timer_data->mFrameState = frame_state;
-		cur_timer_data->mChildTime = 0;
-#endif
-#if TIME_FAST_TIMERS
-		U64 timer_end = get_cpu_clock_count_64();
-		sTimerCycles += timer_end - timer_start;
-#endif
-	}
-
-	LL_INLINE ~LLFastTimer()
-	{
-#if TIME_FAST_TIMERS
-		U64 timer_start = get_cpu_clock_count_64();
-#endif
-#if FAST_TIMER_ON
-		LLFastTimer::FrameState* frame_state = mFrameState;
-		U32 total_time = get_cpu_clock_count_32() - mStartTime;
-
-		frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime;
-		frame_state->mActiveCount--;
-
-		// store last caller to bootstrap tree creation
-		// do this in the destructor in case of recursion to get topmost caller
-		frame_state->mLastCaller = mLastTimerData.mFrameState;
-
-		// we are only tracking self time, so subtract our total time delta from parents
-		mLastTimerData.mChildTime += total_time;
-
-		LLFastTimer::sCurTimerData = mLastTimerData;
-#endif
-#if TIME_FAST_TIMERS
-		U64 timer_end = get_cpu_clock_count_64();
-		sTimerCycles += timer_end - timer_start;
-		sTimerCalls++;
-#endif
-	}
-
-public:
-	static LLMutex*			sLogLock;
-	static std::queue<LLSD> sLogQueue;
-	static BOOL				sLog;
-	static BOOL				sMetricLog;
-	static bool 			sPauseHistory;
-	static bool 			sResetHistory;
-	static U64				sTimerCycles;
-	static U32				sTimerCalls;
-
-	typedef std::vector<FrameState> info_list_t;
-	static info_list_t& getFrameStateList();
-
-
-	// call this once a frame to reset timers
-	static void nextFrame();
-
-	// dumps current cumulative frame stats to log
-	// call nextFrame() to reset timers
-	static void dumpCurTimes();
-
-	// call this to reset timer hierarchy, averages, etc.
-	static void reset();
-
-	static U64 countsPerSecond();
-	static S32 getLastFrameIndex() { return sLastFrameIndex; }
-	static S32 getCurFrameIndex() { return sCurFrameIndex; }
-
-	static void writeLog(std::ostream& os);
-	static const NamedTimer* getTimerByName(const std::string& name);
-
-	struct CurTimerData
-	{
-		LLFastTimer*	mCurTimer;
-		FrameState*		mFrameState;
-		U32				mChildTime;
-	};
-	static CurTimerData		sCurTimerData;
-
-private:
-	static S32				sCurFrameIndex;
-	static S32				sLastFrameIndex;
-	static U64				sLastFrameTime;
-	static info_list_t*		sTimerInfos;
-
-	U32							mStartTime;
-	LLFastTimer::FrameState*	mFrameState;
-	LLFastTimer::CurTimerData	mLastTimerData;
-
-};
-
-typedef class LLFastTimer LLFastTimer;
-
-#endif // LL_LLFASTTIMER_H
diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h
new file mode 100644
index 0000000000..93d2d0494d
--- /dev/null
+++ b/indra/llcommon/llfasttimer_class.h
@@ -0,0 +1,271 @@
+/**
+ * @file llfasttimer_class.h
+ * @brief Declaration of a fast timer.
+ *
+ * $LicenseInfo:firstyear=2004&license=viewergpl$
+ *
+ * Copyright (c) 2004-2009, Linden Research, Inc.
+ *
+ * Second Life Viewer Source Code
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
+ *
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at
+ * http://secondlifegrid.net/programs/open_source/licensing/flossexception
+ *
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ *
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_FASTTIMER_CLASS_H
+#define LL_FASTTIMER_CLASS_H
+
+#include "llinstancetracker.h"
+
+#define FAST_TIMER_ON 1
+#define TIME_FAST_TIMERS 0
+
+
+class LLMutex;
+
+#include <queue>
+#include "llsd.h"
+
+class LL_COMMON_API LLFastTimer
+{
+public:
+
+	class NamedTimer;
+
+	struct LL_COMMON_API FrameState
+	{
+		FrameState(NamedTimer* timerp);
+
+		U32 				mSelfTimeCounter;
+		U32 				mCalls;
+		FrameState*			mParent;		// info for caller timer
+		FrameState*			mLastCaller;	// used to bootstrap tree construction
+		NamedTimer*			mTimer;
+		U16					mActiveCount;	// number of timers with this ID active on stack
+		bool				mMoveUpTree;	// needs to be moved up the tree of timers at the end of frame
+	};
+
+	// stores a "named" timer instance to be reused via multiple LLFastTimer stack instances
+	class LL_COMMON_API NamedTimer
+	:	public LLInstanceTracker<NamedTimer>
+	{
+		friend class DeclareTimer;
+	public:
+		~NamedTimer();
+
+		enum { HISTORY_NUM = 60 };
+
+		const std::string& getName() const { return mName; }
+		NamedTimer* getParent() const { return mParent; }
+		void setParent(NamedTimer* parent);
+		S32 getDepth();
+		std::string getToolTip(S32 history_index = -1);
+
+		typedef std::vector<NamedTimer*>::const_iterator child_const_iter;
+		child_const_iter beginChildren();
+		child_const_iter endChildren();
+		std::vector<NamedTimer*>& getChildren();
+
+		void setCollapsed(bool collapsed) { mCollapsed = collapsed; }
+		bool getCollapsed() const { return mCollapsed; }
+
+		U32 getCountAverage() const { return mCountAverage; }
+		U32 getCallAverage() const { return mCallAverage; }
+
+		U32 getHistoricalCount(S32 history_index = 0) const;
+		U32 getHistoricalCalls(S32 history_index = 0) const;
+
+		static NamedTimer& getRootNamedTimer();
+
+		S32 getFrameStateIndex() const { return mFrameStateIndex; }
+
+		FrameState& getFrameState() const;
+
+	private:
+		friend class LLFastTimer;
+		friend class NamedTimerFactory;
+
+		//
+		// methods
+		//
+		NamedTimer(const std::string& name);
+		// recursive call to gather total time from children
+		static void accumulateTimings();
+
+		// updates cumulative times and hierarchy,
+		// can be called multiple times in a frame, at any point
+		static void processTimes();
+
+		static void buildHierarchy();
+		static void resetFrame();
+		static void reset();
+
+		//
+		// members
+		//
+		S32			mFrameStateIndex;
+
+		std::string	mName;
+
+		U32 		mTotalTimeCounter;
+
+		U32 		mCountAverage;
+		U32			mCallAverage;
+
+		U32*		mCountHistory;
+		U32*		mCallHistory;
+
+		// tree structure
+		NamedTimer*					mParent;				// NamedTimer of caller(parent)
+		std::vector<NamedTimer*>	mChildren;
+		bool						mCollapsed;				// don't show children
+		bool						mNeedsSorting;			// sort children whenever child added
+	};
+
+	// used to statically declare a new named timer
+	class LL_COMMON_API DeclareTimer
+	:	public LLInstanceTracker<DeclareTimer>
+	{
+		friend class LLFastTimer;
+	public:
+		DeclareTimer(const std::string& name, bool open);
+		DeclareTimer(const std::string& name);
+
+		static void updateCachedPointers();
+
+	private:
+		NamedTimer&		mTimer;
+		FrameState*		mFrameState;
+	};
+
+public:
+	LLFastTimer(LLFastTimer::FrameState* state);
+
+	LL_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer)
+	:	mFrameState(timer.mFrameState)
+	{
+#if TIME_FAST_TIMERS
+		U64 timer_start = get_cpu_clock_count_64();
+#endif
+#if FAST_TIMER_ON
+		LLFastTimer::FrameState* frame_state = mFrameState;
+		mStartTime = get_cpu_clock_count_32();
+
+		frame_state->mActiveCount++;
+		frame_state->mCalls++;
+		// keep current parent as long as it is active when we are
+		frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0);
+
+		LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData;
+		mLastTimerData = *cur_timer_data;
+		cur_timer_data->mCurTimer = this;
+		cur_timer_data->mFrameState = frame_state;
+		cur_timer_data->mChildTime = 0;
+#endif
+#if TIME_FAST_TIMERS
+		U64 timer_end = get_cpu_clock_count_64();
+		sTimerCycles += timer_end - timer_start;
+#endif
+	}
+
+	LL_INLINE ~LLFastTimer()
+	{
+#if TIME_FAST_TIMERS
+		U64 timer_start = get_cpu_clock_count_64();
+#endif
+#if FAST_TIMER_ON
+		LLFastTimer::FrameState* frame_state = mFrameState;
+		U32 total_time = get_cpu_clock_count_32() - mStartTime;
+
+		frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime;
+		frame_state->mActiveCount--;
+
+		// store last caller to bootstrap tree creation
+		// do this in the destructor in case of recursion to get topmost caller
+		frame_state->mLastCaller = mLastTimerData.mFrameState;
+
+		// we are only tracking self time, so subtract our total time delta from parents
+		mLastTimerData.mChildTime += total_time;
+
+		LLFastTimer::sCurTimerData = mLastTimerData;
+#endif
+#if TIME_FAST_TIMERS
+		U64 timer_end = get_cpu_clock_count_64();
+		sTimerCycles += timer_end - timer_start;
+		sTimerCalls++;
+#endif
+	}
+
+public:
+	static LLMutex*			sLogLock;
+	static std::queue<LLSD> sLogQueue;
+	static BOOL				sLog;
+	static BOOL				sMetricLog;
+	static bool 			sPauseHistory;
+	static bool 			sResetHistory;
+	static U64				sTimerCycles;
+	static U32				sTimerCalls;
+
+	typedef std::vector<FrameState> info_list_t;
+	static info_list_t& getFrameStateList();
+
+
+	// call this once a frame to reset timers
+	static void nextFrame();
+
+	// dumps current cumulative frame stats to log
+	// call nextFrame() to reset timers
+	static void dumpCurTimes();
+
+	// call this to reset timer hierarchy, averages, etc.
+	static void reset();
+
+	static U64 countsPerSecond();
+	static S32 getLastFrameIndex() { return sLastFrameIndex; }
+	static S32 getCurFrameIndex() { return sCurFrameIndex; }
+
+	static void writeLog(std::ostream& os);
+	static const NamedTimer* getTimerByName(const std::string& name);
+
+	struct CurTimerData
+	{
+		LLFastTimer*	mCurTimer;
+		FrameState*		mFrameState;
+		U32				mChildTime;
+	};
+	static CurTimerData		sCurTimerData;
+
+private:
+	static S32				sCurFrameIndex;
+	static S32				sLastFrameIndex;
+	static U64				sLastFrameTime;
+	static info_list_t*		sTimerInfos;
+	static U64 sClockResolution;
+
+	U32							mStartTime;
+	LLFastTimer::FrameState*	mFrameState;
+	LLFastTimer::CurTimerData	mLastTimerData;
+
+};
+
+typedef class LLFastTimer LLFastTimer;
+
+#endif // LL_LLFASTTIMER_CLASS_H
-- 
cgit v1.2.3


From ec7b204ed657f7c1becac3b410ae0bc94c03aa75 Mon Sep 17 00:00:00 2001
From: Tofu Linden <tofu.linden@lindenlab.com>
Date: Mon, 25 Jan 2010 12:31:50 -0800
Subject: shuffle shuffle of timer code.  cleanup.

---
 indra/llcommon/CMakeLists.txt        |   2 +-
 indra/llcommon/llfasttimer.h         | 169 ++++++++
 indra/llcommon/llfasttimer_class.cpp | 751 +++++++++++++++++++++++++++++++++++
 indra/llcommon/llfasttimer_class.h   |  24 +-
 4 files changed, 937 insertions(+), 9 deletions(-)
 create mode 100644 indra/llcommon/llfasttimer.h
 create mode 100644 indra/llcommon/llfasttimer_class.cpp

diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt
index ac7cc2cdac..05e45d6d8a 100644
--- a/indra/llcommon/CMakeLists.txt
+++ b/indra/llcommon/CMakeLists.txt
@@ -50,7 +50,7 @@ set(llcommon_SOURCE_FILES
     lleventdispatcher.cpp
     lleventfilter.cpp
     llevents.cpp
-    llfasttimer.cpp
+    llfasttimer_class.cpp
     llfile.cpp
     llfindlocale.cpp
     llfixedbuffer.cpp
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
new file mode 100644
index 0000000000..0f3280023c
--- /dev/null
+++ b/indra/llcommon/llfasttimer.h
@@ -0,0 +1,169 @@
+/**
+ * @file llfasttimer.h
+ * @brief Inline implementations of fast timers.
+ *
+ * $LicenseInfo:firstyear=2004&license=viewergpl$
+ *
+ * Copyright (c) 2004-2009, Linden Research, Inc.
+ *
+ * Second Life Viewer Source Code
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
+ *
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at
+ * http://secondlifegrid.net/programs/open_source/licensing/flossexception
+ *
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ *
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_FASTTIMER_H
+#define LL_FASTTIMER_H
+
+// pull in the actual class definition
+#include "llfasttimer_class.h"
+
+#if LL_WINDOWS
+#define LL_INLINE __forceinline
+//
+// Windows implementation of CPU clock
+//
+
+//
+// NOTE: put back in when we aren't using platform sdk anymore
+//
+// because MS has different signatures for these functions in winnt.h
+// need to rename them to avoid conflicts
+//#define _interlockedbittestandset _renamed_interlockedbittestandset
+//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
+//#include <intrin.h>
+//#undef _interlockedbittestandset
+//#undef _interlockedbittestandreset
+
+//inline U32 get_cpu_clock_count_32()
+//{
+//	U64 time_stamp = __rdtsc();
+//	return (U32)(time_stamp >> 8);
+//}
+//
+//// return full timer value, *not* shifted by 8 bits
+//inline U64 get_cpu_clock_count_64()
+//{
+//	return __rdtsc();
+//}
+
+// shift off lower 8 bits for lower resolution but longer term timing
+// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
+inline U32 get_cpu_clock_count_32()
+{
+	U32 ret_val;
+	__asm
+	{
+        _emit   0x0f
+        _emit   0x31
+		shr eax,8
+		shl edx,24
+		or eax, edx
+		mov dword ptr [ret_val], eax
+	}
+    return ret_val;
+}
+
+// return full timer value, *not* shifted by 8 bits
+inline U64 get_cpu_clock_count_64()
+{
+	U64 ret_val;
+	__asm
+	{
+        _emit   0x0f
+        _emit   0x31
+		mov eax,eax
+		mov edx,edx
+		mov dword ptr [ret_val+4], edx
+		mov dword ptr [ret_val], eax
+	}
+    return ret_val;
+}
+#else
+#define LL_INLINE
+#endif
+
+
+#if LL_LINUX || LL_SOLARIS
+//
+// Linux and Solaris implementation of CPU clock - all architectures.
+//
+// Try to use the MONOTONIC clock if available, this is a constant time counter
+// with nanosecond resolution (but not necessarily accuracy) and attempts are made
+// to synchronize this value between cores at kernel start. It should not be affected
+// by CPU frequency. If not available use the REALTIME clock, but this may be affected by
+// NTP adjustments or other user activity affecting the system time.
+inline U64 get_cpu_clock_count_64()
+{
+	struct timespec tp;
+	
+#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
+	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime, try REALTIME
+#endif
+		clock_gettime(CLOCK_REALTIME,&tp);
+
+	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
+}
+
+inline U32 get_cpu_clock_count_32()
+{
+	return (U32)get_cpu_clock_count_64();
+}
+#endif // (LL_LINUX || LL_SOLARIS))
+
+
+#if (LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
+//
+// Mac x86 implementation of CPU clock
+inline U32 get_cpu_clock_count_32()
+{
+	U64 x;
+	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+	return (U32)x >> 8;
+}
+
+inline U32 get_cpu_clock_count_64()
+{
+	U64 x;
+	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+	return x >> 8;
+}
+#endif
+
+
+#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)))
+//
+// Mac PPC (deprecated) implementation of CPU clock
+//
+// Just use gettimeofday implementation for now
+
+inline U32 get_cpu_clock_count_32()
+{
+	return (U32)get_clock_count();
+}
+
+inline U32 get_cpu_clock_count_64()
+{
+	return get_clock_count();
+}
+#endif
+
+#endif // LL_LLFASTTIMER_H
diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp
new file mode 100644
index 0000000000..6dbd90f9fa
--- /dev/null
+++ b/indra/llcommon/llfasttimer_class.cpp
@@ -0,0 +1,751 @@
+/** 
+ * @file llfasttimer_class.cpp
+ * @brief Implementation of the fast timer.
+ *
+ * $LicenseInfo:firstyear=2004&license=viewergpl$
+ * 
+ * Copyright (c) 2004-2007, Linden Research, Inc.
+ * 
+ * Second Life Viewer Source Code
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlife.com/developers/opensource/gplv2
+ * 
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at http://secondlife.com/developers/opensource/flossexception
+ * 
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ * 
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS."LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ * $/LicenseInfo$
+ */
+#include "linden_common.h"
+
+#include "llfasttimer.h"
+
+#include "llmemory.h"
+#include "llprocessor.h"
+#include "llsingleton.h"
+#include "lltreeiterators.h"
+#include "llsdserialize.h"
+
+#include <boost/bind.hpp>
+
+#if LL_WINDOWS
+#elif LL_LINUX || LL_SOLARIS
+#include <sys/time.h>
+#include <sched.h>
+#elif LL_DARWIN
+#include <sys/time.h>
+#include "lltimer.h"	// get_clock_count()
+#else 
+#error "architecture not supported"
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+// statics
+
+S32 LLFastTimer::sCurFrameIndex = -1;
+S32 LLFastTimer::sLastFrameIndex = -1;
+U64 LLFastTimer::sLastFrameTime = get_cpu_clock_count_64();
+bool LLFastTimer::sPauseHistory = 0;
+bool LLFastTimer::sResetHistory = 0;
+LLFastTimer::CurTimerData LLFastTimer::sCurTimerData;
+BOOL LLFastTimer::sLog = FALSE;
+BOOL LLFastTimer::sMetricLog = FALSE;
+LLMutex* LLFastTimer::sLogLock = NULL;
+std::queue<LLSD> LLFastTimer::sLogQueue;
+
+#if LL_LINUX || LL_SOLARIS
+U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution
+#else
+U64 LLFastTimer::sClockResolution = 1000000; // Microsecond resolution
+#endif
+
+std::vector<LLFastTimer::FrameState>* LLFastTimer::sTimerInfos = NULL;
+U64				LLFastTimer::sTimerCycles = 0;
+U32				LLFastTimer::sTimerCalls = 0;
+
+
+// FIXME: move these declarations to the relevant modules
+
+// helper functions
+typedef LLTreeDFSPostIter<LLFastTimer::NamedTimer, LLFastTimer::NamedTimer::child_const_iter> timer_tree_bottom_up_iterator_t;
+
+static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(LLFastTimer::NamedTimer& id) 
+{ 
+	return timer_tree_bottom_up_iterator_t(&id, 
+							boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::beginChildren), _1), 
+							boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::endChildren), _1));
+}
+
+static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up() 
+{ 
+	return timer_tree_bottom_up_iterator_t(); 
+}
+
+typedef LLTreeDFSIter<LLFastTimer::NamedTimer, LLFastTimer::NamedTimer::child_const_iter> timer_tree_dfs_iterator_t;
+
+
+static timer_tree_dfs_iterator_t begin_timer_tree(LLFastTimer::NamedTimer& id) 
+{ 
+	return timer_tree_dfs_iterator_t(&id, 
+		boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::beginChildren), _1), 
+							boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::endChildren), _1));
+}
+
+static timer_tree_dfs_iterator_t end_timer_tree() 
+{ 
+	return timer_tree_dfs_iterator_t(); 
+}
+
+
+
+// factory class that creates NamedTimers via static DeclareTimer objects
+class NamedTimerFactory : public LLSingleton<NamedTimerFactory>
+{
+public:
+	NamedTimerFactory() 
+	{}
+
+	/*virtual */ void initSingleton()
+	{
+		mTimerRoot = new LLFastTimer::NamedTimer("root");
+
+		mActiveTimerRoot = new LLFastTimer::NamedTimer("Frame");
+		mActiveTimerRoot->setCollapsed(false);
+
+		mRootFrameState = new LLFastTimer::FrameState(mActiveTimerRoot);
+		mRootFrameState->mParent = &mTimerRoot->getFrameState();
+		mActiveTimerRoot->setParent(mTimerRoot);
+
+		mAppTimer = new LLFastTimer(mRootFrameState);
+	}
+
+	~NamedTimerFactory()
+	{
+		std::for_each(mTimers.begin(), mTimers.end(), DeletePairedPointer());
+
+		delete mAppTimer;
+		delete mActiveTimerRoot; 
+		delete mTimerRoot;
+		delete mRootFrameState;
+	}
+
+	LLFastTimer::NamedTimer& createNamedTimer(const std::string& name)
+	{
+		timer_map_t::iterator found_it = mTimers.find(name);
+		if (found_it != mTimers.end())
+		{
+			return *found_it->second;
+		}
+
+		LLFastTimer::NamedTimer* timer = new LLFastTimer::NamedTimer(name);
+		timer->setParent(mTimerRoot);
+		mTimers.insert(std::make_pair(name, timer));
+
+		return *timer;
+	}
+
+	LLFastTimer::NamedTimer* getTimerByName(const std::string& name)
+	{
+		timer_map_t::iterator found_it = mTimers.find(name);
+		if (found_it != mTimers.end())
+		{
+			return found_it->second;
+		}
+		return NULL;
+	}
+
+	LLFastTimer::NamedTimer* getActiveRootTimer() { return mActiveTimerRoot; }
+	LLFastTimer::NamedTimer* getRootTimer() { return mTimerRoot; }
+	const LLFastTimer* getAppTimer() { return mAppTimer; }
+	LLFastTimer::FrameState& getRootFrameState() { return *mRootFrameState; }
+
+	typedef std::map<std::string, LLFastTimer::NamedTimer*> timer_map_t;
+	timer_map_t::iterator beginTimers() { return mTimers.begin(); }
+	timer_map_t::iterator endTimers() { return mTimers.end(); }
+	S32 timerCount() { return mTimers.size(); }
+
+private:
+	timer_map_t mTimers;
+
+	LLFastTimer::NamedTimer*		mActiveTimerRoot;
+	LLFastTimer::NamedTimer*		mTimerRoot;
+	LLFastTimer*						mAppTimer;
+	LLFastTimer::FrameState*		mRootFrameState;
+};
+
+void update_cached_pointers_if_changed()
+{
+	// detect when elements have moved and update cached pointers
+	static LLFastTimer::FrameState* sFirstTimerAddress = NULL;
+	if (&*(LLFastTimer::getFrameStateList().begin()) != sFirstTimerAddress)
+	{
+		LLFastTimer::DeclareTimer::updateCachedPointers();
+	}
+	sFirstTimerAddress = &*(LLFastTimer::getFrameStateList().begin());
+}
+
+LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open )
+:	mTimer(NamedTimerFactory::instance().createNamedTimer(name))
+{
+	mTimer.setCollapsed(!open);
+	mFrameState = &mTimer.getFrameState();
+	update_cached_pointers_if_changed();
+}
+
+LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name)
+:	mTimer(NamedTimerFactory::instance().createNamedTimer(name))
+{
+	mFrameState = &mTimer.getFrameState();
+	update_cached_pointers_if_changed();
+}
+
+// static
+void LLFastTimer::DeclareTimer::updateCachedPointers()
+{
+	// propagate frame state pointers to timer declarations
+	for (DeclareTimer::instance_iter it = DeclareTimer::beginInstances();
+		it != DeclareTimer::endInstances();
+		++it)
+	{
+		// update cached pointer
+		it->mFrameState = &it->mTimer.getFrameState();
+	}
+}
+
+//static
+#if LL_LINUX || LL_SOLARIS || ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)) )
+U64 LLFastTimer::countsPerSecond()
+{
+	return sClockResolution;
+}
+#else // windows or x86-mac
+U64 LLFastTimer::countsPerSecond()
+{
+	static U64 sCPUClockFrequency = U64(CProcessor().GetCPUFrequency(50));
+
+	// we drop the low-order byte in out timers, so report a lower frequency
+	return sCPUClockFrequency >> 8;
+}
+#endif
+
+LLFastTimer::FrameState::FrameState(LLFastTimer::NamedTimer* timerp)
+:	mActiveCount(0),
+	mCalls(0),
+	mSelfTimeCounter(0),
+	mParent(NULL),
+	mLastCaller(NULL),
+	mMoveUpTree(false),
+	mTimer(timerp)
+{}
+
+
+LLFastTimer::NamedTimer::NamedTimer(const std::string& name)
+:	mName(name),
+	mCollapsed(true),
+	mParent(NULL),
+	mTotalTimeCounter(0),
+	mCountAverage(0),
+	mCallAverage(0),
+	mNeedsSorting(false)
+{
+	info_list_t& frame_state_list = getFrameStateList();
+	mFrameStateIndex = frame_state_list.size();
+	getFrameStateList().push_back(FrameState(this));
+
+	mCountHistory = new U32[HISTORY_NUM];
+	memset(mCountHistory, 0, sizeof(U32) * HISTORY_NUM);
+	mCallHistory = new U32[HISTORY_NUM];
+	memset(mCallHistory, 0, sizeof(U32) * HISTORY_NUM);
+}
+
+LLFastTimer::NamedTimer::~NamedTimer()
+{
+	delete[] mCountHistory;
+	delete[] mCallHistory;
+}
+
+std::string LLFastTimer::NamedTimer::getToolTip(S32 history_idx)
+{
+	if (history_idx < 0)
+	{
+		// by default, show average number of calls
+		return llformat("%s (%d calls)", getName().c_str(), (S32)getCallAverage());
+	}
+	else
+	{
+		return llformat("%s (%d calls)", getName().c_str(), (S32)getHistoricalCalls(history_idx));
+	}
+}
+
+void LLFastTimer::NamedTimer::setParent(NamedTimer* parent)
+{
+	llassert_always(parent != this);
+	llassert_always(parent != NULL);
+
+	if (mParent)
+	{
+		// subtract our accumulated from previous parent
+		for (S32 i = 0; i < HISTORY_NUM; i++)
+		{
+			mParent->mCountHistory[i] -= mCountHistory[i];
+		}
+
+		// subtract average timing from previous parent
+		mParent->mCountAverage -= mCountAverage;
+
+		std::vector<NamedTimer*>& children = mParent->getChildren();
+		std::vector<NamedTimer*>::iterator found_it = std::find(children.begin(), children.end(), this);
+		if (found_it != children.end())
+		{
+			children.erase(found_it);
+		}
+	}
+
+	mParent = parent;
+	if (parent)
+	{
+		getFrameState().mParent = &parent->getFrameState();
+		parent->getChildren().push_back(this);
+		parent->mNeedsSorting = true;
+	}
+}
+
+S32 LLFastTimer::NamedTimer::getDepth()
+{
+	S32 depth = 0;
+	NamedTimer* timerp = mParent;
+	while(timerp)
+	{
+		depth++;
+		timerp = timerp->mParent;
+	}
+	return depth;
+}
+
+// static
+void LLFastTimer::NamedTimer::processTimes()
+{
+	if (sCurFrameIndex < 0) return;
+
+	buildHierarchy();
+	accumulateTimings();
+}
+
+// sort timer info structs by depth first traversal order
+struct SortTimersDFS
+{
+	bool operator()(const LLFastTimer::FrameState& i1, const LLFastTimer::FrameState& i2)
+	{
+		return i1.mTimer->getFrameStateIndex() < i2.mTimer->getFrameStateIndex();
+	}
+};
+
+// sort child timers by name
+struct SortTimerByName
+{
+	bool operator()(const LLFastTimer::NamedTimer* i1, const LLFastTimer::NamedTimer* i2)
+	{
+		return i1->getName() < i2->getName();
+	}
+};
+
+//static
+void LLFastTimer::NamedTimer::buildHierarchy()
+{
+	if (sCurFrameIndex < 0 ) return;
+
+	// set up initial tree
+    for (instance_iter it = NamedTimer::beginInstances();
+		it != endInstances();
+		++it)
+	{
+		NamedTimer& timer = *it;
+		if (&timer == NamedTimerFactory::instance().getRootTimer()) continue;
+
+		// bootstrap tree construction by attaching to last timer to be on stack
+		// when this timer was called
+		if (timer.getFrameState().mLastCaller && timer.mParent == NamedTimerFactory::instance().getRootTimer())
+		{
+			timer.setParent(timer.getFrameState().mLastCaller->mTimer);
+			// no need to push up tree on first use, flag can be set spuriously
+			timer.getFrameState().mMoveUpTree = false;
+		}
+	}
+
+	// bump timers up tree if they've been flagged as being in the wrong place
+	// do this in a bottom up order to promote descendants first before promoting ancestors
+	// this preserves partial order derived from current frame's observations
+	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getRootTimer());
+		it != end_timer_tree_bottom_up();
+		++it)
+	{
+		NamedTimer* timerp = *it;
+		// skip root timer
+		if (timerp == NamedTimerFactory::instance().getRootTimer()) continue;
+
+		if (timerp->getFrameState().mMoveUpTree)
+		{
+			// since ancestors have already been visited, reparenting won't affect tree traversal
+			//step up tree, bringing our descendants with us
+			//llinfos << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() <<
+			//	" to child of " << timerp->getParent()->getParent()->getName() << llendl;
+			timerp->setParent(timerp->getParent()->getParent());
+			timerp->getFrameState().mMoveUpTree = false;
+
+			// don't bubble up any ancestors until descendants are done bubbling up
+			it.skipAncestors();
+		}
+	}
+
+	// sort timers by time last called, so call graph makes sense
+	for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
+		it != end_timer_tree();
+		++it)
+	{
+		NamedTimer* timerp = (*it);
+		if (timerp->mNeedsSorting)
+		{
+			std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName());
+		}
+		timerp->mNeedsSorting = false;
+	}
+}
+
+//static
+void LLFastTimer::NamedTimer::accumulateTimings()
+{
+	U32 cur_time = get_cpu_clock_count_32();
+
+	// walk up stack of active timers and accumulate current time while leaving timing structures active
+	LLFastTimer* cur_timer = sCurTimerData.mCurTimer;
+	// root defined by parent pointing to self
+	CurTimerData* cur_data = &sCurTimerData;
+	while(cur_timer->mLastTimerData.mCurTimer != cur_timer)
+	{
+		U32 cumulative_time_delta = cur_time - cur_timer->mStartTime;
+		U32 self_time_delta = cumulative_time_delta - cur_data->mChildTime;
+		cur_data->mChildTime = 0;
+		cur_timer->mFrameState->mSelfTimeCounter += self_time_delta;
+		cur_timer->mStartTime = cur_time;
+
+		cur_data = &cur_timer->mLastTimerData;
+		cur_data->mChildTime += cumulative_time_delta;
+
+		cur_timer = cur_timer->mLastTimerData.mCurTimer;
+	}
+
+	// traverse tree in DFS post order, or bottom up
+	for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getActiveRootTimer());
+		it != end_timer_tree_bottom_up();
+		++it)
+	{
+		NamedTimer* timerp = (*it);
+		timerp->mTotalTimeCounter = timerp->getFrameState().mSelfTimeCounter;
+		for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it)
+		{
+			timerp->mTotalTimeCounter += (*child_it)->mTotalTimeCounter;
+		}
+
+		S32 cur_frame = sCurFrameIndex;
+		if (cur_frame >= 0)
+		{
+			// update timer history
+			int hidx = cur_frame % HISTORY_NUM;
+
+			timerp->mCountHistory[hidx] = timerp->mTotalTimeCounter;
+			timerp->mCountAverage = (timerp->mCountAverage * cur_frame + timerp->mTotalTimeCounter) / (cur_frame+1);
+			timerp->mCallHistory[hidx] = timerp->getFrameState().mCalls;
+			timerp->mCallAverage = (timerp->mCallAverage * cur_frame + timerp->getFrameState().mCalls) / (cur_frame+1);
+		}
+	}
+}
+
+// static
+void LLFastTimer::NamedTimer::resetFrame()
+{
+	if (sLog)
+	{ //output current frame counts to performance log
+		F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency
+
+		F64 total_time = 0;
+		LLSD sd;
+
+		for (NamedTimer::instance_iter it = NamedTimer::beginInstances();
+					it != NamedTimer::endInstances();
+					++it)
+		{
+			NamedTimer& timer = *it;
+			FrameState& info = timer.getFrameState();
+			sd[timer.getName()]["Time"] = (LLSD::Real) (info.mSelfTimeCounter*iclock_freq);	
+			sd[timer.getName()]["Calls"] = (LLSD::Integer) info.mCalls;
+			
+			// computing total time here because getting the root timer's getCountHistory
+			// doesn't work correctly on the first frame
+			total_time = total_time + info.mSelfTimeCounter * iclock_freq;
+		}
+
+		sd["Total"]["Time"] = (LLSD::Real) total_time;
+		sd["Total"]["Calls"] = (LLSD::Integer) 1;
+
+		{		
+			LLMutexLock lock(sLogLock);
+			sLogQueue.push(sd);
+		}
+	}
+
+
+	// tag timers by position in depth first traversal of tree
+	S32 index = 0;
+	for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
+		it != end_timer_tree();
+		++it)
+	{
+		NamedTimer* timerp = (*it);
+		
+		timerp->mFrameStateIndex = index;
+		index++;
+
+		llassert_always(timerp->mFrameStateIndex < (S32)getFrameStateList().size());
+	}
+
+	// sort timers by dfs traversal order to improve cache coherency
+	std::sort(getFrameStateList().begin(), getFrameStateList().end(), SortTimersDFS());
+
+	// update pointers into framestatelist now that we've sorted it
+	DeclareTimer::updateCachedPointers();
+
+	// reset for next frame
+	for (NamedTimer::instance_iter it = NamedTimer::beginInstances();
+		it != NamedTimer::endInstances();
+		++it)
+	{
+		NamedTimer& timer = *it;
+
+		FrameState& info = timer.getFrameState();
+		info.mSelfTimeCounter = 0;
+		info.mCalls = 0;
+		info.mLastCaller = NULL;
+		info.mMoveUpTree = false;
+		// update parent pointer in timer state struct
+		if (timer.mParent)
+		{
+			info.mParent = &timer.mParent->getFrameState();
+		}
+	}
+
+	//sTimerCycles = 0;
+	//sTimerCalls = 0;
+}
+
+//static
+void LLFastTimer::NamedTimer::reset()
+{
+	resetFrame(); // reset frame data
+
+	// walk up stack of active timers and reset start times to current time
+	// effectively zeroing out any accumulated time
+	U32 cur_time = get_cpu_clock_count_32();
+
+	// root defined by parent pointing to self
+	CurTimerData* cur_data = &sCurTimerData;
+	LLFastTimer* cur_timer = cur_data->mCurTimer;
+	while(cur_timer->mLastTimerData.mCurTimer != cur_timer)
+	{
+		cur_timer->mStartTime = cur_time;
+		cur_data->mChildTime = 0;
+
+		cur_data = &cur_timer->mLastTimerData;
+		cur_timer = cur_data->mCurTimer;
+	}
+
+	// reset all history
+	for (NamedTimer::instance_iter it = NamedTimer::beginInstances();
+		it != NamedTimer::endInstances();
+		++it)
+	{
+		NamedTimer& timer = *it;
+		if (&timer != NamedTimerFactory::instance().getRootTimer()) 
+		{
+			timer.setParent(NamedTimerFactory::instance().getRootTimer());
+		}
+
+		timer.mCountAverage = 0;
+		timer.mCallAverage = 0;
+		memset(timer.mCountHistory, 0, sizeof(U32) * HISTORY_NUM);
+		memset(timer.mCallHistory, 0, sizeof(U32) * HISTORY_NUM);
+	}
+
+	sLastFrameIndex = 0;
+	sCurFrameIndex = 0;
+}
+
+//static 
+LLFastTimer::info_list_t& LLFastTimer::getFrameStateList() 
+{ 
+	if (!sTimerInfos) 
+	{ 
+		sTimerInfos = new info_list_t(); 
+	} 
+	return *sTimerInfos; 
+}
+
+
+U32 LLFastTimer::NamedTimer::getHistoricalCount(S32 history_index) const
+{
+	S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM;
+	return mCountHistory[history_idx];
+}
+
+U32 LLFastTimer::NamedTimer::getHistoricalCalls(S32 history_index ) const
+{
+	S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM;
+	return mCallHistory[history_idx];
+}
+
+LLFastTimer::FrameState& LLFastTimer::NamedTimer::getFrameState() const
+{
+	llassert_always(mFrameStateIndex >= 0);
+	if (this == NamedTimerFactory::instance().getActiveRootTimer()) 
+	{
+		return NamedTimerFactory::instance().getRootFrameState();
+	}
+	return getFrameStateList()[mFrameStateIndex];
+}
+
+// static
+LLFastTimer::NamedTimer& LLFastTimer::NamedTimer::getRootNamedTimer()
+{ 
+	return *NamedTimerFactory::instance().getActiveRootTimer(); 
+}
+
+std::vector<LLFastTimer::NamedTimer*>::const_iterator LLFastTimer::NamedTimer::beginChildren()
+{ 
+	return mChildren.begin(); 
+}
+
+std::vector<LLFastTimer::NamedTimer*>::const_iterator LLFastTimer::NamedTimer::endChildren()
+{
+	return mChildren.end();
+}
+
+std::vector<LLFastTimer::NamedTimer*>& LLFastTimer::NamedTimer::getChildren()
+{
+	return mChildren;
+}
+
+//static
+void LLFastTimer::nextFrame()
+{
+	countsPerSecond(); // good place to calculate clock frequency
+	U64 frame_time = get_cpu_clock_count_64();
+	if ((frame_time - sLastFrameTime) >> 8 > 0xffffffff)
+	{
+		llinfos << "Slow frame, fast timers inaccurate" << llendl;
+	}
+
+	if (sPauseHistory)
+	{
+		sResetHistory = true;
+	}
+	else if (sResetHistory)
+	{
+		sLastFrameIndex = 0;
+		sCurFrameIndex = 0;
+		sResetHistory = false;
+	}
+	else // not paused
+	{
+		NamedTimer::processTimes();
+		sLastFrameIndex = sCurFrameIndex++;
+	}
+	
+	// get ready for next frame
+	NamedTimer::resetFrame();
+	sLastFrameTime = frame_time;
+}
+
+//static
+void LLFastTimer::dumpCurTimes()
+{
+	// accumulate timings, etc.
+	NamedTimer::processTimes();
+	
+	F64 clock_freq = (F64)countsPerSecond();
+	F64 iclock_freq = 1000.0 / clock_freq; // clock_ticks -> milliseconds
+
+	// walk over timers in depth order and output timings
+	for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
+		it != end_timer_tree();
+		++it)
+	{
+		NamedTimer* timerp = (*it);
+		F64 total_time_ms = ((F64)timerp->getHistoricalCount(0) * iclock_freq);
+		// Don't bother with really brief times, keep output concise
+		if (total_time_ms < 0.1) continue;
+
+		std::ostringstream out_str;
+		for (S32 i = 0; i < timerp->getDepth(); i++)
+		{
+			out_str << "\t";
+		}
+
+
+		out_str << timerp->getName() << " " 
+			<< std::setprecision(3) << total_time_ms << " ms, "
+			<< timerp->getHistoricalCalls(0) << " calls";
+
+		llinfos << out_str.str() << llendl;
+	}
+}
+
+//static 
+void LLFastTimer::reset()
+{
+	NamedTimer::reset();
+}
+
+
+//static
+void LLFastTimer::writeLog(std::ostream& os)
+{
+	while (!sLogQueue.empty())
+	{
+		LLSD& sd = sLogQueue.front();
+		LLSDSerialize::toXML(sd, os);
+		LLMutexLock lock(sLogLock);
+		sLogQueue.pop();
+	}
+}
+
+//static
+const LLFastTimer::NamedTimer* LLFastTimer::getTimerByName(const std::string& name)
+{
+	return NamedTimerFactory::instance().getTimerByName(name);
+}
+
+LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
+:	mFrameState(state)
+{
+	U32 start_time = get_cpu_clock_count_32();
+	mStartTime = start_time;
+	mFrameState->mActiveCount++;
+	LLFastTimer::sCurTimerData.mCurTimer = this;
+	LLFastTimer::sCurTimerData.mFrameState = mFrameState;
+	LLFastTimer::sCurTimerData.mChildTime = 0;
+	mLastTimerData = LLFastTimer::sCurTimerData;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h
index 93d2d0494d..4cb0c2d04e 100644
--- a/indra/llcommon/llfasttimer_class.h
+++ b/indra/llcommon/llfasttimer_class.h
@@ -39,6 +39,12 @@
 #define TIME_FAST_TIMERS 0
 
 
+#if LL_WINDOWS
+#define LL_INLINE __forceinline
+#else
+#define LL_INLINE
+#endif // LL_WINDOWS
+
 class LLMutex;
 
 #include <queue>
@@ -47,7 +53,6 @@ class LLMutex;
 class LL_COMMON_API LLFastTimer
 {
 public:
-
 	class NamedTimer;
 
 	struct LL_COMMON_API FrameState
@@ -163,11 +168,11 @@ public:
 	:	mFrameState(timer.mFrameState)
 	{
 #if TIME_FAST_TIMERS
-		U64 timer_start = get_cpu_clock_count_64();
+		U64 timer_start = getCPUClockCount64();
 #endif
 #if FAST_TIMER_ON
 		LLFastTimer::FrameState* frame_state = mFrameState;
-		mStartTime = get_cpu_clock_count_32();
+		mStartTime = getCPUClockCount32();
 
 		frame_state->mActiveCount++;
 		frame_state->mCalls++;
@@ -181,7 +186,7 @@ public:
 		cur_timer_data->mChildTime = 0;
 #endif
 #if TIME_FAST_TIMERS
-		U64 timer_end = get_cpu_clock_count_64();
+		U64 timer_end = getCPUClockCount64();
 		sTimerCycles += timer_end - timer_start;
 #endif
 	}
@@ -189,11 +194,11 @@ public:
 	LL_INLINE ~LLFastTimer()
 	{
 #if TIME_FAST_TIMERS
-		U64 timer_start = get_cpu_clock_count_64();
+		U64 timer_start = getCPUClockCount64();
 #endif
 #if FAST_TIMER_ON
 		LLFastTimer::FrameState* frame_state = mFrameState;
-		U32 total_time = get_cpu_clock_count_32() - mStartTime;
+		U32 total_time = getCPUClockCount32() - mStartTime;
 
 		frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime;
 		frame_state->mActiveCount--;
@@ -208,7 +213,7 @@ public:
 		LLFastTimer::sCurTimerData = mLastTimerData;
 #endif
 #if TIME_FAST_TIMERS
-		U64 timer_end = get_cpu_clock_count_64();
+		U64 timer_end = getCPUClockCount64();
 		sTimerCycles += timer_end - timer_start;
 		sTimerCalls++;
 #endif
@@ -254,11 +259,14 @@ public:
 	static CurTimerData		sCurTimerData;
 
 private:
+	static U32 getCPUClockCount32();
+	static U64 getCPUClockCount64();
+	static U64 sClockResolution;
+
 	static S32				sCurFrameIndex;
 	static S32				sLastFrameIndex;
 	static U64				sLastFrameTime;
 	static info_list_t*		sTimerInfos;
-	static U64 sClockResolution;
 
 	U32							mStartTime;
 	LLFastTimer::FrameState*	mFrameState;
-- 
cgit v1.2.3


From 3d0ff2585eb32c67d503452ce9f8d2198be823c8 Mon Sep 17 00:00:00 2001
From: Tofu Linden <tofu.linden@lindenlab.com>
Date: Mon, 25 Jan 2010 13:24:45 -0800
Subject: Final fix for fast timer reshuffle.

---
 indra/llcommon/llfasttimer.h         | 25 +++++++++++--------------
 indra/llcommon/llfasttimer_class.cpp | 10 +++++-----
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 0f3280023c..9f9e2ea945 100644
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -37,7 +37,6 @@
 #include "llfasttimer_class.h"
 
 #if LL_WINDOWS
-#define LL_INLINE __forceinline
 //
 // Windows implementation of CPU clock
 //
@@ -53,21 +52,21 @@
 //#undef _interlockedbittestandset
 //#undef _interlockedbittestandreset
 
-//inline U32 get_cpu_clock_count_32()
+//inline U32 LLFastTimer::getCPUClockCount32()
 //{
 //	U64 time_stamp = __rdtsc();
 //	return (U32)(time_stamp >> 8);
 //}
 //
 //// return full timer value, *not* shifted by 8 bits
-//inline U64 get_cpu_clock_count_64()
+//inline U64 LLFastTimer::getCPUClockCount64()
 //{
 //	return __rdtsc();
 //}
 
 // shift off lower 8 bits for lower resolution but longer term timing
 // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
-inline U32 get_cpu_clock_count_32()
+inline U32 LLFastTimer::getCPUClockCount32()
 {
 	U32 ret_val;
 	__asm
@@ -83,7 +82,7 @@ inline U32 get_cpu_clock_count_32()
 }
 
 // return full timer value, *not* shifted by 8 bits
-inline U64 get_cpu_clock_count_64()
+inline U64 LLFastTimer::getCPUClockCount64()
 {
 	U64 ret_val;
 	__asm
@@ -97,8 +96,6 @@ inline U64 get_cpu_clock_count_64()
 	}
     return ret_val;
 }
-#else
-#define LL_INLINE
 #endif
 
 
@@ -111,7 +108,7 @@ inline U64 get_cpu_clock_count_64()
 // to synchronize this value between cores at kernel start. It should not be affected
 // by CPU frequency. If not available use the REALTIME clock, but this may be affected by
 // NTP adjustments or other user activity affecting the system time.
-inline U64 get_cpu_clock_count_64()
+inline U64 LLFastTimer::getCPUClockCount64()
 {
 	struct timespec tp;
 	
@@ -123,9 +120,9 @@ inline U64 get_cpu_clock_count_64()
 	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
 }
 
-inline U32 get_cpu_clock_count_32()
+inline U32 LLFastTimer::getCPUClockCount32()
 {
-	return (U32)get_cpu_clock_count_64();
+	return (U32)LLFastTimer::getCPUClockCount64();
 }
 #endif // (LL_LINUX || LL_SOLARIS))
 
@@ -133,14 +130,14 @@ inline U32 get_cpu_clock_count_32()
 #if (LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
 //
 // Mac x86 implementation of CPU clock
-inline U32 get_cpu_clock_count_32()
+inline U32 LLFastTimer::getCPUClockCount32()
 {
 	U64 x;
 	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
 	return (U32)x >> 8;
 }
 
-inline U32 get_cpu_clock_count_64()
+inline U32 LLFastTimer::getCPUClockCount64()
 {
 	U64 x;
 	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
@@ -155,12 +152,12 @@ inline U32 get_cpu_clock_count_64()
 //
 // Just use gettimeofday implementation for now
 
-inline U32 get_cpu_clock_count_32()
+inline U32 LLFastTimer::getCPUClockCount32()
 {
 	return (U32)get_clock_count();
 }
 
-inline U32 get_cpu_clock_count_64()
+inline U32 LLFastTimer::getCPUClockCount64()
 {
 	return get_clock_count();
 }
diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp
index 6dbd90f9fa..abcaee673e 100644
--- a/indra/llcommon/llfasttimer_class.cpp
+++ b/indra/llcommon/llfasttimer_class.cpp
@@ -56,7 +56,7 @@
 
 S32 LLFastTimer::sCurFrameIndex = -1;
 S32 LLFastTimer::sLastFrameIndex = -1;
-U64 LLFastTimer::sLastFrameTime = get_cpu_clock_count_64();
+U64 LLFastTimer::sLastFrameTime = LLFastTimer::getCPUClockCount64();
 bool LLFastTimer::sPauseHistory = 0;
 bool LLFastTimer::sResetHistory = 0;
 LLFastTimer::CurTimerData LLFastTimer::sCurTimerData;
@@ -426,7 +426,7 @@ void LLFastTimer::NamedTimer::buildHierarchy()
 //static
 void LLFastTimer::NamedTimer::accumulateTimings()
 {
-	U32 cur_time = get_cpu_clock_count_32();
+	U32 cur_time = getCPUClockCount32();
 
 	// walk up stack of active timers and accumulate current time while leaving timing structures active
 	LLFastTimer* cur_timer = sCurTimerData.mCurTimer;
@@ -556,7 +556,7 @@ void LLFastTimer::NamedTimer::reset()
 
 	// walk up stack of active timers and reset start times to current time
 	// effectively zeroing out any accumulated time
-	U32 cur_time = get_cpu_clock_count_32();
+	U32 cur_time = getCPUClockCount32();
 
 	// root defined by parent pointing to self
 	CurTimerData* cur_data = &sCurTimerData;
@@ -649,7 +649,7 @@ std::vector<LLFastTimer::NamedTimer*>& LLFastTimer::NamedTimer::getChildren()
 void LLFastTimer::nextFrame()
 {
 	countsPerSecond(); // good place to calculate clock frequency
-	U64 frame_time = get_cpu_clock_count_64();
+	U64 frame_time = getCPUClockCount64();
 	if ((frame_time - sLastFrameTime) >> 8 > 0xffffffff)
 	{
 		llinfos << "Slow frame, fast timers inaccurate" << llendl;
@@ -738,7 +738,7 @@ const LLFastTimer::NamedTimer* LLFastTimer::getTimerByName(const std::string& na
 LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
 :	mFrameState(state)
 {
-	U32 start_time = get_cpu_clock_count_32();
+	U32 start_time = getCPUClockCount32();
 	mStartTime = start_time;
 	mFrameState->mActiveCount++;
 	LLFastTimer::sCurTimerData.mCurTimer = this;
-- 
cgit v1.2.3


From 18bdeb3d0e5ef4e5b014b41a8003569061e00910 Mon Sep 17 00:00:00 2001
From: Tofu Linden <tofu.linden@lindenlab.com>
Date: Mon, 25 Jan 2010 13:32:49 -0800
Subject: DEV-45468 'SNOW-108: Fast timers are broken / badly-scaled on linux'
 ready to merge.

legacy coding policy fix-up.
---
 indra/llcommon/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt
index 05e45d6d8a..9ead183a9e 100644
--- a/indra/llcommon/CMakeLists.txt
+++ b/indra/llcommon/CMakeLists.txt
@@ -250,7 +250,7 @@ list(APPEND llcommon_SOURCE_FILES ${llcommon_HEADER_FILES})
 
 if(LLCOMMON_LINK_SHARED)
     add_library (llcommon SHARED ${llcommon_SOURCE_FILES})
-	ll_stage_sharedlib(llcommon)
+    ll_stage_sharedlib(llcommon)
 else(LLCOMMON_LINK_SHARED)
     add_library (llcommon ${llcommon_SOURCE_FILES})
 endif(LLCOMMON_LINK_SHARED)
-- 
cgit v1.2.3


From 2375afc4283e47a50516c1e003a6f699b0a2cfe1 Mon Sep 17 00:00:00 2001
From: Tofu Linden <tofu.linden@lindenlab.com>
Date: Mon, 25 Jan 2010 13:49:13 -0800
Subject: Gosh, the mac prototypes for get_cpu_clock_count_64 have always been
 wrong, but the compiler didn't start caring until I made these proper member
 functions.

fixed.
---
 indra/llcommon/llfasttimer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 9f9e2ea945..32f3561616 100644
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -137,7 +137,7 @@ inline U32 LLFastTimer::getCPUClockCount32()
 	return (U32)x >> 8;
 }
 
-inline U32 LLFastTimer::getCPUClockCount64()
+inline U64 LLFastTimer::getCPUClockCount64()
 {
 	U64 x;
 	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
@@ -157,7 +157,7 @@ inline U32 LLFastTimer::getCPUClockCount32()
 	return (U32)get_clock_count();
 }
 
-inline U32 LLFastTimer::getCPUClockCount64()
+inline U64 LLFastTimer::getCPUClockCount64()
 {
 	return get_clock_count();
 }
-- 
cgit v1.2.3


From cbe647786f8631d32fc5f9226853950359207699 Mon Sep 17 00:00:00 2001
From: Tofu Linden <tofu.linden@lindenlab.com>
Date: Mon, 25 Jan 2010 14:25:58 -0800
Subject: We already have a LL_FORCE_INLINE in llpreprocessor.h, don't
 re-invent it for llfasttimers.h. Also define LL_LIKELY/LL_UNLIKELY with a
 warning about its micro-optimizey nature, and use it to annotate llasserts
 (i.e. make llassert() lower-overhead and thus more attractive, even in
 inner-ish loops.)

---
 indra/llcommon/llerrorlegacy.h     |  4 ++--
 indra/llcommon/llfasttimer_class.h | 11 ++---------
 indra/llcommon/llpreprocessor.h    | 15 +++++++++++++++
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/indra/llcommon/llerrorlegacy.h b/indra/llcommon/llerrorlegacy.h
index 9920921a58..476d75380f 100644
--- a/indra/llcommon/llerrorlegacy.h
+++ b/indra/llcommon/llerrorlegacy.h
@@ -34,7 +34,7 @@
 #ifndef LL_LLERRORLEGACY_H
 #define LL_LLERRORLEGACY_H
 
-
+#include "llpreprocessor.h"
 
 /*
 	LEGACY -- DO NOT USE THIS STUFF ANYMORE
@@ -107,7 +107,7 @@ const int LL_ERR_PRICE_MISMATCH = -23018;
 
 #define llwarning(msg, num)		llwarns << "Warning # " << num << ": " << msg << llendl;
 
-#define llassert_always(func)	if (!(func)) llerrs << "ASSERT (" << #func << ")" << llendl;
+#define llassert_always(func)	if (LL_UNLIKELY(!(func))) llerrs << "ASSERT (" << #func << ")" << llendl;
 
 #ifdef SHOW_ASSERT
 #define llassert(func)			llassert_always(func)
diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h
index 4cb0c2d04e..ddb1a74793 100644
--- a/indra/llcommon/llfasttimer_class.h
+++ b/indra/llcommon/llfasttimer_class.h
@@ -38,13 +38,6 @@
 #define FAST_TIMER_ON 1
 #define TIME_FAST_TIMERS 0
 
-
-#if LL_WINDOWS
-#define LL_INLINE __forceinline
-#else
-#define LL_INLINE
-#endif // LL_WINDOWS
-
 class LLMutex;
 
 #include <queue>
@@ -164,7 +157,7 @@ public:
 public:
 	LLFastTimer(LLFastTimer::FrameState* state);
 
-	LL_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer)
+	LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer)
 	:	mFrameState(timer.mFrameState)
 	{
 #if TIME_FAST_TIMERS
@@ -191,7 +184,7 @@ public:
 #endif
 	}
 
-	LL_INLINE ~LLFastTimer()
+	LL_FORCE_INLINE ~LLFastTimer()
 	{
 #if TIME_FAST_TIMERS
 		U64 timer_start = getCPUClockCount64();
diff --git a/indra/llcommon/llpreprocessor.h b/indra/llcommon/llpreprocessor.h
index 5eefa6a16b..1c1503ca7b 100644
--- a/indra/llcommon/llpreprocessor.h
+++ b/indra/llcommon/llpreprocessor.h
@@ -55,13 +55,28 @@
 #define LL_BIG_ENDIAN 1
 #endif
 
+
 // Per-compiler switches
+
 #ifdef __GNUC__
 #define LL_FORCE_INLINE inline __attribute__((always_inline))
 #else
 #define LL_FORCE_INLINE __forceinline
 #endif
 
+// Mark-up expressions with branch prediction hints.  Do NOT use
+// this with reckless abandon - it's an obfuscating micro-optimization
+// outside of inner loops or other places where you are OVERWHELMINGLY
+// sure which way an expression almost-always evaluates.
+#if __GNUC__ >= 3
+# define LL_LIKELY(EXPR) __builtin_expect (!!(EXPR), true)
+# define LL_UNLIKELY(EXPR) __builtin_expect (!!(EXPR), false)
+#else
+# define LL_LIKELY(EXPR) (EXPR)
+# define LL_UNLIKELY(EXPR) (EXPR)
+#endif
+
+
 // Figure out differences between compilers
 #if defined(__GNUC__)
 	#define GCC_VERSION (__GNUC__ * 10000 \
-- 
cgit v1.2.3