summaryrefslogtreecommitdiff
path: root/indra/llcommon/llfasttimer.h
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llfasttimer.h')
-rw-r--r--indra/llcommon/llfasttimer.h362
1 files changed, 358 insertions, 4 deletions
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 2b25f2fabb..e42e549df5 100644
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -1,6 +1,6 @@
/**
* @file llfasttimer.h
- * @brief Inline implementations of fast timers.
+ * @brief Declaration of a fast timer.
*
* $LicenseInfo:firstyear=2004&license=viewerlgpl$
* Second Life Viewer Source Code
@@ -27,9 +27,363 @@
#ifndef LL_FASTTIMER_H
#define LL_FASTTIMER_H
-// Implementation of getCPUClockCount32() and getCPUClockCount64 are now in llfastertimer_class.cpp.
+#include "llinstancetracker.h"
-// pull in the actual class definition
-#include "llfasttimer_class.h"
+#define FAST_TIMER_ON 1 // when nonzero, the inline LLFastTimer constructor/destructor below compile in their timing bookkeeping
+#define DEBUG_FAST_TIMER_THREADS 1 // when nonzero and LL_RELEASE is not set, the inline constructor calls assert_main_thread()
+
+class LLMutex; // only used through a pointer (sLogLock) in this header, so a forward declaration is enough
+
+#include <queue>
+#include "llsd.h"
+
+#define LL_FASTTIMER_USE_RDTSC 1 // under LL_WINDOWS, selects the inline-asm clock readers over the get_clock_count() based ones
+
+
+LL_COMMON_API void assert_main_thread(); // only declared here; invoked from LLFastTimer's inline constructor
+
+class LL_COMMON_API LLFastTimer
+{
+public:
+ class NamedTimer;
+
+ struct LL_COMMON_API FrameState // per-timer counters that the inline LLFastTimer constructor/destructor update
+ {
+ FrameState();
+ void setNamedTimer(NamedTimer* timerp) { mTimer = timerp; }
+
+ U32 mSelfTimeCounter; // grown in ~LLFastTimer() by the elapsed clock count minus the child time recorded in sCurTimerData
+ U32 mCalls; // incremented by the inline LLFastTimer(DeclareTimer&) constructor
+ FrameState* mParent; // info for caller timer
+ FrameState* mLastCaller; // used to bootstrap tree construction
+ NamedTimer* mTimer; // associated NamedTimer, assigned through setNamedTimer()
+ U16 mActiveCount; // number of timers with this ID active on stack
+ bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame
+ };
+
+ // stores a "named" timer instance to be reused via multiple LLFastTimer stack instances
+ class LL_COMMON_API NamedTimer
+ : public LLInstanceTracker<NamedTimer>
+ {
+ friend class DeclareTimer;
+ public:
+ ~NamedTimer();
+
+ enum { HISTORY_NUM = 300 }; // NOTE(review): presumably the number of entries kept in mCountHistory/mCallHistory - confirm in the .cpp
+
+ const std::string& getName() const { return mName; }
+ NamedTimer* getParent() const { return mParent; }
+ void setParent(NamedTimer* parent);
+ S32 getDepth();
+ std::string getToolTip(S32 history_index = -1);
+
+ typedef std::vector<NamedTimer*>::const_iterator child_const_iter;
+ child_const_iter beginChildren();
+ child_const_iter endChildren();
+ std::vector<NamedTimer*>& getChildren(); // hands out a mutable reference to a vector of child pointers
+
+ void setCollapsed(bool collapsed) { mCollapsed = collapsed; }
+ bool getCollapsed() const { return mCollapsed; }
+
+ U32 getCountAverage() const { return mCountAverage; }
+ U32 getCallAverage() const { return mCallAverage; }
+
+ U32 getHistoricalCount(S32 history_index = 0) const; // NOTE(review): meaning of history_index is not visible in this header - confirm in the .cpp
+ U32 getHistoricalCalls(S32 history_index = 0) const;
+
+ void setFrameState(FrameState* state) { mFrameState = state; state->setNamedTimer(this); } // stores the state and points it back at this timer; state must not be null
+ FrameState& getFrameState() const;
+
+ private:
+ friend class LLFastTimer;
+ friend class NamedTimerFactory;
+
+ //
+ // methods
+ //
+ NamedTimer(const std::string& name); // private: only this class and the friends declared in it can construct one
+ // recursive call to gather total time from children
+ static void accumulateTimings();
+
+ // updates cumulative times and hierarchy,
+ // can be called multiple times in a frame, at any point
+ static void processTimes();
+
+ static void buildHierarchy();
+ static void resetFrame();
+ static void reset();
+
+ //
+ // members
+ //
+ FrameState* mFrameState; // assigned by setFrameState()
+
+ std::string mName; // returned by getName()
+
+ U32 mTotalTimeCounter;
+
+ U32 mCountAverage; // returned by getCountAverage()
+ U32 mCallAverage; // returned by getCallAverage()
+
+ U32* mCountHistory; // NOTE(review): raw pointer; allocation and ownership are not visible in this header
+ U32* mCallHistory; // NOTE(review): raw pointer; allocation and ownership are not visible in this header
+
+ // tree structure
+ NamedTimer* mParent; // NamedTimer of caller(parent)
+ std::vector<NamedTimer*> mChildren;
+ bool mCollapsed; // don't show children
+ bool mNeedsSorting; // sort children whenever child added
+ };
+
+ // used to statically declare a new named timer
+ class LL_COMMON_API DeclareTimer
+ : public LLInstanceTracker<DeclareTimer>
+ {
+ friend class LLFastTimer; // the inline LLFastTimer(DeclareTimer&) constructor reads the private mFrameState
+ public:
+ DeclareTimer(const std::string& name, bool open);
+ DeclareTimer(const std::string& name);
+
+ NamedTimer& getNamedTimer() { return mTimer; }
+
+ private:
+ FrameState mFrameState; // the state object that LLFastTimer(DeclareTimer&) takes the address of
+ NamedTimer& mTimer; // returned by getNamedTimer()
+ };
+
+public:
+ LLFastTimer(LLFastTimer::FrameState* state); // only declared here; the definition is not in this header
+
+ // Starts timing for the given declared timer: samples the clock, bumps the
+ // timer's call/active counters, and installs this instance as the current
+ // timer after saving the previous sCurTimerData for the destructor to restore.
+ LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer)
+ : mFrameState(&timer.mFrameState)
+ {
+#if FAST_TIMER_ON
+     // take the clock sample before any bookkeeping
+     mStartTime = getCPUClockCount32();
+
+     LLFastTimer::FrameState& state = *mFrameState;
+     state.mActiveCount++;
+     state.mCalls++;
+     // the recorded parent is only kept while it is itself active at this point
+     state.mMoveUpTree |= (state.mParent->mActiveCount == 0);
+
+     // save the outgoing "current timer" record, then take its place
+     LLFastTimer::CurTimerData& current = LLFastTimer::sCurTimerData;
+     mLastTimerData = current;
+     current.mCurTimer = this;
+     current.mFrameState = &state;
+     current.mChildTime = 0;
+#endif
+#if DEBUG_FAST_TIMER_THREADS && !LL_RELEASE
+     assert_main_thread();
+#endif
+ }
+
+ // Stops timing: credits this timer with its self time, records who called it,
+ // charges the whole elapsed span to the enclosing timer as child time, and
+ // restores the sCurTimerData record saved by the constructor.
+ LL_FORCE_INLINE ~LLFastTimer()
+ {
+#if FAST_TIMER_ON
+     const U32 elapsed = getCPUClockCount32() - mStartTime;
+     LLFastTimer::FrameState& state = *mFrameState;
+     LLFastTimer::CurTimerData& current = LLFastTimer::sCurTimerData;
+
+     // self time is the elapsed span minus what nested timers reported
+     state.mSelfTimeCounter += elapsed - current.mChildTime;
+     state.mActiveCount--;
+
+     // remember the caller for tree bootstrapping; doing it on the way out
+     // means recursion ends up recording the topmost caller
+     state.mLastCaller = mLastTimerData.mFrameState;
+
+     // the parent tracks self time only, so it must learn our total
+     mLastTimerData.mChildTime += elapsed;
+
+     current = mLastTimerData;
+#endif
+ }
+
+public:
+ static LLMutex* sLogLock; // NOTE(review): presumably guards sLogQueue - confirm in the .cpp
+ static std::queue<LLSD> sLogQueue;
+ static BOOL sLog; // NOTE(review): BOOL here but bool for the flags below - mixed flag types in one group
+ static BOOL sMetricLog;
+ static std::string sLogName;
+ static bool sPauseHistory;
+ static bool sResetHistory;
+
+ // call this once a frame to reset timers
+ static void nextFrame();
+
+ // dumps current cumulative frame stats to log
+ // call nextFrame() to reset timers
+ static void dumpCurTimes();
+
+ // call this to reset timer hierarchy, averages, etc.
+ static void reset();
+
+ static U64 countsPerSecond(); // NOTE(review): presumably the tick rate of getCPUClockCount64() - confirm in the .cpp
+ static S32 getLastFrameIndex() { return sLastFrameIndex; }
+ static S32 getCurFrameIndex() { return sCurFrameIndex; }
+
+ static void writeLog(std::ostream& os);
+ static const NamedTimer* getTimerByName(const std::string& name);
+
+ struct CurTimerData // snapshot of "which timer is current"; copied into each LLFastTimer on construction
+ {
+ LLFastTimer* mCurTimer; // set to the constructing LLFastTimer by the inline constructor
+ FrameState* mFrameState; // frame state of that timer
+ U32 mChildTime; // zeroed by the inline constructor; a finishing timer adds its total time to its saved copy before restoring it
+ };
+ static CurTimerData sCurTimerData; // saved by the inline constructor and restored by the destructor
+
+private:
+
+
+ //////////////////////////////////////////////////////////////////////////////
+ //
+ // Important note: These implementations must be FAST!
+ //
+
+
+#if LL_WINDOWS
+ //
+ // Windows implementation of CPU clock
+ //
+
+ //
+ // NOTE: put back in when we aren't using platform sdk anymore
+ //
+ // because MS has different signatures for these functions in winnt.h
+ // need to rename them to avoid conflicts
+ //#define _interlockedbittestandset _renamed_interlockedbittestandset
+ //#define _interlockedbittestandreset _renamed_interlockedbittestandreset
+ //#include <intrin.h>
+ //#undef _interlockedbittestandset
+ //#undef _interlockedbittestandreset
+
+ //inline U32 LLFastTimer::getCPUClockCount32()
+ //{
+ // U64 time_stamp = __rdtsc();
+ // return (U32)(time_stamp >> 8);
+ //}
+ //
+ //// return full timer value, *not* shifted by 8 bits
+ //inline U64 LLFastTimer::getCPUClockCount64()
+ //{
+ // return __rdtsc();
+ //}
+
+ // shift off lower 8 bits for lower resolution but longer term timing
+ // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
+#if LL_FASTTIMER_USE_RDTSC
+ static U32 getCPUClockCount32() // emits the raw bytes 0x0f 0x31 (RDTSC opcode) and returns (eax >> 8) | (edx << 24), i.e. bits 8..39 of the edx:eax value
+ {
+ U32 ret_val;
+ __asm
+ {
+ _emit 0x0f
+ _emit 0x31
+ shr eax,8
+ shl edx,24
+ or eax, edx
+ mov dword ptr [ret_val], eax
+ }
+ return ret_val;
+ }
+
+ // return full timer value, *not* shifted by 8 bits
+ static U64 getCPUClockCount64() // NOTE(review): the register-to-self movs below look like no-ops; presumably they let the compiler see eax/edx as modified, since _emit bytes are opaque to it - confirm before removing
+ {
+ U64 ret_val;
+ __asm
+ {
+ _emit 0x0f
+ _emit 0x31
+ mov eax,eax
+ mov edx,edx
+ mov dword ptr [ret_val+4], edx
+ mov dword ptr [ret_val], eax
+ }
+ return ret_val; // edx was stored to the upper four bytes and eax to the lower four
+ }
+
+#else
+ //LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
+ // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
+ // 32-bit reading for the non-RDTSC Windows path: the get_clock_count() value
+ // with its low 8 bits shifted off, truncated to 32 bits.
+ static U32 getCPUClockCount32()
+ {
+     const U64 raw_count = get_clock_count();
+     return (U32)(raw_count >> 8);
+ }
+
+ // Full-width reading for the non-RDTSC Windows path: get_clock_count() unshifted.
+ static U64 getCPUClockCount64()
+ {
+     const U64 raw_count = get_clock_count();
+     return raw_count;
+ }
+
+#endif
+
+#endif
+
+
+#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+ //
+ // Linux and Solaris implementation of CPU clock - non-x86.
+ // This is accurate but SLOW! Only use out of desperation.
+ //
+ // Try to use the MONOTONIC clock if available, this is a constant time counter
+ // with nanosecond resolution (but not necessarily accuracy) and attempts are
+ // made to synchronize this value between cores at kernel start. It should not
+ // be affected by CPU frequency. If not available use the REALTIME clock, but
+ // this may be affected by NTP adjustments or other user activity affecting
+ // the system time.
+ // Builds a 64-bit count from clock_gettime(): seconds scaled by sClockResolution
+ // plus the nanosecond remainder. Returns 0 if no clock could be read.
+ static U64 getCPUClockCount64()
+ {
+     // zero-initialised so that a failure of every clock_gettime() call below
+     // produces 0 rather than a read of indeterminate stack contents
+     struct timespec tp = { 0, 0 };
+
+#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
+     if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
+#endif
+     {
+         clock_gettime(CLOCK_REALTIME,&tp);
+     }
+
+     return ((U64)tp.tv_sec*sClockResolution)+(U64)tp.tv_nsec;
+ }
+
+ // 32-bit reading for the non-x86 path: getCPUClockCount64() with its low
+ // 8 bits shifted off, truncated to 32 bits.
+ static U32 getCPUClockCount32()
+ {
+     const U64 full_count = getCPUClockCount64();
+     return (U32)(full_count >> 8);
+ }
+
+#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+
+
+#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
+ //
+ // Mac+Linux+Solaris FAST x86 implementation of CPU clock
+ // Reads the x86 time-stamp counter (RDTSC, emitted as raw opcode bytes) and
+ // returns bits 8..39 of it: coarser resolution, but a longer span before the
+ // 32-bit value wraps.
+ static U32 getCPUClockCount32()
+ {
+     // RDTSC puts the low half in EAX and the high half in EDX on both i386 and
+     // amd64. The "=A" constraint names the EDX:EAX pair only on i386; on amd64
+     // it means "RAX or RDX", which silently drops the high half. Binding each
+     // register explicitly is correct on both.
+     U32 low_bits, high_bits;
+     __asm__ volatile (".byte 0x0f, 0x31": "=a"(low_bits), "=d"(high_bits));
+     return (low_bits >> 8) | (high_bits << 24);
+ }
+
+ // Reads the x86 time-stamp counter (RDTSC, emitted as raw opcode bytes) and
+ // returns the full, unshifted 64-bit value.
+ static U64 getCPUClockCount64()
+ {
+     // RDTSC puts the low half in EAX and the high half in EDX on both i386 and
+     // amd64. The "=A" constraint names the EDX:EAX pair only on i386; on amd64
+     // it means "RAX or RDX", so only 32 bits of the counter would come back.
+     // Binding each register explicitly is correct on both.
+     U32 low_bits, high_bits;
+     __asm__ volatile (".byte 0x0f, 0x31": "=a"(low_bits), "=d"(high_bits));
+     return ((U64)high_bits << 32) | (U64)low_bits;
+ }
+
+#endif
+
+ static U64 sClockResolution; // multiplied with tv_sec in the clock_gettime() based getCPUClockCount64()
+
+ static S32 sCurFrameIndex; // returned by getCurFrameIndex()
+ static S32 sLastFrameIndex; // returned by getLastFrameIndex()
+ static U64 sLastFrameTime;
+
+ U32 mStartTime; // getCPUClockCount32() sample taken by the inline constructor
+ LLFastTimer::FrameState* mFrameState; // counters this timer instance updates
+ LLFastTimer::CurTimerData mLastTimerData; // copy of sCurTimerData taken by the inline constructor and written back by the destructor
+
+};
+
+typedef class LLFastTimer LLFastTimer; // NOTE(review): in C++ the class name is already a type name, so this looks redundant - presumably kept for source compatibility; confirm before removing
#endif // LL_FASTTIMER_H