summaryrefslogtreecommitdiff
path: root/indra/llcommon/llfasttimer.h
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llfasttimer.h')
-rwxr-xr-x[-rw-r--r--]indra/llcommon/llfasttimer.h532
1 files changed, 272 insertions, 260 deletions
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index f5c90291b8..2370253078 100644..100755
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -1,32 +1,26 @@
-/**
+/**
* @file llfasttimer.h
* @brief Declaration of a fast timer.
*
- * $LicenseInfo:firstyear=2004&license=viewergpl$
- *
- * Copyright (c) 2004-2009, Linden Research, Inc.
- *
+ * $LicenseInfo:firstyear=2004&license=viewerlgpl$
* Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab. Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
*
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
* $/LicenseInfo$
*/
@@ -34,301 +28,319 @@
#define LL_FASTTIMER_H
#include "llinstancetracker.h"
+#include "lltrace.h"
+#include "lltreeiterators.h"
-#define FAST_TIMER_ON 1
-#define TIME_FAST_TIMERS 0
+#define LL_FAST_TIMER_ON 1
+#define LL_FASTTIMER_USE_RDTSC 1
-#if LL_WINDOWS
+#define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
-// shift off lower 8 bits for lower resolution but longer term timing
-// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
-inline U32 get_cpu_clock_count_32()
+namespace LLTrace
{
- U32 ret_val;
- __asm
- {
- _emit 0x0f
- _emit 0x31
- shr eax,8
- shl edx,24
- or eax, edx
- mov dword ptr [ret_val], eax
- }
- return ret_val;
-}
+// Forward declaration. Returns a BlockTimer by value; callers capture that rvalue in a const reference (see LL_RECORD_BLOCK_TIME) so that the BlockTimer lives to the end of the enclosing block.
+class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer);
-// return full timer value, *not* shifted by 8 bits
-inline U64 get_cpu_clock_count_64()
+class BlockTimer
{
- U64 ret_val;
- __asm
- {
- _emit 0x0f
- _emit 0x31
- mov eax,eax
- mov edx,edx
- mov dword ptr [ret_val+4], edx
- mov dword ptr [ret_val], eax
- }
- return ret_val;
-}
-
-#endif // LL_WINDOWS
-
-#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
-inline U32 get_cpu_clock_count_32()
-{
- U64 x;
- __asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
- return (U32)x >> 8;
-}
-
-inline U32 get_cpu_clock_count_64()
-{
- U64 x;
- __asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
- return x >> 8;
-}
-#endif
-
-#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) || (LL_SOLARIS && defined(__sparc__))
-//
-// Mac PPC (deprecated) & Solaris SPARC implementation of CPU clock
-//
-// Just use gettimeofday implementation for now
+public:
+ typedef BlockTimer self_t;
+ typedef class BlockTimerStatHandle DeclareTimer;
-inline U32 get_cpu_clock_count_32()
-{
- return (U32)get_clock_count();
-}
+ ~BlockTimer();
-inline U32 get_cpu_clock_count_64()
-{
- return get_clock_count();
-}
-#endif
+ F64Seconds getElapsedTime();
-class LLMutex;
+ //////////////////////////////////////////////////////////////////////////////
+ //
+ // Important note: These implementations must be FAST!
+ //
-#include <queue>
-#include "llsd.h"
-
-class LL_COMMON_API LLFastTimer
-{
-public:
- // stores a "named" timer instance to be reused via multiple LLFastTimer stack instances
- class LL_COMMON_API NamedTimer
- : public LLInstanceTracker<NamedTimer>
+#if LL_WINDOWS
+ //
+ // Windows implementation of CPU clock
+ //
+
+ //
+ // NOTE: put back in when we aren't using platform sdk anymore
+ //
+ // because MS has different signatures for these functions in winnt.h
+ // need to rename them to avoid conflicts
+ //#define _interlockedbittestandset _renamed_interlockedbittestandset
+ //#define _interlockedbittestandreset _renamed_interlockedbittestandreset
+ //#include <intrin.h>
+ //#undef _interlockedbittestandset
+ //#undef _interlockedbittestandreset
+
+ //inline U32 getCPUClockCount32()
+ //{
+ // U64 time_stamp = __rdtsc();
+ // return (U32)(time_stamp >> 8);
+ //}
+ //
+ //// return full timer value, *not* shifted by 8 bits
+ //inline U64 getCPUClockCount64()
+ //{
+ // return __rdtsc();
+ //}
+
+ // Returns bits 8..39 of the 64-bit time stamp counter: the lower 8 bits are shifted off for lower resolution but longer term timing.
+ // On a 1 GHz machine, a 32-bit word will then hold ~1000 seconds of timing.
+#if LL_FASTTIMER_USE_RDTSC
+ static U32 getCPUClockCount32()
{
- friend class DeclareTimer;
- public:
- ~NamedTimer();
-
- enum { HISTORY_NUM = 60 };
+ U32 ret_val;
+ __asm
+ {
+ _emit 0x0f // opcode bytes 0F 31 encode RDTSC, which loads the time stamp counter into EDX:EAX
+ _emit 0x31
+ shr eax,8 // discard the low 8 bits of the low dword
+ shl edx,24 // move the low 8 bits of the high dword into the top byte
+ or eax, edx // eax = (counter >> 8), truncated to 32 bits
+ mov dword ptr [ret_val], eax
+ }
+ return ret_val;
+ }
- const std::string& getName() const { return mName; }
- NamedTimer* getParent() const { return mParent; }
- void setParent(NamedTimer* parent);
- S32 getDepth();
- std::string getToolTip(S32 history_index = -1);
+ // Returns the full 64-bit time stamp counter value, *not* shifted by 8 bits.
+ static U64 getCPUClockCount64()
+ {
+ U64 ret_val;
+ __asm
+ {
+ _emit 0x0f // opcode bytes 0F 31 encode RDTSC, which loads the time stamp counter into EDX:EAX
+ _emit 0x31
+ mov eax,eax // register moved onto itself (value unchanged); presumably marks eax as used for the compiler - TODO confirm
+ mov edx,edx // same for edx
+ mov dword ptr [ret_val+4], edx // high dword of the result
+ mov dword ptr [ret_val], eax // low dword of the result
+ }
+ return ret_val;
+ }
- typedef std::vector<NamedTimer*>::const_iterator child_const_iter;
- child_const_iter beginChildren();
- child_const_iter endChildren();
- std::vector<NamedTimer*>& getChildren();
+#else
+ //U64 get_clock_count(); // in lltimer.cpp
+ // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
+ // 32-bit variant of the fallback clock: get_clock_count() with its lowest 8 bits shifted off, truncated to U32.
+ static U32 getCPUClockCount32()
+ {
+     const U64 ticks = get_clock_count();
+     return static_cast<U32>(ticks >> 8);
+ }
- void setCollapsed(bool collapsed) { mCollapsed = collapsed; }
- bool getCollapsed() const { return mCollapsed; }
+ // 64-bit variant of the fallback clock: forwards get_clock_count() unmodified.
+ static U64 getCPUClockCount64()
+ {
+     const U64 ticks = get_clock_count();
+     return ticks;
+ }
- U32 getCountAverage() const { return mCountAverage; }
- U32 getCallAverage() const { return mCallAverage; }
+#endif
- U32 getHistoricalCount(S32 history_index = 0) const;
- U32 getHistoricalCalls(S32 history_index = 0) const;
+#endif
- static NamedTimer& getRootNamedTimer();
- struct FrameState
- {
- FrameState(NamedTimer* timerp);
+#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+ //
+ // Linux and Solaris implementation of CPU clock - non-x86.
+ // This is accurate but SLOW! Only use out of desperation.
+ //
+ // Try to use the MONOTONIC clock if available, this is a constant time counter
+ // with nanosecond resolution (but not necessarily accuracy) and attempts are
+ // made to synchronize this value between cores at kernel start. It should not
+ // be affected by CPU frequency. If not available use the REALTIME clock, but
+ // this may be affected by NTP adjustments or other user activity affecting
+ // the system time.
+ static U64 getCPUClockCount64() // clock_gettime()-based counter: seconds scaled by sClockResolution plus nanoseconds
+ {
+ struct timespec tp;
- U32 mSelfTimeCounter;
- U32 mCalls;
- FrameState* mParent; // info for caller timer
- FrameState* mLastCaller; // used to bootstrap tree construction
- NamedTimer* mTimer;
- U16 mActiveCount; // number of timers with this ID active on stack
- bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame
- };
+#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
+ if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
+#endif
+ clock_gettime(CLOCK_REALTIME,&tp); // NOTE(review): return value is not checked; tp stays uninitialized if this call fails as well
- S32 getFrameStateIndex() const { return mFrameStateIndex; }
+ return (tp.tv_sec*sClockResolution)+tp.tv_nsec; // presumably sClockResolution is 1e9 (counts per second) on this path - TODO confirm
+ }
- FrameState& getFrameState() const;
+ // 32-bit variant: the 64-bit clock count with its lowest 8 bits shifted off, truncated to U32.
+ static U32 getCPUClockCount32()
+ {
+     const U64 full_count = getCPUClockCount64();
+     return static_cast<U32>(full_count >> 8);
+ }
+#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
- private:
- friend class LLFastTimer;
- friend class NamedTimerFactory;
- //
- // methods
- //
- NamedTimer(const std::string& name);
- // recursive call to gather total time from children
- static void accumulateTimings();
+#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
+ //
+ // Mac+Linux+Solaris FAST x86 implementation of CPU clock
+ // Returns bits 8..39 of the CPU time stamp counter (lower resolution, longer range).
+ static U32 getCPUClockCount32()
+ {
+     // Bytes 0x0f 0x31 encode RDTSC, which returns the counter split across EDX:EAX.
+     // Read the two halves through separate "=a"/"=d" outputs: the "=A" constraint
+     // names the EDX:EAX pair only on 32-bit x86. On x86-64 (this branch is also
+     // compiled for __amd64__) it lets the compiler pick either RAX or RDX alone,
+     // which loses the other half of the counter.
+     U32 lo, hi;
+     __asm__ volatile (".byte 0x0f, 0x31" : "=a"(lo), "=d"(hi));
+     const U64 x = ((U64)hi << 32) | lo;
+     return (U32)(x >> 8);
+ }
- // updates cumulative times and hierarchy,
- // can be called multiple times in a frame, at any point
- static void processTimes();
+ // Returns the full 64-bit CPU time stamp counter, *not* shifted by 8 bits.
+ static U64 getCPUClockCount64()
+ {
+     // Bytes 0x0f 0x31 encode RDTSC, which returns the counter split across EDX:EAX.
+     // Read the two halves through separate "=a"/"=d" outputs: the "=A" constraint
+     // names the EDX:EAX pair only on 32-bit x86. On x86-64 (this branch is also
+     // compiled for __amd64__) it lets the compiler pick either RAX or RDX alone,
+     // which loses the other half of the counter.
+     U32 lo, hi;
+     __asm__ volatile (".byte 0x0f, 0x31" : "=a"(lo), "=d"(hi));
+     return ((U64)hi << 32) | lo;
+ }
- static void buildHierarchy();
- static void resetFrame();
- static void reset();
+#endif
+ static BlockTimerStatHandle& getRootTimeBlock();
+ static void pushLog(LLSD sd);
+ static void setLogLock(class LLMutex* mutex);
+ static void writeLog(std::ostream& os);
+ static void updateTimes();
- //
- // members
- //
- S32 mFrameStateIndex;
-
- std::string mName;
+ static U64 countsPerSecond();
- U32 mTotalTimeCounter;
+ // updates cumulative times and hierarchy,
+ // can be called multiple times in a frame, at any point
+ static void processTimes();
- U32 mCountAverage;
- U32 mCallAverage;
+ static void bootstrapTimerTree();
+ static void incrementalUpdateTimerTree();
- U32* mCountHistory;
- U32* mCallHistory;
+ // call this once a frame to periodically log timers
+ static void logStats();
- // tree structure
- NamedTimer* mParent; // NamedTimer of caller(parent)
- std::vector<NamedTimer*> mChildren;
- bool mCollapsed; // don't show children
- bool mNeedsSorting; // sort children whenever child added
+ // dumps current cumulative frame stats to log
+ // call nextFrame() to reset timers
+ static void dumpCurTimes();
- };
+private:
+ friend class BlockTimerStatHandle;
+ // FIXME: this friendship exists so that each thread can instantiate a root timer,
+ // which could be a derived class with a public constructor instead, possibly
+ friend class ThreadRecorder;
+ friend BlockTimer timeThisBlock(BlockTimerStatHandle&);
+
+ BlockTimer(BlockTimerStatHandle& timer);
+#if !defined(MSC_VER) || MSC_VER < 1700 // NOTE(review): MSVC predefines _MSC_VER, not MSC_VER; unless MSC_VER is defined elsewhere this test is always true and the copy constructor below is always public - confirm
+ // Visual Studio 2010 has a bug where capturing an object returned by value
+ // into a local reference requires access to the copy constructor at the call site.
+ // This appears to be fixed in Visual Studio 2012.
+public:
+#endif
+ // Not meant to be copied: the body is empty, so a copy does not carry over mStartTime or mParentTimerData.
+ BlockTimer(const BlockTimer& other) {};
- // used to statically declare a new named timer
- class LL_COMMON_API DeclareTimer
- : public LLInstanceTracker<DeclareTimer>
- {
- public:
- DeclareTimer(const std::string& name, bool open);
- DeclareTimer(const std::string& name);
+private:
+ U64 mStartTime;
+ BlockTimerStackRecord mParentTimerData;
- static void updateCachedPointers();
+public:
+ // statics
+ static std::string sLogName;
+ static bool sMetricLog,
+ sLog;
+ static U64 sClockResolution;
- // convertable to NamedTimer::FrameState for convenient usage of LLFastTimer(declared_timer)
- operator NamedTimer::FrameState&() { return *mFrameState; }
- private:
- NamedTimer& mTimer;
- NamedTimer::FrameState* mFrameState;
- };
+};
+// This helper function assists in allocating a BlockTimer with stack-based lifetime.
+// The caller captures the returned value in a stack-allocated const reference variable
+// (most easily done with the macro LL_RECORD_BLOCK_TIME).
+// Otherwise it would be possible to store a BlockTimer on the heap, resulting in non-nested lifetimes,
+// which would break the invariants of the timing hierarchy logic.
+LL_FORCE_INLINE class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer)
+{
+ return BlockTimer(timer); // the BlockTimer constructor is private; this function is declared a friend of BlockTimer
+}
+// stores a "named" timer instance to be reused via multiple BlockTimer stack instances
+class BlockTimerStatHandle
+: public StatType<TimeBlockAccumulator>
+{
public:
- static LLMutex* sLogLock;
- static std::queue<LLSD> sLogQueue;
- static BOOL sLog;
- static BOOL sMetricLog;
+ BlockTimerStatHandle(const char* name, const char* description = "");
- typedef std::vector<NamedTimer::FrameState> info_list_t;
- static info_list_t& getFrameStateList();
+ TimeBlockTreeNode& getTreeNode() const;
+ BlockTimerStatHandle* getParent() const { return getTreeNode().getParent(); }
+ void setParent(BlockTimerStatHandle* parent) { getTreeNode().setParent(parent); }
- enum RootTimerMarker { ROOT };
- LLFastTimer(RootTimerMarker);
+ typedef std::vector<BlockTimerStatHandle*>::iterator child_iter;
+ typedef std::vector<BlockTimerStatHandle*>::const_iterator child_const_iter;
+ child_iter beginChildren();
+ child_iter endChildren();
+ bool hasChildren();
+ std::vector<BlockTimerStatHandle*>& getChildren();
- LLFastTimer(NamedTimer::FrameState& timer)
- : mFrameState(&timer)
+ StatType<TimeBlockAccumulator::CallCountFacet>& callCount()
{
-#if TIME_FAST_TIMERS
- U64 timer_start = get_cpu_clock_count_64();
-#endif
-#if FAST_TIMER_ON
- NamedTimer::FrameState* frame_state = &timer;
- U32 cur_time = get_cpu_clock_count_32();
- mStartSelfTime = cur_time;
- mStartTotalTime = cur_time;
-
- frame_state->mActiveCount++;
- frame_state->mCalls++;
- // keep current parent as long as it is active when we are
- frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0);
-
- mLastTimer = sCurTimer;
- sCurTimer = this;
-#endif
-#if TIME_FAST_TIMERS
- U64 timer_end = get_cpu_clock_count_64();
- sTimerCycles += timer_end - timer_start;
-#endif
+ return static_cast<StatType<TimeBlockAccumulator::CallCountFacet>&>(*(StatType<TimeBlockAccumulator>*)this);
}
- ~LLFastTimer()
+ StatType<TimeBlockAccumulator::SelfTimeFacet>& selfTime()
{
-#if TIME_FAST_TIMERS
- U64 timer_start = get_cpu_clock_count_64();
-#endif
-#if FAST_TIMER_ON
- NamedTimer::FrameState* frame_state = mFrameState;
- U32 cur_time = get_cpu_clock_count_32();
- frame_state->mSelfTimeCounter += cur_time - mStartSelfTime;
+ return static_cast<StatType<TimeBlockAccumulator::SelfTimeFacet>&>(*(StatType<TimeBlockAccumulator>*)this);
+ }
+
+ bool mCollapsed; // don't show children
+};
- frame_state->mActiveCount--;
- LLFastTimer* last_timer = mLastTimer;
- sCurTimer = last_timer;
+// iterators and helper functions for walking the call hierarchy of block timers in different ways
+typedef LLTreeDFSIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_df_iterator_t;
+typedef LLTreeDFSPostIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_df_post_iterator_t;
+typedef LLTreeBFSIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_bf_iterator_t;
- // store last caller to bootstrap tree creation
- frame_state->mLastCaller = last_timer->mFrameState;
+block_timer_tree_df_iterator_t begin_block_timer_tree_df(BlockTimerStatHandle& id);
+block_timer_tree_df_iterator_t end_block_timer_tree_df();
+block_timer_tree_df_post_iterator_t begin_block_timer_tree_df_post(BlockTimerStatHandle& id);
+block_timer_tree_df_post_iterator_t end_block_timer_tree_df_post();
+block_timer_tree_bf_iterator_t begin_block_timer_tree_bf(BlockTimerStatHandle& id);
+block_timer_tree_bf_iterator_t end_block_timer_tree_bf();
- // we are only tracking self time, so subtract our total time delta from parents
- U32 total_time = cur_time - mStartTotalTime;
- last_timer->mStartSelfTime += total_time;
+LL_FORCE_INLINE BlockTimer::BlockTimer(BlockTimerStatHandle& timer) // starts timing `timer` and makes this object the active entry of the timer stack record
+{
+#if LL_FAST_TIMER_ON
+ BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();
+ if (!cur_timer_data) return; // no stack record available: do nothing (mStartTime is left unset)
+ TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator();
+ accumulator.mActiveCount++;
+ // keep current parent as long as it is active when we are; flag a move up the tree when the parent has no active timers
+ accumulator.mMoveUpTree |= (accumulator.mParent->getCurrentAccumulator().mActiveCount == 0);
+
+ // save the current top-of-stack record so the destructor can restore it
+ mParentTimerData = *cur_timer_data;
+ // push new information: this timer becomes the active one, with no child time accumulated yet
+ cur_timer_data->mActiveTimer = this;
+ cur_timer_data->mTimeBlock = &timer;
+ cur_timer_data->mChildTime = 0;
+
+ mStartTime = getCPUClockCount64(); // clock is read after the bookkeeping above, so that work falls outside the measured interval
#endif
-#if TIME_FAST_TIMERS
- U64 timer_end = get_cpu_clock_count_64();
- sTimerCycles += timer_end - timer_start;
- sTimerCalls++;
-#endif
- }
+}
+LL_FORCE_INLINE BlockTimer::~BlockTimer() // stops timing: records the elapsed counts in the active time block and restores the parent's stack record
+{
+#if LL_FAST_TIMER_ON
+ U64 total_time = getCPUClockCount64() - mStartTime; // clock is read first; elapsed counts since the constructor set mStartTime
+ BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();
+ if (!cur_timer_data) return; // no stack record available: nothing to update
- // call this once a frame to reset timers
- static void nextFrame();
+ TimeBlockAccumulator& accumulator = cur_timer_data->mTimeBlock->getCurrentAccumulator();
- // dumps current cumulative frame stats to log
- // call nextFrame() to reset timers
- static void dumpCurTimes();
+ accumulator.mCalls++;
+ accumulator.mTotalTimeCounter += total_time;
+ accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime; // self time = total time minus the time accumulated by child timers
+ accumulator.mActiveCount--;
- // call this to reset timer hierarchy, averages, etc.
- static void reset();
+ // store last caller (the time block that was active when this timer started) to bootstrap tree creation
+ // do this in the destructor in case of recursion to get topmost caller
+ accumulator.mLastCaller = mParentTimerData.mTimeBlock;
- static U64 countsPerSecond();
- static S32 getLastFrameIndex() { return sLastFrameIndex; }
- static S32 getCurFrameIndex() { return sCurFrameIndex; }
+ // we are only tracking self time, so add our total time to the parent's child time; the parent subtracts it from its own total when it finishes
+ mParentTimerData.mChildTime += total_time;
- static void writeLog(std::ostream& os);
- static const NamedTimer* getTimerByName(const std::string& name);
+ // pop stack: restore the parent's record, including the updated mChildTime
+ *cur_timer_data = mParentTimerData;
+#endif
+}
-public:
- static bool sPauseHistory;
- static bool sResetHistory;
- static U64 sTimerCycles;
- static U32 sTimerCalls;
-
-private:
- static LLFastTimer* sCurTimer;
- static S32 sCurFrameIndex;
- static S32 sLastFrameIndex;
- static U64 sLastFrameTime;
- static info_list_t* sTimerInfos;
-
- U32 mStartSelfTime; // start time + time of all child timers
- U32 mStartTotalTime; // start time + time of all child timers
- NamedTimer::FrameState* mFrameState;
- LLFastTimer* mLastTimer;
-};
+}
+
+typedef LLTrace::BlockTimer LLFastTimer; // keeps the previous class name LLFastTimer available as an alias of LLTrace::BlockTimer
#endif // LL_FASTTIMER_H