summaryrefslogtreecommitdiff
path: root/indra/llcommon/llfasttimer.h
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llfasttimer.h')
-rwxr-xr-x[-rw-r--r--]indra/llcommon/llfasttimer.h432
1 files changed, 306 insertions, 126 deletions
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 48461df6ae..2370253078 100644..100755
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -1,166 +1,346 @@
/**
* @file llfasttimer.h
- * @brief Inline implementations of fast timers.
- *
- * $LicenseInfo:firstyear=2004&license=viewergpl$
- *
- * Copyright (c) 2004-2009, Linden Research, Inc.
+ * @brief Declaration of a fast timer.
*
+ * $LicenseInfo:firstyear=2004&license=viewerlgpl$
* Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab. Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- *
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- *
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- *
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
* $/LicenseInfo$
*/
#ifndef LL_FASTTIMER_H
#define LL_FASTTIMER_H
-// pull in the actual class definition
-#include "llfasttimer_class.h"
+#include "llinstancetracker.h"
+#include "lltrace.h"
+#include "lltreeiterators.h"
-#if LL_WINDOWS
-//
-// Windows implementation of CPU clock
-//
-
-//
-// NOTE: put back in when we aren't using platform sdk anymore
-//
-// because MS has different signatures for these functions in winnt.h
-// need to rename them to avoid conflicts
-//#define _interlockedbittestandset _renamed_interlockedbittestandset
-//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
-//#include <intrin.h>
-//#undef _interlockedbittestandset
-//#undef _interlockedbittestandreset
-
-//inline U32 LLFastTimer::getCPUClockCount32()
-//{
-// U64 time_stamp = __rdtsc();
-// return (U32)(time_stamp >> 8);
-//}
-//
-//// return full timer value, *not* shifted by 8 bits
-//inline U64 LLFastTimer::getCPUClockCount64()
-//{
-// return __rdtsc();
-//}
-
-// shift off lower 8 bits for lower resolution but longer term timing
-// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
-inline U32 LLFastTimer::getCPUClockCount32()
+#define LL_FAST_TIMER_ON 1
+#define LL_FASTTIMER_USE_RDTSC 1
+
+#define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
+
+namespace LLTrace
{
- U32 ret_val;
- __asm
+// use to create blocktimer rvalue to be captured in a reference so that the BlockTimer lives to the end of the block.
+class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer);
+
+class BlockTimer
+{
+public:
+ typedef BlockTimer self_t;
+ typedef class BlockTimerStatHandle DeclareTimer;
+
+ ~BlockTimer();
+
+ F64Seconds getElapsedTime();
+
+ //////////////////////////////////////////////////////////////////////////////
+ //
+ // Important note: These implementations must be FAST!
+ //
+
+
+#if LL_WINDOWS
+ //
+ // Windows implementation of CPU clock
+ //
+
+ //
+ // NOTE: put back in when we aren't using platform sdk anymore
+ //
+ // because MS has different signatures for these functions in winnt.h
+ // need to rename them to avoid conflicts
+ //#define _interlockedbittestandset _renamed_interlockedbittestandset
+ //#define _interlockedbittestandreset _renamed_interlockedbittestandreset
+ //#include <intrin.h>
+ //#undef _interlockedbittestandset
+ //#undef _interlockedbittestandreset
+
+ //inline U32 getCPUClockCount32()
+ //{
+ // U64 time_stamp = __rdtsc();
+ // return (U32)(time_stamp >> 8);
+ //}
+ //
+ //// return full timer value, *not* shifted by 8 bits
+ //inline U64 getCPUClockCount64()
+ //{
+ // return __rdtsc();
+ //}
+
+ // shift off lower 8 bits for lower resolution but longer term timing
+ // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
+#if LL_FASTTIMER_USE_RDTSC
+ static U32 getCPUClockCount32()
{
- _emit 0x0f
- _emit 0x31
- shr eax,8
- shl edx,24
- or eax, edx
- mov dword ptr [ret_val], eax
+ U32 ret_val;
+ __asm
+ {
+ _emit 0x0f
+ _emit 0x31
+ shr eax,8
+ shl edx,24
+ or eax, edx
+ mov dword ptr [ret_val], eax
+ }
+ return ret_val;
}
- return ret_val;
-}
-// return full timer value, *not* shifted by 8 bits
-inline U64 LLFastTimer::getCPUClockCount64()
-{
- U64 ret_val;
- __asm
+ // return full timer value, *not* shifted by 8 bits
+ static U64 getCPUClockCount64()
{
- _emit 0x0f
- _emit 0x31
- mov eax,eax
- mov edx,edx
- mov dword ptr [ret_val+4], edx
- mov dword ptr [ret_val], eax
+ U64 ret_val;
+ __asm
+ {
+ _emit 0x0f
+ _emit 0x31
+ mov eax,eax
+ mov edx,edx
+ mov dword ptr [ret_val+4], edx
+ mov dword ptr [ret_val], eax
+ }
+ return ret_val;
}
- return ret_val;
-}
+
+#else
+ //U64 get_clock_count(); // in lltimer.cpp
+ // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
+ static U32 getCPUClockCount32()
+ {
+ return (U32)(get_clock_count()>>8);
+ }
+
+ static U64 getCPUClockCount64()
+ {
+ return get_clock_count();
+ }
+
#endif
+#endif
+
+
+#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+ //
+ // Linux and Solaris implementation of CPU clock - non-x86.
+ // This is accurate but SLOW! Only use out of desperation.
+ //
+ // Try to use the MONOTONIC clock if available, this is a constant time counter
+ // with nanosecond resolution (but not necessarily accuracy) and attempts are
+ // made to synchronize this value between cores at kernel start. It should not
+ // be affected by CPU frequency. If not available use the REALTIME clock, but
+ // this may be affected by NTP adjustments or other user activity affecting
+ // the system time.
+ static U64 getCPUClockCount64()
+ {
+ struct timespec tp;
-#if LL_LINUX || LL_SOLARIS
-//
-// Linux and Solaris implementation of CPU clock - all architectures.
-//
-// Try to use the MONOTONIC clock if available, this is a constant time counter
-// with nanosecond resolution (but not necessarily accuracy) and attempts are made
-// to synchronize this value between cores at kernel start. It should not be affected
-// by CPU frequency. If not available use the REALTIME clock, but this may be affected by
-// NTP adjustments or other user activity affecting the system time.
-inline U64 LLFastTimer::getCPUClockCount64()
-{
- struct timespec tp;
-
#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
- if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
+ if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
#endif
- clock_gettime(CLOCK_REALTIME,&tp);
+ clock_gettime(CLOCK_REALTIME,&tp);
- return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;
-}
+ return (tp.tv_sec*sClockResolution)+tp.tv_nsec;
+ }
+
+ static U32 getCPUClockCount32()
+ {
+ return (U32)(getCPUClockCount64() >> 8);
+ }
+
+#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
-inline U32 LLFastTimer::getCPUClockCount32()
-{
- return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
-}
-#endif // (LL_LINUX || LL_SOLARIS))
+#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
+ //
+ // Mac+Linux+Solaris FAST x86 implementation of CPU clock
+ static U32 getCPUClockCount32()
+ {
+ U64 x;
+ __asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+ return (U32)(x >> 8);
+ }
+
+ static U64 getCPUClockCount64()
+ {
+ U64 x;
+ __asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+ return x;
+ }
+
+#endif
+
+ static BlockTimerStatHandle& getRootTimeBlock();
+ static void pushLog(LLSD sd);
+ static void setLogLock(class LLMutex* mutex);
+ static void writeLog(std::ostream& os);
+ static void updateTimes();
+
+ static U64 countsPerSecond();
+
+ // updates cumulative times and hierarchy,
+ // can be called multiple times in a frame, at any point
+ static void processTimes();
+
+ static void bootstrapTimerTree();
+ static void incrementalUpdateTimerTree();
-#if (LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
-//
-// Mac x86 implementation of CPU clock
-inline U32 LLFastTimer::getCPUClockCount32()
+ // call this once a frame to periodically log timers
+ static void logStats();
+
+ // dumps current cumulative frame stats to log
+ // call nextFrame() to reset timers
+ static void dumpCurTimes();
+
+private:
+ friend class BlockTimerStatHandle;
+ // FIXME: this friendship exists so that each thread can instantiate a root timer,
+ // which could be a derived class with a public constructor instead, possibly
+ friend class ThreadRecorder;
+ friend BlockTimer timeThisBlock(BlockTimerStatHandle&);
+
+ BlockTimer(BlockTimerStatHandle& timer);
+#if !defined(MSC_VER) || MSC_VER < 1700
+ // Visual Studio 2010 has a bug where capturing an object returned by value
+ // into a local reference requires access to the copy constructor at the call site.
+ // This appears to be fixed in 2012.
+public:
+#endif
+ // no-copy
+ BlockTimer(const BlockTimer& other) {};
+
+private:
+ U64 mStartTime;
+ BlockTimerStackRecord mParentTimerData;
+
+public:
+ // statics
+ static std::string sLogName;
+ static bool sMetricLog,
+ sLog;
+ static U64 sClockResolution;
+
+};
+
+// this dummy function assists in allocating a block timer with stack-based lifetime.
+// this is done by capturing the return value in a stack-allocated const reference variable.
+// (This is most easily done using the macro LL_RECORD_BLOCK_TIME)
+// Otherwise, it would be possible to store a BlockTimer on the heap, resulting in non-nested lifetimes,
+// which would break the invariants of the timing hierarchy logic
+LL_FORCE_INLINE class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer)
{
- U64 x;
- __asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
- return (U32)(x >> 8);
+ return BlockTimer(timer);
}
-inline U64 LLFastTimer::getCPUClockCount64()
+// stores a "named" timer instance to be reused via multiple BlockTimer stack instances
+class BlockTimerStatHandle
+: public StatType<TimeBlockAccumulator>
{
- U64 x;
- __asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
- return x;
-}
-#endif
+public:
+ BlockTimerStatHandle(const char* name, const char* description = "");
+ TimeBlockTreeNode& getTreeNode() const;
+ BlockTimerStatHandle* getParent() const { return getTreeNode().getParent(); }
+ void setParent(BlockTimerStatHandle* parent) { getTreeNode().setParent(parent); }
-#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)))
-//
-// Mac PPC (deprecated) implementation of CPU clock
-//
-// Just use gettimeofday implementation for now
+ typedef std::vector<BlockTimerStatHandle*>::iterator child_iter;
+ typedef std::vector<BlockTimerStatHandle*>::const_iterator child_const_iter;
+ child_iter beginChildren();
+ child_iter endChildren();
+ bool hasChildren();
+ std::vector<BlockTimerStatHandle*>& getChildren();
-inline U32 LLFastTimer::getCPUClockCount32()
+ StatType<TimeBlockAccumulator::CallCountFacet>& callCount()
+ {
+ return static_cast<StatType<TimeBlockAccumulator::CallCountFacet>&>(*(StatType<TimeBlockAccumulator>*)this);
+ }
+
+ StatType<TimeBlockAccumulator::SelfTimeFacet>& selfTime()
+ {
+ return static_cast<StatType<TimeBlockAccumulator::SelfTimeFacet>&>(*(StatType<TimeBlockAccumulator>*)this);
+ }
+
+ bool mCollapsed; // don't show children
+};
+
+// iterators and helper functions for walking the call hierarchy of block timers in different ways
+typedef LLTreeDFSIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_df_iterator_t;
+typedef LLTreeDFSPostIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_df_post_iterator_t;
+typedef LLTreeBFSIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_bf_iterator_t;
+
+block_timer_tree_df_iterator_t begin_block_timer_tree_df(BlockTimerStatHandle& id);
+block_timer_tree_df_iterator_t end_block_timer_tree_df();
+block_timer_tree_df_post_iterator_t begin_block_timer_tree_df_post(BlockTimerStatHandle& id);
+block_timer_tree_df_post_iterator_t end_block_timer_tree_df_post();
+block_timer_tree_bf_iterator_t begin_block_timer_tree_bf(BlockTimerStatHandle& id);
+block_timer_tree_bf_iterator_t end_block_timer_tree_bf();
+
+LL_FORCE_INLINE BlockTimer::BlockTimer(BlockTimerStatHandle& timer)
{
- return (U32)(get_clock_count()>>8);
+#if LL_FAST_TIMER_ON
+ BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();
+ if (!cur_timer_data) return;
+ TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator();
+ accumulator.mActiveCount++;
+ // keep current parent as long as it is active when we are
+ accumulator.mMoveUpTree |= (accumulator.mParent->getCurrentAccumulator().mActiveCount == 0);
+
+ // store top of stack
+ mParentTimerData = *cur_timer_data;
+ // push new information
+ cur_timer_data->mActiveTimer = this;
+ cur_timer_data->mTimeBlock = &timer;
+ cur_timer_data->mChildTime = 0;
+
+ mStartTime = getCPUClockCount64();
+#endif
}
-inline U64 LLFastTimer::getCPUClockCount64()
+LL_FORCE_INLINE BlockTimer::~BlockTimer()
{
- return get_clock_count();
-}
+#if LL_FAST_TIMER_ON
+ U64 total_time = getCPUClockCount64() - mStartTime;
+ BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();
+ if (!cur_timer_data) return;
+
+ TimeBlockAccumulator& accumulator = cur_timer_data->mTimeBlock->getCurrentAccumulator();
+
+ accumulator.mCalls++;
+ accumulator.mTotalTimeCounter += total_time;
+ accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime;
+ accumulator.mActiveCount--;
+
+ // store last caller to bootstrap tree creation
+ // do this in the destructor in case of recursion to get topmost caller
+ accumulator.mLastCaller = mParentTimerData.mTimeBlock;
+
+ // we are only tracking self time, so subtract our total time delta from parents
+ mParentTimerData.mChildTime += total_time;
+
+ //pop stack
+ *cur_timer_data = mParentTimerData;
#endif
+}
+
+}
+
+typedef LLTrace::BlockTimer LLFastTimer;
#endif // LL_LLFASTTIMER_H