diff options
Diffstat (limited to 'indra/llcommon/llfasttimer.h')
-rwxr-xr-x[-rw-r--r--] | indra/llcommon/llfasttimer.h | 532 |
1 files changed, 272 insertions, 260 deletions
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index f5c90291b8..2370253078 100644..100755 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -1,32 +1,26 @@ -/** +/** * @file llfasttimer.h * @brief Declaration of a fast timer. * - * $LicenseInfo:firstyear=2004&license=viewergpl$ - * - * Copyright (c) 2004-2009, Linden Research, Inc. - * + * $LicenseInfo:firstyear=2004&license=viewerlgpl$ * Second Life Viewer Source Code - * The source code in this file ("Source Code") is provided by Linden Lab - * to you under the terms of the GNU General Public License, version 2.0 - * ("GPL"), unless you have obtained a separate licensing agreement - * ("Other License"), formally executed by you and Linden Lab. Terms of - * the GPL can be found in doc/GPL-license.txt in this distribution, or - * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2 + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. * - * There are special exceptions to the terms and conditions of the GPL as - * it is applied to this Source Code. View the full text of the exception - * in the file doc/FLOSS-exception.txt in this software distribution, or - * online at - * http://secondlifegrid.net/programs/open_source/licensing/flossexception + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * By copying, modifying or distributing this software, you acknowledge - * that you have read and understood your obligations described above, - * and agree to abide by those obligations. + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * - * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO - * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, - * COMPLETENESS OR PERFORMANCE. + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ @@ -34,301 +28,319 @@ #define LL_FASTTIMER_H #include "llinstancetracker.h" +#include "lltrace.h" +#include "lltreeiterators.h" -#define FAST_TIMER_ON 1 -#define TIME_FAST_TIMERS 0 +#define LL_FAST_TIMER_ON 1 +#define LL_FASTTIMER_USE_RDTSC 1 -#if LL_WINDOWS +#define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__); -// shift off lower 8 bits for lower resolution but longer term timing -// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing -inline U32 get_cpu_clock_count_32() +namespace LLTrace { - U32 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - shr eax,8 - shl edx,24 - or eax, edx - mov dword ptr [ret_val], eax - } - return ret_val; -} +// use to create blocktimer rvalue to be captured in a reference so that the BlockTimer lives to the end of the block. +class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer); -// return full timer value, *not* shifted by 8 bits -inline U64 get_cpu_clock_count_64() +class BlockTimer { - U64 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - mov eax,eax - mov edx,edx - mov dword ptr [ret_val+4], edx - mov dword ptr [ret_val], eax - } - return ret_val; -} - -#endif // LL_WINDOWS - -#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) -inline U32 get_cpu_clock_count_32() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return (U32)x >> 8; -} - -inline U32 get_cpu_clock_count_64() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return x >> 8; -} -#endif - -#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) || (LL_SOLARIS && defined(__sparc__)) -// -// Mac PPC (deprecated) & Solaris SPARC implementation of CPU clock -// -// Just use gettimeofday implementation for now +public: + typedef BlockTimer self_t; + typedef class BlockTimerStatHandle DeclareTimer; -inline U32 get_cpu_clock_count_32() -{ - return (U32)get_clock_count(); -} + ~BlockTimer(); -inline U32 get_cpu_clock_count_64() -{ - return get_clock_count(); -} -#endif + F64Seconds getElapsedTime(); -class LLMutex; + ////////////////////////////////////////////////////////////////////////////// + // + // Important note: These implementations must be FAST! + // -#include <queue> -#include "llsd.h" - -class LL_COMMON_API LLFastTimer -{ -public: - // stores a "named" timer instance to be reused via multiple LLFastTimer stack instances - class LL_COMMON_API NamedTimer - : public LLInstanceTracker<NamedTimer> +#if LL_WINDOWS + // + // Windows implementation of CPU clock + // + + // + // NOTE: put back in when we aren't using platform sdk anymore + // + // because MS has different signatures for these functions in winnt.h + // need to rename them to avoid conflicts + //#define _interlockedbittestandset _renamed_interlockedbittestandset + //#define _interlockedbittestandreset _renamed_interlockedbittestandreset + //#include <intrin.h> + //#undef _interlockedbittestandset + //#undef _interlockedbittestandreset + + //inline U32 getCPUClockCount32() + //{ + // U64 time_stamp = __rdtsc(); + // return (U32)(time_stamp >> 8); + //} + // + //// return full timer value, *not* shifted by 8 bits + //inline U64 getCPUClockCount64() + //{ + // return __rdtsc(); + //} + + // shift off lower 8 bits for lower resolution but longer term timing + // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing +#if LL_FASTTIMER_USE_RDTSC + static U32 getCPUClockCount32() { - friend class DeclareTimer; - public: - ~NamedTimer(); - - enum { HISTORY_NUM = 60 }; + U32 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + shr eax,8 + shl edx,24 + or eax, edx + mov dword ptr [ret_val], eax + } + return ret_val; + } - const std::string& getName() const { return mName; } - NamedTimer* getParent() const { return mParent; } - void setParent(NamedTimer* parent); - S32 getDepth(); - std::string getToolTip(S32 history_index = -1); + // return full timer value, *not* shifted by 8 bits + static U64 getCPUClockCount64() + { + U64 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + mov eax,eax + mov edx,edx + mov dword ptr [ret_val+4], edx + mov dword ptr [ret_val], eax + } + return ret_val; + } - typedef std::vector<NamedTimer*>::const_iterator child_const_iter; - child_const_iter beginChildren(); - child_const_iter endChildren(); - std::vector<NamedTimer*>& getChildren(); +#else + //U64 get_clock_count(); // in lltimer.cpp + // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures. + static U32 getCPUClockCount32() + { + return (U32)(get_clock_count()>>8); + } - void setCollapsed(bool collapsed) { mCollapsed = collapsed; } - bool getCollapsed() const { return mCollapsed; } + static U64 getCPUClockCount64() + { + return get_clock_count(); + } - U32 getCountAverage() const { return mCountAverage; } - U32 getCallAverage() const { return mCallAverage; } +#endif - U32 getHistoricalCount(S32 history_index = 0) const; - U32 getHistoricalCalls(S32 history_index = 0) const; +#endif - static NamedTimer& getRootNamedTimer(); - struct FrameState - { - FrameState(NamedTimer* timerp); +#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) + // + // Linux and Solaris implementation of CPU clock - non-x86. + // This is accurate but SLOW! Only use out of desperation. + // + // Try to use the MONOTONIC clock if available, this is a constant time counter + // with nanosecond resolution (but not necessarily accuracy) and attempts are + // made to synchronize this value between cores at kernel start. It should not + // be affected by CPU frequency. If not available use the REALTIME clock, but + // this may be affected by NTP adjustments or other user activity affecting + // the system time. + static U64 getCPUClockCount64() + { + struct timespec tp; - U32 mSelfTimeCounter; - U32 mCalls; - FrameState* mParent; // info for caller timer - FrameState* mLastCaller; // used to bootstrap tree construction - NamedTimer* mTimer; - U16 mActiveCount; // number of timers with this ID active on stack - bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame - }; +#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? + if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME +#endif + clock_gettime(CLOCK_REALTIME,&tp); - S32 getFrameStateIndex() const { return mFrameStateIndex; } + return (tp.tv_sec*sClockResolution)+tp.tv_nsec; + } - FrameState& getFrameState() const; + static U32 getCPUClockCount32() + { + return (U32)(getCPUClockCount64() >> 8); + } +#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) - private: - friend class LLFastTimer; - friend class NamedTimerFactory; - // - // methods - // - NamedTimer(const std::string& name); - // recursive call to gather total time from children - static void accumulateTimings(); +#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) + // + // Mac+Linux+Solaris FAST x86 implementation of CPU clock + static U32 getCPUClockCount32() + { + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return (U32)(x >> 8); + } - // updates cumulative times and hierarchy, - // can be called multiple times in a frame, at any point - static void processTimes(); + static U64 getCPUClockCount64() + { + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return x; + } - static void buildHierarchy(); - static void resetFrame(); - static void reset(); +#endif + static BlockTimerStatHandle& getRootTimeBlock(); + static void pushLog(LLSD sd); + static void setLogLock(class LLMutex* mutex); + static void writeLog(std::ostream& os); + static void updateTimes(); - // - // members - // - S32 mFrameStateIndex; - - std::string mName; + static U64 countsPerSecond(); - U32 mTotalTimeCounter; + // updates cumulative times and hierarchy, + // can be called multiple times in a frame, at any point + static void processTimes(); - U32 mCountAverage; - U32 mCallAverage; + static void bootstrapTimerTree(); + static void incrementalUpdateTimerTree(); - U32* mCountHistory; - U32* mCallHistory; + // call this once a frame to periodically log timers + static void logStats(); - // tree structure - NamedTimer* mParent; // NamedTimer of caller(parent) - std::vector<NamedTimer*> mChildren; - bool mCollapsed; // don't show children - bool mNeedsSorting; // sort children whenever child added + // dumps current cumulative frame stats to log + // call nextFrame() to reset timers + static void dumpCurTimes(); - }; +private: + friend class BlockTimerStatHandle; + // FIXME: this friendship exists so that each thread can instantiate a root timer, + // which could be a derived class with a public constructor instead, possibly + friend class ThreadRecorder; + friend BlockTimer timeThisBlock(BlockTimerStatHandle&); + + BlockTimer(BlockTimerStatHandle& timer); +#if !defined(MSC_VER) || MSC_VER < 1700 + // Visual Studio 2010 has a bug where capturing an object returned by value + // into a local reference requires access to the copy constructor at the call site. + // This appears to be fixed in 2012. +public: +#endif + // no-copy + BlockTimer(const BlockTimer& other) {}; - // used to statically declare a new named timer - class LL_COMMON_API DeclareTimer - : public LLInstanceTracker<DeclareTimer> - { - public: - DeclareTimer(const std::string& name, bool open); - DeclareTimer(const std::string& name); +private: + U64 mStartTime; + BlockTimerStackRecord mParentTimerData; - static void updateCachedPointers(); +public: + // statics + static std::string sLogName; + static bool sMetricLog, + sLog; + static U64 sClockResolution; - // convertable to NamedTimer::FrameState for convenient usage of LLFastTimer(declared_timer) - operator NamedTimer::FrameState&() { return *mFrameState; } - private: - NamedTimer& mTimer; - NamedTimer::FrameState* mFrameState; - }; +}; +// this dummy function assists in allocating a block timer with stack-based lifetime. +// this is done by capturing the return value in a stack-allocated const reference variable. +// (This is most easily done using the macro LL_RECORD_BLOCK_TIME) +// Otherwise, it would be possible to store a BlockTimer on the heap, resulting in non-nested lifetimes, +// which would break the invariants of the timing hierarchy logic +LL_FORCE_INLINE class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer) +{ + return BlockTimer(timer); +} +// stores a "named" timer instance to be reused via multiple BlockTimer stack instances +class BlockTimerStatHandle +: public StatType<TimeBlockAccumulator> +{ public: - static LLMutex* sLogLock; - static std::queue<LLSD> sLogQueue; - static BOOL sLog; - static BOOL sMetricLog; + BlockTimerStatHandle(const char* name, const char* description = ""); - typedef std::vector<NamedTimer::FrameState> info_list_t; - static info_list_t& getFrameStateList(); + TimeBlockTreeNode& getTreeNode() const; + BlockTimerStatHandle* getParent() const { return getTreeNode().getParent(); } + void setParent(BlockTimerStatHandle* parent) { getTreeNode().setParent(parent); } - enum RootTimerMarker { ROOT }; - LLFastTimer(RootTimerMarker); + typedef std::vector<BlockTimerStatHandle*>::iterator child_iter; + typedef std::vector<BlockTimerStatHandle*>::const_iterator child_const_iter; + child_iter beginChildren(); + child_iter endChildren(); + bool hasChildren(); + std::vector<BlockTimerStatHandle*>& getChildren(); - LLFastTimer(NamedTimer::FrameState& timer) - : mFrameState(&timer) + StatType<TimeBlockAccumulator::CallCountFacet>& callCount() { -#if TIME_FAST_TIMERS - U64 timer_start = get_cpu_clock_count_64(); -#endif -#if FAST_TIMER_ON - NamedTimer::FrameState* frame_state = &timer; - U32 cur_time = get_cpu_clock_count_32(); - mStartSelfTime = cur_time; - mStartTotalTime = cur_time; - - frame_state->mActiveCount++; - frame_state->mCalls++; - // keep current parent as long as it is active when we are - frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0); - - mLastTimer = sCurTimer; - sCurTimer = this; -#endif -#if TIME_FAST_TIMERS - U64 timer_end = get_cpu_clock_count_64(); - sTimerCycles += timer_end - timer_start; -#endif + return static_cast<StatType<TimeBlockAccumulator::CallCountFacet>&>(*(StatType<TimeBlockAccumulator>*)this); } - ~LLFastTimer() + StatType<TimeBlockAccumulator::SelfTimeFacet>& selfTime() { -#if TIME_FAST_TIMERS - U64 timer_start = get_cpu_clock_count_64(); -#endif -#if FAST_TIMER_ON - NamedTimer::FrameState* frame_state = mFrameState; - U32 cur_time = get_cpu_clock_count_32(); - frame_state->mSelfTimeCounter += cur_time - mStartSelfTime; + return static_cast<StatType<TimeBlockAccumulator::SelfTimeFacet>&>(*(StatType<TimeBlockAccumulator>*)this); + } + + bool mCollapsed; // don't show children +}; - frame_state->mActiveCount--; - LLFastTimer* last_timer = mLastTimer; - sCurTimer = last_timer; +// iterators and helper functions for walking the call hierarchy of block timers in different ways +typedef LLTreeDFSIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_df_iterator_t; +typedef LLTreeDFSPostIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_df_post_iterator_t; +typedef LLTreeBFSIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_bf_iterator_t; - // store last caller to bootstrap tree creation - frame_state->mLastCaller = last_timer->mFrameState; +block_timer_tree_df_iterator_t begin_block_timer_tree_df(BlockTimerStatHandle& id); +block_timer_tree_df_iterator_t end_block_timer_tree_df(); +block_timer_tree_df_post_iterator_t begin_block_timer_tree_df_post(BlockTimerStatHandle& id); +block_timer_tree_df_post_iterator_t end_block_timer_tree_df_post(); +block_timer_tree_bf_iterator_t begin_block_timer_tree_bf(BlockTimerStatHandle& id); +block_timer_tree_bf_iterator_t end_block_timer_tree_bf(); - // we are only tracking self time, so subtract our total time delta from parents - U32 total_time = cur_time - mStartTotalTime; - last_timer->mStartSelfTime += total_time; +LL_FORCE_INLINE BlockTimer::BlockTimer(BlockTimerStatHandle& timer) +{ +#if LL_FAST_TIMER_ON + BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance(); + if (!cur_timer_data) return; + TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator(); + accumulator.mActiveCount++; + // keep current parent as long as it is active when we are + accumulator.mMoveUpTree |= (accumulator.mParent->getCurrentAccumulator().mActiveCount == 0); + + // store top of stack + mParentTimerData = *cur_timer_data; + // push new information + cur_timer_data->mActiveTimer = this; + cur_timer_data->mTimeBlock = &timer; + cur_timer_data->mChildTime = 0; + + mStartTime = getCPUClockCount64(); #endif -#if TIME_FAST_TIMERS - U64 timer_end = get_cpu_clock_count_64(); - sTimerCycles += timer_end - timer_start; - sTimerCalls++; -#endif - } +} +LL_FORCE_INLINE BlockTimer::~BlockTimer() +{ +#if LL_FAST_TIMER_ON + U64 total_time = getCPUClockCount64() - mStartTime; + BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance(); + if (!cur_timer_data) return; - // call this once a frame to reset timers - static void nextFrame(); + TimeBlockAccumulator& accumulator = cur_timer_data->mTimeBlock->getCurrentAccumulator(); - // dumps current cumulative frame stats to log - // call nextFrame() to reset timers - static void dumpCurTimes(); + accumulator.mCalls++; + accumulator.mTotalTimeCounter += total_time; + accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime; + accumulator.mActiveCount--; - // call this to reset timer hierarchy, averages, etc. - static void reset(); + // store last caller to bootstrap tree creation + // do this in the destructor in case of recursion to get topmost caller + accumulator.mLastCaller = mParentTimerData.mTimeBlock; - static U64 countsPerSecond(); - static S32 getLastFrameIndex() { return sLastFrameIndex; } - static S32 getCurFrameIndex() { return sCurFrameIndex; } + // we are only tracking self time, so subtract our total time delta from parents + mParentTimerData.mChildTime += total_time; - static void writeLog(std::ostream& os); - static const NamedTimer* getTimerByName(const std::string& name); + //pop stack + *cur_timer_data = mParentTimerData; +#endif +} -public: - static bool sPauseHistory; - static bool sResetHistory; - static U64 sTimerCycles; - static U32 sTimerCalls; - -private: - static LLFastTimer* sCurTimer; - static S32 sCurFrameIndex; - static S32 sLastFrameIndex; - static U64 sLastFrameTime; - static info_list_t* sTimerInfos; - - U32 mStartSelfTime; // start time + time of all child timers - U32 mStartTotalTime; // start time + time of all child timers - NamedTimer::FrameState* mFrameState; - LLFastTimer* mLastTimer; -}; +} + +typedef LLTrace::BlockTimer LLFastTimer; #endif // LL_LLFASTTIMER_H |