diff options
Diffstat (limited to 'indra/llcommon/llfasttimer.h')
-rw-r--r-- | indra/llcommon/llfasttimer.h | 408 |
1 files changed, 204 insertions, 204 deletions
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 9bd93d7240..17ad37b031 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -5,21 +5,21 @@ * $LicenseInfo:firstyear=2004&license=viewerlgpl$ * Second Life Viewer Source Code * Copyright (C) 2010, Linden Research, Inc. - * + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License only. - * + * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * + * * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ @@ -51,77 +51,77 @@ class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer); class BlockTimer { public: - typedef BlockTimer self_t; - typedef class BlockTimerStatHandle DeclareTimer; + typedef BlockTimer self_t; + typedef class BlockTimerStatHandle DeclareTimer; - ~BlockTimer(); + ~BlockTimer(); - F64Seconds getElapsedTime(); + F64Seconds getElapsedTime(); - ////////////////////////////////////////////////////////////////////////////// - // - // Important note: These implementations must be FAST! - // + ////////////////////////////////////////////////////////////////////////////// + // + // Important note: These implementations must be FAST! + // #if LL_WINDOWS - // - // Windows implementation of CPU clock - // - - // - // NOTE: put back in when we aren't using platform sdk anymore - // - // because MS has different signatures for these functions in winnt.h - // need to rename them to avoid conflicts - //#define _interlockedbittestandset _renamed_interlockedbittestandset - //#define _interlockedbittestandreset _renamed_interlockedbittestandreset - //#include <intrin.h> - //#undef _interlockedbittestandset - //#undef _interlockedbittestandreset - - //inline U32 getCPUClockCount32() - //{ - // U64 time_stamp = __rdtsc(); - // return (U32)(time_stamp >> 8); - //} - // - //// return full timer value, *not* shifted by 8 bits - //inline U64 getCPUClockCount64() - //{ - // return __rdtsc(); - //} - - - - // shift off lower 8 bits for lower resolution but longer term timing - // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing + // + // Windows implementation of CPU clock + // + + // + // NOTE: put back in when we aren't using platform sdk anymore + // + // because MS has different signatures for these functions in winnt.h + // need to rename them to avoid conflicts + //#define _interlockedbittestandset _renamed_interlockedbittestandset + //#define _interlockedbittestandreset _renamed_interlockedbittestandreset + //#include <intrin.h> + //#undef _interlockedbittestandset + //#undef _interlockedbittestandreset + + //inline U32 getCPUClockCount32() + //{ + // U64 time_stamp = __rdtsc(); + // return (U32)(time_stamp >> 8); + //} + // + //// return full timer value, *not* shifted by 8 bits + //inline U64 getCPUClockCount64() + //{ + // return __rdtsc(); + //} + + + + // shift off lower 8 bits for lower resolution but longer term timing + // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing #if LL_FASTTIMER_USE_RDTSC - static U32 getCPUClockCount32() - { - unsigned __int64 val = __rdtsc(); - val = val >> 8; - return static_cast<U32>(val); - } - - // return full timer value, *not* shifted by 8 bits - static U64 getCPUClockCount64() - { - return static_cast<U64>( __rdtsc() ); - } + static U32 getCPUClockCount32() + { + unsigned __int64 val = __rdtsc(); + val = val >> 8; + return static_cast<U32>(val); + } + + // return full timer value, *not* shifted by 8 bits + static U64 getCPUClockCount64() + { + return static_cast<U64>( __rdtsc() ); + } #else - //U64 get_clock_count(); // in lltimer.cpp - // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures. - static U32 getCPUClockCount32() - { - return (U32)(get_clock_count()>>8); - } - - static U64 getCPUClockCount64() - { - return get_clock_count(); - } + //U64 get_clock_count(); // in lltimer.cpp + // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures. + static U32 getCPUClockCount32() + { + return (U32)(get_clock_count()>>8); + } + + static U64 getCPUClockCount64() + { + return get_clock_count(); + } #endif @@ -129,142 +129,142 @@ public: #if (LL_LINUX) && !(defined(__i386__) || defined(__amd64__)) - // - // Linux implementation of CPU clock - non-x86. - // This is accurate but SLOW! Only use out of desperation. - // - // Try to use the MONOTONIC clock if available, this is a constant time counter - // with nanosecond resolution (but not necessarily accuracy) and attempts are - // made to synchronize this value between cores at kernel start. It should not - // be affected by CPU frequency. If not available use the REALTIME clock, but - // this may be affected by NTP adjustments or other user activity affecting - // the system time. - static U64 getCPUClockCount64() - { - struct timespec tp; + // + // Linux implementation of CPU clock - non-x86. + // This is accurate but SLOW! Only use out of desperation. + // + // Try to use the MONOTONIC clock if available, this is a constant time counter + // with nanosecond resolution (but not necessarily accuracy) and attempts are + // made to synchronize this value between cores at kernel start. It should not + // be affected by CPU frequency. If not available use the REALTIME clock, but + // this may be affected by NTP adjustments or other user activity affecting + // the system time. + static U64 getCPUClockCount64() + { + struct timespec tp; #ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? - if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME + if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME #endif - clock_gettime(CLOCK_REALTIME,&tp); + clock_gettime(CLOCK_REALTIME,&tp); - return (tp.tv_sec*sClockResolution)+tp.tv_nsec; - } + return (tp.tv_sec*sClockResolution)+tp.tv_nsec; + } - static U32 getCPUClockCount32() - { - return (U32)(getCPUClockCount64() >> 8); - } + static U32 getCPUClockCount32() + { + return (U32)(getCPUClockCount64() >> 8); + } #endif // (LL_LINUX) && !(defined(__i386__) || defined(__amd64__)) #if (LL_LINUX || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) - // - // Mac+Linux FAST x86 implementation of CPU clock - static U32 getCPUClockCount32() - { - U32 low(0),high(0); - __asm__ volatile (".byte 0x0f, 0x31": "=a"(low), "=d"(high) ); - return (low>>8) | (high<<24); - } - - static U64 getCPUClockCount64() - { - U32 low(0),high(0); - __asm__ volatile (".byte 0x0f, 0x31": "=a"(low), "=d"(high) ); - return (U64)low | ( ((U64)high) << 32); - } + // + // Mac+Linux FAST x86 implementation of CPU clock + static U32 getCPUClockCount32() + { + U32 low(0),high(0); + __asm__ volatile (".byte 0x0f, 0x31": "=a"(low), "=d"(high) ); + return (low>>8) | (high<<24); + } + + static U64 getCPUClockCount64() + { + U32 low(0),high(0); + __asm__ volatile (".byte 0x0f, 0x31": "=a"(low), "=d"(high) ); + return (U64)low | ( ((U64)high) << 32); + } #endif - static BlockTimerStatHandle& getRootTimeBlock(); - static void pushLog(LLSD sd); - static void setLogLock(class LLMutex* mutex); - static void writeLog(std::ostream& os); - static void updateTimes(); - - static U64 countsPerSecond(); + static BlockTimerStatHandle& getRootTimeBlock(); + static void pushLog(LLSD sd); + static void setLogLock(class LLMutex* mutex); + static void writeLog(std::ostream& os); + static void updateTimes(); + + static U64 countsPerSecond(); - // updates cumulative times and hierarchy, - // can be called multiple times in a frame, at any point - static void processTimes(); + // updates cumulative times and hierarchy, + // can be called multiple times in a frame, at any point + static void processTimes(); - static void bootstrapTimerTree(); - static void incrementalUpdateTimerTree(); + static void bootstrapTimerTree(); + static void incrementalUpdateTimerTree(); - // call this once a frame to periodically log timers - static void logStats(); + // call this once a frame to periodically log timers + static void logStats(); - // dumps current cumulative frame stats to log - // call nextFrame() to reset timers - static void dumpCurTimes(); + // dumps current cumulative frame stats to log + // call nextFrame() to reset timers + static void dumpCurTimes(); private: - friend class BlockTimerStatHandle; - // FIXME: this friendship exists so that each thread can instantiate a root timer, - // which could be a derived class with a public constructor instead, possibly - friend class ThreadRecorder; - friend BlockTimer timeThisBlock(BlockTimerStatHandle&); + friend class BlockTimerStatHandle; + // FIXME: this friendship exists so that each thread can instantiate a root timer, + // which could be a derived class with a public constructor instead, possibly + friend class ThreadRecorder; + friend BlockTimer timeThisBlock(BlockTimerStatHandle&); - BlockTimer(BlockTimerStatHandle& timer); + BlockTimer(BlockTimerStatHandle& timer); - // no-copy - BlockTimer(const BlockTimer& other); - BlockTimer& operator=(const BlockTimer& other); + // no-copy + BlockTimer(const BlockTimer& other); + BlockTimer& operator=(const BlockTimer& other); private: - U64 mStartTime; - BlockTimerStackRecord mParentTimerData; + U64 mStartTime; + BlockTimerStackRecord mParentTimerData; public: - // statics - static std::string sLogName; - static bool sMetricLog, - sLog; - static U64 sClockResolution; + // statics + static std::string sLogName; + static bool sMetricLog, + sLog; + static U64 sClockResolution; }; // this dummy function assists in allocating a block timer with stack-based lifetime. -// this is done by capturing the return value in a stack-allocated const reference variable. +// this is done by capturing the return value in a stack-allocated const reference variable. // (This is most easily done using the macro LL_RECORD_BLOCK_TIME) -// Otherwise, it would be possible to store a BlockTimer on the heap, resulting in non-nested lifetimes, +// Otherwise, it would be possible to store a BlockTimer on the heap, resulting in non-nested lifetimes, // which would break the invariants of the timing hierarchy logic LL_FORCE_INLINE class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer) { - return BlockTimer(timer); + return BlockTimer(timer); } // stores a "named" timer instance to be reused via multiple BlockTimer stack instances -class BlockTimerStatHandle -: public StatType<TimeBlockAccumulator> +class BlockTimerStatHandle +: public StatType<TimeBlockAccumulator> { public: - BlockTimerStatHandle(const char* name, const char* description = ""); - - TimeBlockTreeNode& getTreeNode() const; - BlockTimerStatHandle* getParent() const { return getTreeNode().getParent(); } - void setParent(BlockTimerStatHandle* parent) { getTreeNode().setParent(parent); } - - typedef std::vector<BlockTimerStatHandle*>::iterator child_iter; - typedef std::vector<BlockTimerStatHandle*>::const_iterator child_const_iter; - child_iter beginChildren(); - child_iter endChildren(); - bool hasChildren(); - std::vector<BlockTimerStatHandle*>& getChildren(); - - StatType<TimeBlockAccumulator::CallCountFacet>& callCount() - { - return static_cast<StatType<TimeBlockAccumulator::CallCountFacet>&>(*(StatType<TimeBlockAccumulator>*)this); - } - - StatType<TimeBlockAccumulator::SelfTimeFacet>& selfTime() - { - return static_cast<StatType<TimeBlockAccumulator::SelfTimeFacet>&>(*(StatType<TimeBlockAccumulator>*)this); - } - - bool mCollapsed; // don't show children + BlockTimerStatHandle(const char* name, const char* description = ""); + + TimeBlockTreeNode& getTreeNode() const; + BlockTimerStatHandle* getParent() const { return getTreeNode().getParent(); } + void setParent(BlockTimerStatHandle* parent) { getTreeNode().setParent(parent); } + + typedef std::vector<BlockTimerStatHandle*>::iterator child_iter; + typedef std::vector<BlockTimerStatHandle*>::const_iterator child_const_iter; + child_iter beginChildren(); + child_iter endChildren(); + bool hasChildren(); + std::vector<BlockTimerStatHandle*>& getChildren(); + + StatType<TimeBlockAccumulator::CallCountFacet>& callCount() + { + return static_cast<StatType<TimeBlockAccumulator::CallCountFacet>&>(*(StatType<TimeBlockAccumulator>*)this); + } + + StatType<TimeBlockAccumulator::SelfTimeFacet>& selfTime() + { + return static_cast<StatType<TimeBlockAccumulator::SelfTimeFacet>&>(*(StatType<TimeBlockAccumulator>*)this); + } + + bool mCollapsed; // don't show children }; // iterators and helper functions for walking the call hierarchy of block timers in different ways @@ -282,61 +282,61 @@ block_timer_tree_bf_iterator_t end_block_timer_tree_bf(); LL_FORCE_INLINE BlockTimer::BlockTimer(BlockTimerStatHandle& timer) { #if LL_FAST_TIMER_ON - BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance(); - if (!cur_timer_data) - { - // How likely is it that - // LLThreadLocalSingletonPointer<T>::getInstance() will return NULL? - // Even without researching, what we can say is that if we exit - // without setting mStartTime at all, gcc 4.7 produces (fatal) - // warnings about a possibly-uninitialized data member. - mStartTime = 0; - return; - } - TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator(); - accumulator.mActiveCount++; - // keep current parent as long as it is active when we are - accumulator.mMoveUpTree |= (accumulator.mParent->getCurrentAccumulator().mActiveCount == 0); - - // store top of stack - mParentTimerData = *cur_timer_data; - // push new information - cur_timer_data->mActiveTimer = this; - cur_timer_data->mTimeBlock = &timer; - cur_timer_data->mChildTime = 0; - - mStartTime = getCPUClockCount64(); + BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance(); + if (!cur_timer_data) + { + // How likely is it that + // LLThreadLocalSingletonPointer<T>::getInstance() will return NULL? + // Even without researching, what we can say is that if we exit + // without setting mStartTime at all, gcc 4.7 produces (fatal) + // warnings about a possibly-uninitialized data member. + mStartTime = 0; + return; + } + TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator(); + accumulator.mActiveCount++; + // keep current parent as long as it is active when we are + accumulator.mMoveUpTree |= (accumulator.mParent->getCurrentAccumulator().mActiveCount == 0); + + // store top of stack + mParentTimerData = *cur_timer_data; + // push new information + cur_timer_data->mActiveTimer = this; + cur_timer_data->mTimeBlock = &timer; + cur_timer_data->mChildTime = 0; + + mStartTime = getCPUClockCount64(); #endif } LL_FORCE_INLINE BlockTimer::~BlockTimer() { #if LL_FAST_TIMER_ON - U64 total_time = getCPUClockCount64() - mStartTime; - BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance(); - if (!cur_timer_data) return; + U64 total_time = getCPUClockCount64() - mStartTime; + BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance(); + if (!cur_timer_data) return; - TimeBlockAccumulator& accumulator = cur_timer_data->mTimeBlock->getCurrentAccumulator(); + TimeBlockAccumulator& accumulator = cur_timer_data->mTimeBlock->getCurrentAccumulator(); - accumulator.mCalls++; - accumulator.mTotalTimeCounter += total_time; - accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime; - accumulator.mActiveCount--; + accumulator.mCalls++; + accumulator.mTotalTimeCounter += total_time; + accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime; + accumulator.mActiveCount--; - // store last caller to bootstrap tree creation - // do this in the destructor in case of recursion to get topmost caller - accumulator.mLastCaller = mParentTimerData.mTimeBlock; + // store last caller to bootstrap tree creation + // do this in the destructor in case of recursion to get topmost caller + accumulator.mLastCaller = mParentTimerData.mTimeBlock; - // we are only tracking self time, so subtract our total time delta from parents - mParentTimerData.mChildTime += total_time; + // we are only tracking self time, so subtract our total time delta from parents + mParentTimerData.mChildTime += total_time; - //pop stack - *cur_timer_data = mParentTimerData; + //pop stack + *cur_timer_data = mParentTimerData; #endif } } -typedef LLTrace::BlockTimer LLFastTimer; +typedef LLTrace::BlockTimer LLFastTimer; #endif // LL_LLFASTTIMER_H |