From 563daa96a873ef627ca42d24e0f6de6bb359326d Mon Sep 17 00:00:00 2001 From: Tofu Linden Date: Mon, 25 Jan 2010 12:07:25 -0800 Subject: pull in the linux+solaris fast-timers impl from snowglobe, fit it into viewer2, start moving headers around. --- indra/llcommon/llfasttimer.h | 364 ------------------------------------- indra/llcommon/llfasttimer_class.h | 271 +++++++++++++++++++++++++++ 2 files changed, 271 insertions(+), 364 deletions(-) delete mode 100644 indra/llcommon/llfasttimer.h create mode 100644 indra/llcommon/llfasttimer_class.h (limited to 'indra/llcommon') diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h deleted file mode 100644 index 8af79c90fd..0000000000 --- a/indra/llcommon/llfasttimer.h +++ /dev/null @@ -1,364 +0,0 @@ -/** - * @file llfasttimer.h - * @brief Declaration of a fast timer. - * - * $LicenseInfo:firstyear=2004&license=viewergpl$ - * - * Copyright (c) 2004-2009, Linden Research, Inc. - * - * Second Life Viewer Source Code - * The source code in this file ("Source Code") is provided by Linden Lab - * to you under the terms of the GNU General Public License, version 2.0 - * ("GPL"), unless you have obtained a separate licensing agreement - * ("Other License"), formally executed by you and Linden Lab. Terms of - * the GPL can be found in doc/GPL-license.txt in this distribution, or - * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2 - * - * There are special exceptions to the terms and conditions of the GPL as - * it is applied to this Source Code. View the full text of the exception - * in the file doc/FLOSS-exception.txt in this software distribution, or - * online at - * http://secondlifegrid.net/programs/open_source/licensing/flossexception - * - * By copying, modifying or distributing this software, you acknowledge - * that you have read and understood your obligations described above, - * and agree to abide by those obligations. - * - * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO - * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, - * COMPLETENESS OR PERFORMANCE. - * $/LicenseInfo$ - */ - -#ifndef LL_FASTTIMER_H -#define LL_FASTTIMER_H - -#include "llinstancetracker.h" - -#define FAST_TIMER_ON 1 -#define TIME_FAST_TIMERS 0 - -#if LL_WINDOWS -#define LL_INLINE __forceinline - -// -// NOTE: put back in when we aren't using platform sdk anymore -// -// because MS has different signatures for these functions in winnt.h -// need to rename them to avoid conflicts -//#define _interlockedbittestandset _renamed_interlockedbittestandset -//#define _interlockedbittestandreset _renamed_interlockedbittestandreset -//#include -//#undef _interlockedbittestandset -//#undef _interlockedbittestandreset - -//inline U32 get_cpu_clock_count_32() -//{ -// U64 time_stamp = __rdtsc(); -// return (U32)(time_stamp >> 8); -//} -// -//// return full timer value, *not* shifted by 8 bits -//inline U64 get_cpu_clock_count_64() -//{ -// return __rdtsc(); -//} - -// shift off lower 8 bits for lower resolution but longer term timing -// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing -inline U32 get_cpu_clock_count_32() -{ - U32 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - shr eax,8 - shl edx,24 - or eax, edx - mov dword ptr [ret_val], eax - } - return ret_val; -} - -// return full timer value, *not* shifted by 8 bits -inline U64 get_cpu_clock_count_64() -{ - U64 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - mov eax,eax - mov edx,edx - mov dword ptr [ret_val+4], edx - mov dword ptr [ret_val], eax - } - return ret_val; -} -#else -#define LL_INLINE -#endif - -#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) -inline U32 get_cpu_clock_count_32() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return (U32)x >> 8; -} - -inline U32 get_cpu_clock_count_64() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return x >> 8; -} -#endif - -#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) || (LL_SOLARIS && defined(__sparc__)) -// -// Mac PPC (deprecated) & Solaris SPARC implementation of CPU clock -// -// Just use gettimeofday implementation for now - -inline U32 get_cpu_clock_count_32() -{ - return (U32)get_clock_count(); -} - -inline U32 get_cpu_clock_count_64() -{ - return get_clock_count(); -} -#endif - -class LLMutex; - -#include -#include "llsd.h" - -class LL_COMMON_API LLFastTimer -{ -public: - - class NamedTimer; - - struct LL_COMMON_API FrameState - { - FrameState(NamedTimer* timerp); - - U32 mSelfTimeCounter; - U32 mCalls; - FrameState* mParent; // info for caller timer - FrameState* mLastCaller; // used to bootstrap tree construction - NamedTimer* mTimer; - U16 mActiveCount; // number of timers with this ID active on stack - bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame - }; - - // stores a "named" timer instance to be reused via multiple LLFastTimer stack instances - class LL_COMMON_API NamedTimer - : public LLInstanceTracker - { - friend class DeclareTimer; - public: - ~NamedTimer(); - - enum { HISTORY_NUM = 60 }; - - const std::string& getName() const { return mName; } - NamedTimer* getParent() const { return mParent; } - void setParent(NamedTimer* parent); - S32 getDepth(); - std::string getToolTip(S32 history_index = -1); - - typedef std::vector::const_iterator child_const_iter; - child_const_iter beginChildren(); - child_const_iter endChildren(); - std::vector& getChildren(); - - void setCollapsed(bool collapsed) { mCollapsed = collapsed; } - bool getCollapsed() const { return mCollapsed; } - - U32 getCountAverage() const { return mCountAverage; } - U32 getCallAverage() const { return mCallAverage; } - - U32 getHistoricalCount(S32 history_index = 0) const; - U32 getHistoricalCalls(S32 history_index = 0) const; - - static NamedTimer& getRootNamedTimer(); - - S32 getFrameStateIndex() const { return mFrameStateIndex; } - - FrameState& getFrameState() const; - - private: - friend class LLFastTimer; - friend class NamedTimerFactory; - - // - // methods - // - NamedTimer(const std::string& name); - // recursive call to gather total time from children - static void accumulateTimings(); - - // updates cumulative times and hierarchy, - // can be called multiple times in a frame, at any point - static void processTimes(); - - static void buildHierarchy(); - static void resetFrame(); - static void reset(); - - // - // members - // - S32 mFrameStateIndex; - - std::string mName; - - U32 mTotalTimeCounter; - - U32 mCountAverage; - U32 mCallAverage; - - U32* mCountHistory; - U32* mCallHistory; - - // tree structure - NamedTimer* mParent; // NamedTimer of caller(parent) - std::vector mChildren; - bool mCollapsed; // don't show children - bool mNeedsSorting; // sort children whenever child added - }; - - // used to statically declare a new named timer - class LL_COMMON_API DeclareTimer - : public LLInstanceTracker - { - friend class LLFastTimer; - public: - DeclareTimer(const std::string& name, bool open); - DeclareTimer(const std::string& name); - - static void updateCachedPointers(); - - private: - NamedTimer& mTimer; - FrameState* mFrameState; - }; - -public: - LLFastTimer(LLFastTimer::FrameState* state); - - LL_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer) - : mFrameState(timer.mFrameState) - { -#if TIME_FAST_TIMERS - U64 timer_start = get_cpu_clock_count_64(); -#endif -#if FAST_TIMER_ON - LLFastTimer::FrameState* frame_state = mFrameState; - mStartTime = get_cpu_clock_count_32(); - - frame_state->mActiveCount++; - frame_state->mCalls++; - // keep current parent as long as it is active when we are - frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0); - - LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData; - mLastTimerData = *cur_timer_data; - cur_timer_data->mCurTimer = this; - cur_timer_data->mFrameState = frame_state; - cur_timer_data->mChildTime = 0; -#endif -#if TIME_FAST_TIMERS - U64 timer_end = get_cpu_clock_count_64(); - sTimerCycles += timer_end - timer_start; -#endif - } - - LL_INLINE ~LLFastTimer() - { -#if TIME_FAST_TIMERS - U64 timer_start = get_cpu_clock_count_64(); -#endif -#if FAST_TIMER_ON - LLFastTimer::FrameState* frame_state = mFrameState; - U32 total_time = get_cpu_clock_count_32() - mStartTime; - - frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime; - frame_state->mActiveCount--; - - // store last caller to bootstrap tree creation - // do this in the destructor in case of recursion to get topmost caller - frame_state->mLastCaller = mLastTimerData.mFrameState; - - // we are only tracking self time, so subtract our total time delta from parents - mLastTimerData.mChildTime += total_time; - - LLFastTimer::sCurTimerData = mLastTimerData; -#endif -#if TIME_FAST_TIMERS - U64 timer_end = get_cpu_clock_count_64(); - sTimerCycles += timer_end - timer_start; - sTimerCalls++; -#endif - } - -public: - static LLMutex* sLogLock; - static std::queue sLogQueue; - static BOOL sLog; - static BOOL sMetricLog; - static bool sPauseHistory; - static bool sResetHistory; - static U64 sTimerCycles; - static U32 sTimerCalls; - - typedef std::vector info_list_t; - static info_list_t& getFrameStateList(); - - - // call this once a frame to reset timers - static void nextFrame(); - - // dumps current cumulative frame stats to log - // call nextFrame() to reset timers - static void dumpCurTimes(); - - // call this to reset timer hierarchy, averages, etc. - static void reset(); - - static U64 countsPerSecond(); - static S32 getLastFrameIndex() { return sLastFrameIndex; } - static S32 getCurFrameIndex() { return sCurFrameIndex; } - - static void writeLog(std::ostream& os); - static const NamedTimer* getTimerByName(const std::string& name); - - struct CurTimerData - { - LLFastTimer* mCurTimer; - FrameState* mFrameState; - U32 mChildTime; - }; - static CurTimerData sCurTimerData; - -private: - static S32 sCurFrameIndex; - static S32 sLastFrameIndex; - static U64 sLastFrameTime; - static info_list_t* sTimerInfos; - - U32 mStartTime; - LLFastTimer::FrameState* mFrameState; - LLFastTimer::CurTimerData mLastTimerData; - -}; - -typedef class LLFastTimer LLFastTimer; - -#endif // LL_LLFASTTIMER_H diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h new file mode 100644 index 0000000000..93d2d0494d --- /dev/null +++ b/indra/llcommon/llfasttimer_class.h @@ -0,0 +1,271 @@ +/** + * @file llfasttimer_class.h + * @brief Declaration of a fast timer. + * + * $LicenseInfo:firstyear=2004&license=viewergpl$ + * + * Copyright (c) 2004-2009, Linden Research, Inc. + * + * Second Life Viewer Source Code + * The source code in this file ("Source Code") is provided by Linden Lab + * to you under the terms of the GNU General Public License, version 2.0 + * ("GPL"), unless you have obtained a separate licensing agreement + * ("Other License"), formally executed by you and Linden Lab. Terms of + * the GPL can be found in doc/GPL-license.txt in this distribution, or + * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2 + * + * There are special exceptions to the terms and conditions of the GPL as + * it is applied to this Source Code. View the full text of the exception + * in the file doc/FLOSS-exception.txt in this software distribution, or + * online at + * http://secondlifegrid.net/programs/open_source/licensing/flossexception + * + * By copying, modifying or distributing this software, you acknowledge + * that you have read and understood your obligations described above, + * and agree to abide by those obligations. + * + * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO + * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, + * COMPLETENESS OR PERFORMANCE. + * $/LicenseInfo$ + */ + +#ifndef LL_FASTTIMER_CLASS_H +#define LL_FASTTIMER_CLASS_H + +#include "llinstancetracker.h" + +#define FAST_TIMER_ON 1 +#define TIME_FAST_TIMERS 0 + + +class LLMutex; + +#include +#include "llsd.h" + +class LL_COMMON_API LLFastTimer +{ +public: + + class NamedTimer; + + struct LL_COMMON_API FrameState + { + FrameState(NamedTimer* timerp); + + U32 mSelfTimeCounter; + U32 mCalls; + FrameState* mParent; // info for caller timer + FrameState* mLastCaller; // used to bootstrap tree construction + NamedTimer* mTimer; + U16 mActiveCount; // number of timers with this ID active on stack + bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame + }; + + // stores a "named" timer instance to be reused via multiple LLFastTimer stack instances + class LL_COMMON_API NamedTimer + : public LLInstanceTracker + { + friend class DeclareTimer; + public: + ~NamedTimer(); + + enum { HISTORY_NUM = 60 }; + + const std::string& getName() const { return mName; } + NamedTimer* getParent() const { return mParent; } + void setParent(NamedTimer* parent); + S32 getDepth(); + std::string getToolTip(S32 history_index = -1); + + typedef std::vector::const_iterator child_const_iter; + child_const_iter beginChildren(); + child_const_iter endChildren(); + std::vector& getChildren(); + + void setCollapsed(bool collapsed) { mCollapsed = collapsed; } + bool getCollapsed() const { return mCollapsed; } + + U32 getCountAverage() const { return mCountAverage; } + U32 getCallAverage() const { return mCallAverage; } + + U32 getHistoricalCount(S32 history_index = 0) const; + U32 getHistoricalCalls(S32 history_index = 0) const; + + static NamedTimer& getRootNamedTimer(); + + S32 getFrameStateIndex() const { return mFrameStateIndex; } + + FrameState& getFrameState() const; + + private: + friend class LLFastTimer; + friend class NamedTimerFactory; + + // + // methods + // + NamedTimer(const std::string& name); + // recursive call to gather total time from children + static void accumulateTimings(); + + // updates cumulative times and hierarchy, + // can be called multiple times in a frame, at any point + static void processTimes(); + + static void buildHierarchy(); + static void resetFrame(); + static void reset(); + + // + // members + // + S32 mFrameStateIndex; + + std::string mName; + + U32 mTotalTimeCounter; + + U32 mCountAverage; + U32 mCallAverage; + + U32* mCountHistory; + U32* mCallHistory; + + // tree structure + NamedTimer* mParent; // NamedTimer of caller(parent) + std::vector mChildren; + bool mCollapsed; // don't show children + bool mNeedsSorting; // sort children whenever child added + }; + + // used to statically declare a new named timer + class LL_COMMON_API DeclareTimer + : public LLInstanceTracker + { + friend class LLFastTimer; + public: + DeclareTimer(const std::string& name, bool open); + DeclareTimer(const std::string& name); + + static void updateCachedPointers(); + + private: + NamedTimer& mTimer; + FrameState* mFrameState; + }; + +public: + LLFastTimer(LLFastTimer::FrameState* state); + + LL_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer) + : mFrameState(timer.mFrameState) + { +#if TIME_FAST_TIMERS + U64 timer_start = get_cpu_clock_count_64(); +#endif +#if FAST_TIMER_ON + LLFastTimer::FrameState* frame_state = mFrameState; + mStartTime = get_cpu_clock_count_32(); + + frame_state->mActiveCount++; + frame_state->mCalls++; + // keep current parent as long as it is active when we are + frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0); + + LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData; + mLastTimerData = *cur_timer_data; + cur_timer_data->mCurTimer = this; + cur_timer_data->mFrameState = frame_state; + cur_timer_data->mChildTime = 0; +#endif +#if TIME_FAST_TIMERS + U64 timer_end = get_cpu_clock_count_64(); + sTimerCycles += timer_end - timer_start; +#endif + } + + LL_INLINE ~LLFastTimer() + { +#if TIME_FAST_TIMERS + U64 timer_start = get_cpu_clock_count_64(); +#endif +#if FAST_TIMER_ON + LLFastTimer::FrameState* frame_state = mFrameState; + U32 total_time = get_cpu_clock_count_32() - mStartTime; + + frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime; + frame_state->mActiveCount--; + + // store last caller to bootstrap tree creation + // do this in the destructor in case of recursion to get topmost caller + frame_state->mLastCaller = mLastTimerData.mFrameState; + + // we are only tracking self time, so subtract our total time delta from parents + mLastTimerData.mChildTime += total_time; + + LLFastTimer::sCurTimerData = mLastTimerData; +#endif +#if TIME_FAST_TIMERS + U64 timer_end = get_cpu_clock_count_64(); + sTimerCycles += timer_end - timer_start; + sTimerCalls++; +#endif + } + +public: + static LLMutex* sLogLock; + static std::queue sLogQueue; + static BOOL sLog; + static BOOL sMetricLog; + static bool sPauseHistory; + static bool sResetHistory; + static U64 sTimerCycles; + static U32 sTimerCalls; + + typedef std::vector info_list_t; + static info_list_t& getFrameStateList(); + + + // call this once a frame to reset timers + static void nextFrame(); + + // dumps current cumulative frame stats to log + // call nextFrame() to reset timers + static void dumpCurTimes(); + + // call this to reset timer hierarchy, averages, etc. + static void reset(); + + static U64 countsPerSecond(); + static S32 getLastFrameIndex() { return sLastFrameIndex; } + static S32 getCurFrameIndex() { return sCurFrameIndex; } + + static void writeLog(std::ostream& os); + static const NamedTimer* getTimerByName(const std::string& name); + + struct CurTimerData + { + LLFastTimer* mCurTimer; + FrameState* mFrameState; + U32 mChildTime; + }; + static CurTimerData sCurTimerData; + +private: + static S32 sCurFrameIndex; + static S32 sLastFrameIndex; + static U64 sLastFrameTime; + static info_list_t* sTimerInfos; + static U64 sClockResolution; + + U32 mStartTime; + LLFastTimer::FrameState* mFrameState; + LLFastTimer::CurTimerData mLastTimerData; + +}; + +typedef class LLFastTimer LLFastTimer; + +#endif // LL_LLFASTTIMER_CLASS_H -- cgit v1.2.3 From ec7b204ed657f7c1becac3b410ae0bc94c03aa75 Mon Sep 17 00:00:00 2001 From: Tofu Linden Date: Mon, 25 Jan 2010 12:31:50 -0800 Subject: shuffle shuffle of timer code. cleanup. --- indra/llcommon/CMakeLists.txt | 2 +- indra/llcommon/llfasttimer.h | 169 ++++++++ indra/llcommon/llfasttimer_class.cpp | 751 +++++++++++++++++++++++++++++++++++ indra/llcommon/llfasttimer_class.h | 24 +- 4 files changed, 937 insertions(+), 9 deletions(-) create mode 100644 indra/llcommon/llfasttimer.h create mode 100644 indra/llcommon/llfasttimer_class.cpp (limited to 'indra/llcommon') diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index ac7cc2cdac..05e45d6d8a 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -50,7 +50,7 @@ set(llcommon_SOURCE_FILES lleventdispatcher.cpp lleventfilter.cpp llevents.cpp - llfasttimer.cpp + llfasttimer_class.cpp llfile.cpp llfindlocale.cpp llfixedbuffer.cpp diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h new file mode 100644 index 0000000000..0f3280023c --- /dev/null +++ b/indra/llcommon/llfasttimer.h @@ -0,0 +1,169 @@ +/** + * @file llfasttimer.h + * @brief Inline implementations of fast timers. + * + * $LicenseInfo:firstyear=2004&license=viewergpl$ + * + * Copyright (c) 2004-2009, Linden Research, Inc. + * + * Second Life Viewer Source Code + * The source code in this file ("Source Code") is provided by Linden Lab + * to you under the terms of the GNU General Public License, version 2.0 + * ("GPL"), unless you have obtained a separate licensing agreement + * ("Other License"), formally executed by you and Linden Lab. Terms of + * the GPL can be found in doc/GPL-license.txt in this distribution, or + * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2 + * + * There are special exceptions to the terms and conditions of the GPL as + * it is applied to this Source Code. View the full text of the exception + * in the file doc/FLOSS-exception.txt in this software distribution, or + * online at + * http://secondlifegrid.net/programs/open_source/licensing/flossexception + * + * By copying, modifying or distributing this software, you acknowledge + * that you have read and understood your obligations described above, + * and agree to abide by those obligations. + * + * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO + * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, + * COMPLETENESS OR PERFORMANCE. + * $/LicenseInfo$ + */ + +#ifndef LL_FASTTIMER_H +#define LL_FASTTIMER_H + +// pull in the actual class definition +#include "llfasttimer_class.h" + +#if LL_WINDOWS +#define LL_INLINE __forceinline +// +// Windows implementation of CPU clock +// + +// +// NOTE: put back in when we aren't using platform sdk anymore +// +// because MS has different signatures for these functions in winnt.h +// need to rename them to avoid conflicts +//#define _interlockedbittestandset _renamed_interlockedbittestandset +//#define _interlockedbittestandreset _renamed_interlockedbittestandreset +//#include +//#undef _interlockedbittestandset +//#undef _interlockedbittestandreset + +//inline U32 get_cpu_clock_count_32() +//{ +// U64 time_stamp = __rdtsc(); +// return (U32)(time_stamp >> 8); +//} +// +//// return full timer value, *not* shifted by 8 bits +//inline U64 get_cpu_clock_count_64() +//{ +// return __rdtsc(); +//} + +// shift off lower 8 bits for lower resolution but longer term timing +// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing +inline U32 get_cpu_clock_count_32() +{ + U32 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + shr eax,8 + shl edx,24 + or eax, edx + mov dword ptr [ret_val], eax + } + return ret_val; +} + +// return full timer value, *not* shifted by 8 bits +inline U64 get_cpu_clock_count_64() +{ + U64 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + mov eax,eax + mov edx,edx + mov dword ptr [ret_val+4], edx + mov dword ptr [ret_val], eax + } + return ret_val; +} +#else +#define LL_INLINE +#endif + + +#if LL_LINUX || LL_SOLARIS +// +// Linux and Solaris implementation of CPU clock - all architectures. +// +// Try to use the MONOTONIC clock if available, this is a constant time counter +// with nanosecond resolution (but not necessarily accuracy) and attempts are made +// to synchronize this value between cores at kernel start. It should not be affected +// by CPU frequency. If not available use the REALTIME clock, but this may be affected by +// NTP adjustments or other user activity affecting the system time. +inline U64 get_cpu_clock_count_64() +{ + struct timespec tp; + +#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? + if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime, try REALTIME +#endif + clock_gettime(CLOCK_REALTIME,&tp); + + return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; +} + +inline U32 get_cpu_clock_count_32() +{ + return (U32)get_cpu_clock_count_64(); +} +#endif // (LL_LINUX || LL_SOLARIS)) + + +#if (LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) +// +// Mac x86 implementation of CPU clock +inline U32 get_cpu_clock_count_32() +{ + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return (U32)x >> 8; +} + +inline U32 get_cpu_clock_count_64() +{ + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return x >> 8; +} +#endif + + +#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) +// +// Mac PPC (deprecated) implementation of CPU clock +// +// Just use gettimeofday implementation for now + +inline U32 get_cpu_clock_count_32() +{ + return (U32)get_clock_count(); +} + +inline U32 get_cpu_clock_count_64() +{ + return get_clock_count(); +} +#endif + +#endif // LL_LLFASTTIMER_H diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp new file mode 100644 index 0000000000..6dbd90f9fa --- /dev/null +++ b/indra/llcommon/llfasttimer_class.cpp @@ -0,0 +1,751 @@ +/** + * @file llfasttimer_class.cpp + * @brief Implementation of the fast timer. + * + * $LicenseInfo:firstyear=2004&license=viewergpl$ + * + * Copyright (c) 2004-2007, Linden Research, Inc. + * + * Second Life Viewer Source Code + * The source code in this file ("Source Code") is provided by Linden Lab + * to you under the terms of the GNU General Public License, version 2.0 + * ("GPL"), unless you have obtained a separate licensing agreement + * ("Other License"), formally executed by you and Linden Lab. Terms of + * the GPL can be found in doc/GPL-license.txt in this distribution, or + * online at http://secondlife.com/developers/opensource/gplv2 + * + * There are special exceptions to the terms and conditions of the GPL as + * it is applied to this Source Code. View the full text of the exception + * in the file doc/FLOSS-exception.txt in this software distribution, or + * online at http://secondlife.com/developers/opensource/flossexception + * + * By copying, modifying or distributing this software, you acknowledge + * that you have read and understood your obligations described above, + * and agree to abide by those obligations. + * + * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS."LINDEN LAB MAKES NO + * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, + * COMPLETENESS OR PERFORMANCE. + * $/LicenseInfo$ + */ +#include "linden_common.h" + +#include "llfasttimer.h" + +#include "llmemory.h" +#include "llprocessor.h" +#include "llsingleton.h" +#include "lltreeiterators.h" +#include "llsdserialize.h" + +#include + +#if LL_WINDOWS +#elif LL_LINUX || LL_SOLARIS +#include +#include +#elif LL_DARWIN +#include +#include "lltimer.h" // get_clock_count() +#else +#error "architecture not supported" +#endif + +////////////////////////////////////////////////////////////////////////////// +// statics + +S32 LLFastTimer::sCurFrameIndex = -1; +S32 LLFastTimer::sLastFrameIndex = -1; +U64 LLFastTimer::sLastFrameTime = get_cpu_clock_count_64(); +bool LLFastTimer::sPauseHistory = 0; +bool LLFastTimer::sResetHistory = 0; +LLFastTimer::CurTimerData LLFastTimer::sCurTimerData; +BOOL LLFastTimer::sLog = FALSE; +BOOL LLFastTimer::sMetricLog = FALSE; +LLMutex* LLFastTimer::sLogLock = NULL; +std::queue LLFastTimer::sLogQueue; + +#if LL_LINUX || LL_SOLARIS +U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution +#else +U64 LLFastTimer::sClockResolution = 1000000; // Microsecond resolution +#endif + +std::vector* LLFastTimer::sTimerInfos = NULL; +U64 LLFastTimer::sTimerCycles = 0; +U32 LLFastTimer::sTimerCalls = 0; + + +// FIXME: move these declarations to the relevant modules + +// helper functions +typedef LLTreeDFSPostIter timer_tree_bottom_up_iterator_t; + +static timer_tree_bottom_up_iterator_t begin_timer_tree_bottom_up(LLFastTimer::NamedTimer& id) +{ + return timer_tree_bottom_up_iterator_t(&id, + boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::beginChildren), _1), + boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::endChildren), _1)); +} + +static timer_tree_bottom_up_iterator_t end_timer_tree_bottom_up() +{ + return timer_tree_bottom_up_iterator_t(); +} + +typedef LLTreeDFSIter timer_tree_dfs_iterator_t; + + +static timer_tree_dfs_iterator_t begin_timer_tree(LLFastTimer::NamedTimer& id) +{ + return timer_tree_dfs_iterator_t(&id, + boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::beginChildren), _1), + boost::bind(boost::mem_fn(&LLFastTimer::NamedTimer::endChildren), _1)); +} + +static timer_tree_dfs_iterator_t end_timer_tree() +{ + return timer_tree_dfs_iterator_t(); +} + + + +// factory class that creates NamedTimers via static DeclareTimer objects +class NamedTimerFactory : public LLSingleton +{ +public: + NamedTimerFactory() + {} + + /*virtual */ void initSingleton() + { + mTimerRoot = new LLFastTimer::NamedTimer("root"); + + mActiveTimerRoot = new LLFastTimer::NamedTimer("Frame"); + mActiveTimerRoot->setCollapsed(false); + + mRootFrameState = new LLFastTimer::FrameState(mActiveTimerRoot); + mRootFrameState->mParent = &mTimerRoot->getFrameState(); + mActiveTimerRoot->setParent(mTimerRoot); + + mAppTimer = new LLFastTimer(mRootFrameState); + } + + ~NamedTimerFactory() + { + std::for_each(mTimers.begin(), mTimers.end(), DeletePairedPointer()); + + delete mAppTimer; + delete mActiveTimerRoot; + delete mTimerRoot; + delete mRootFrameState; + } + + LLFastTimer::NamedTimer& createNamedTimer(const std::string& name) + { + timer_map_t::iterator found_it = mTimers.find(name); + if (found_it != mTimers.end()) + { + return *found_it->second; + } + + LLFastTimer::NamedTimer* timer = new LLFastTimer::NamedTimer(name); + timer->setParent(mTimerRoot); + mTimers.insert(std::make_pair(name, timer)); + + return *timer; + } + + LLFastTimer::NamedTimer* getTimerByName(const std::string& name) + { + timer_map_t::iterator found_it = mTimers.find(name); + if (found_it != mTimers.end()) + { + return found_it->second; + } + return NULL; + } + + LLFastTimer::NamedTimer* getActiveRootTimer() { return mActiveTimerRoot; } + LLFastTimer::NamedTimer* getRootTimer() { return mTimerRoot; } + const LLFastTimer* getAppTimer() { return mAppTimer; } + LLFastTimer::FrameState& getRootFrameState() { return *mRootFrameState; } + + typedef std::map timer_map_t; + timer_map_t::iterator beginTimers() { return mTimers.begin(); } + timer_map_t::iterator endTimers() { return mTimers.end(); } + S32 timerCount() { return mTimers.size(); } + +private: + timer_map_t mTimers; + + LLFastTimer::NamedTimer* mActiveTimerRoot; + LLFastTimer::NamedTimer* mTimerRoot; + LLFastTimer* mAppTimer; + LLFastTimer::FrameState* mRootFrameState; +}; + +void update_cached_pointers_if_changed() +{ + // detect when elements have moved and update cached pointers + static LLFastTimer::FrameState* sFirstTimerAddress = NULL; + if (&*(LLFastTimer::getFrameStateList().begin()) != sFirstTimerAddress) + { + LLFastTimer::DeclareTimer::updateCachedPointers(); + } + sFirstTimerAddress = &*(LLFastTimer::getFrameStateList().begin()); +} + +LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open ) +: mTimer(NamedTimerFactory::instance().createNamedTimer(name)) +{ + mTimer.setCollapsed(!open); + mFrameState = &mTimer.getFrameState(); + update_cached_pointers_if_changed(); +} + +LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name) +: mTimer(NamedTimerFactory::instance().createNamedTimer(name)) +{ + mFrameState = &mTimer.getFrameState(); + update_cached_pointers_if_changed(); +} + +// static +void LLFastTimer::DeclareTimer::updateCachedPointers() +{ + // propagate frame state pointers to timer declarations + for (DeclareTimer::instance_iter it = DeclareTimer::beginInstances(); + it != DeclareTimer::endInstances(); + ++it) + { + // update cached pointer + it->mFrameState = &it->mTimer.getFrameState(); + } +} + +//static +#if LL_LINUX || LL_SOLARIS || ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)) ) +U64 LLFastTimer::countsPerSecond() +{ + return sClockResolution; +} +#else // windows or x86-mac +U64 LLFastTimer::countsPerSecond() +{ + static U64 sCPUClockFrequency = U64(CProcessor().GetCPUFrequency(50)); + + // we drop the low-order byte in out timers, so report a lower frequency + return sCPUClockFrequency >> 8; +} +#endif + +LLFastTimer::FrameState::FrameState(LLFastTimer::NamedTimer* timerp) +: mActiveCount(0), + mCalls(0), + mSelfTimeCounter(0), + mParent(NULL), + mLastCaller(NULL), + mMoveUpTree(false), + mTimer(timerp) +{} + + +LLFastTimer::NamedTimer::NamedTimer(const std::string& name) +: mName(name), + mCollapsed(true), + mParent(NULL), + mTotalTimeCounter(0), + mCountAverage(0), + mCallAverage(0), + mNeedsSorting(false) +{ + info_list_t& frame_state_list = getFrameStateList(); + mFrameStateIndex = frame_state_list.size(); + getFrameStateList().push_back(FrameState(this)); + + mCountHistory = new U32[HISTORY_NUM]; + memset(mCountHistory, 0, sizeof(U32) * HISTORY_NUM); + mCallHistory = new U32[HISTORY_NUM]; + memset(mCallHistory, 0, sizeof(U32) * HISTORY_NUM); +} + +LLFastTimer::NamedTimer::~NamedTimer() +{ + delete[] mCountHistory; + delete[] mCallHistory; +} + +std::string LLFastTimer::NamedTimer::getToolTip(S32 history_idx) +{ + if (history_idx < 0) + { + // by default, show average number of calls + return llformat("%s (%d calls)", getName().c_str(), (S32)getCallAverage()); + } + else + { + return llformat("%s (%d calls)", getName().c_str(), (S32)getHistoricalCalls(history_idx)); + } +} + +void LLFastTimer::NamedTimer::setParent(NamedTimer* parent) +{ + llassert_always(parent != this); + llassert_always(parent != NULL); + + if (mParent) + { + // subtract our accumulated from previous parent + for (S32 i = 0; i < HISTORY_NUM; i++) + { + mParent->mCountHistory[i] -= mCountHistory[i]; + } + + // subtract average timing from previous parent + mParent->mCountAverage -= mCountAverage; + + std::vector& children = mParent->getChildren(); + std::vector::iterator found_it = std::find(children.begin(), children.end(), this); + if (found_it != children.end()) + { + children.erase(found_it); + } + } + + mParent = parent; + if (parent) + { + getFrameState().mParent = &parent->getFrameState(); + parent->getChildren().push_back(this); + parent->mNeedsSorting = true; + } +} + +S32 LLFastTimer::NamedTimer::getDepth() +{ + S32 depth = 0; + NamedTimer* timerp = mParent; + while(timerp) + { + depth++; + timerp = timerp->mParent; + } + return depth; +} + +// static +void LLFastTimer::NamedTimer::processTimes() +{ + if (sCurFrameIndex < 0) return; + + buildHierarchy(); + accumulateTimings(); +} + +// sort timer info structs by depth first traversal order +struct SortTimersDFS +{ + bool operator()(const LLFastTimer::FrameState& i1, const LLFastTimer::FrameState& i2) + { + return i1.mTimer->getFrameStateIndex() < i2.mTimer->getFrameStateIndex(); + } +}; + +// sort child timers by name +struct SortTimerByName +{ + bool operator()(const LLFastTimer::NamedTimer* i1, const LLFastTimer::NamedTimer* i2) + { + return i1->getName() < i2->getName(); + } +}; + +//static +void LLFastTimer::NamedTimer::buildHierarchy() +{ + if (sCurFrameIndex < 0 ) return; + + // set up initial tree + for (instance_iter it = NamedTimer::beginInstances(); + it != endInstances(); + ++it) + { + NamedTimer& timer = *it; + if (&timer == NamedTimerFactory::instance().getRootTimer()) continue; + + // bootstrap tree construction by attaching to last timer to be on stack + // when this timer was called + if (timer.getFrameState().mLastCaller && timer.mParent == NamedTimerFactory::instance().getRootTimer()) + { + timer.setParent(timer.getFrameState().mLastCaller->mTimer); + // no need to push up tree on first use, flag can be set spuriously + timer.getFrameState().mMoveUpTree = false; + } + } + + // bump timers up tree if they've been flagged as being in the wrong place + // do this in a bottom up order to promote descendants first before promoting ancestors + // this preserves partial order derived from current frame's observations + for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getRootTimer()); + it != end_timer_tree_bottom_up(); + ++it) + { + NamedTimer* timerp = *it; + // skip root timer + if (timerp == NamedTimerFactory::instance().getRootTimer()) continue; + + if (timerp->getFrameState().mMoveUpTree) + { + // since ancestors have already been visited, reparenting won't affect tree traversal + //step up tree, bringing our descendants with us + //llinfos << "Moving " << timerp->getName() << " from child of " << timerp->getParent()->getName() << + // " to child of " << timerp->getParent()->getParent()->getName() << llendl; + timerp->setParent(timerp->getParent()->getParent()); + timerp->getFrameState().mMoveUpTree = false; + + // don't bubble up any ancestors until descendants are done bubbling up + it.skipAncestors(); + } + } + + // sort timers by time last called, so call graph makes sense + for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer()); + it != end_timer_tree(); + ++it) + { + NamedTimer* timerp = (*it); + if (timerp->mNeedsSorting) + { + std::sort(timerp->getChildren().begin(), timerp->getChildren().end(), SortTimerByName()); + } + timerp->mNeedsSorting = false; + } +} + +//static +void LLFastTimer::NamedTimer::accumulateTimings() +{ + U32 cur_time = get_cpu_clock_count_32(); + + // walk up stack of active timers and accumulate current time while leaving timing structures active + LLFastTimer* cur_timer = sCurTimerData.mCurTimer; + // root defined by parent pointing to self + CurTimerData* cur_data = &sCurTimerData; + while(cur_timer->mLastTimerData.mCurTimer != cur_timer) + { + U32 cumulative_time_delta = cur_time - cur_timer->mStartTime; + U32 self_time_delta = cumulative_time_delta - cur_data->mChildTime; + cur_data->mChildTime = 0; + cur_timer->mFrameState->mSelfTimeCounter += self_time_delta; + cur_timer->mStartTime = cur_time; + + cur_data = &cur_timer->mLastTimerData; + cur_data->mChildTime += cumulative_time_delta; + + cur_timer = cur_timer->mLastTimerData.mCurTimer; + } + + // traverse tree in DFS post order, or bottom up + for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getActiveRootTimer()); + it != end_timer_tree_bottom_up(); + ++it) + { + NamedTimer* timerp = (*it); + timerp->mTotalTimeCounter = timerp->getFrameState().mSelfTimeCounter; + for (child_const_iter child_it = timerp->beginChildren(); child_it != timerp->endChildren(); ++child_it) + { + timerp->mTotalTimeCounter += (*child_it)->mTotalTimeCounter; + } + + S32 cur_frame = sCurFrameIndex; + if (cur_frame >= 0) + { + // update timer history + int hidx = cur_frame % HISTORY_NUM; + + timerp->mCountHistory[hidx] = timerp->mTotalTimeCounter; + timerp->mCountAverage = (timerp->mCountAverage * cur_frame + timerp->mTotalTimeCounter) / (cur_frame+1); + timerp->mCallHistory[hidx] = timerp->getFrameState().mCalls; + timerp->mCallAverage = (timerp->mCallAverage * cur_frame + timerp->getFrameState().mCalls) / (cur_frame+1); + } + } +} + +// static +void LLFastTimer::NamedTimer::resetFrame() +{ + if (sLog) + { //output current frame counts to performance log + F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency + + F64 total_time = 0; + LLSD sd; + + for (NamedTimer::instance_iter it = NamedTimer::beginInstances(); + it != NamedTimer::endInstances(); + ++it) + { + NamedTimer& timer = *it; + FrameState& info = timer.getFrameState(); + sd[timer.getName()]["Time"] = (LLSD::Real) (info.mSelfTimeCounter*iclock_freq); + sd[timer.getName()]["Calls"] = (LLSD::Integer) info.mCalls; + + // computing total time here because getting the root timer's getCountHistory + // doesn't work correctly on the first frame + total_time = total_time + info.mSelfTimeCounter * iclock_freq; + } + + sd["Total"]["Time"] = (LLSD::Real) total_time; + sd["Total"]["Calls"] = (LLSD::Integer) 1; + + { + LLMutexLock lock(sLogLock); + sLogQueue.push(sd); + } + } + + + // tag timers by position in depth first traversal of tree + S32 index = 0; + for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer()); + it != end_timer_tree(); + ++it) + { + NamedTimer* timerp = (*it); + + timerp->mFrameStateIndex = index; + index++; + + llassert_always(timerp->mFrameStateIndex < (S32)getFrameStateList().size()); + } + + // sort timers by dfs traversal order to improve cache coherency + std::sort(getFrameStateList().begin(), getFrameStateList().end(), SortTimersDFS()); + + // update pointers into framestatelist now that we've sorted it + DeclareTimer::updateCachedPointers(); + + // reset for next frame + for (NamedTimer::instance_iter it = NamedTimer::beginInstances(); + it != NamedTimer::endInstances(); + ++it) + { + NamedTimer& timer = *it; + + FrameState& info = timer.getFrameState(); + info.mSelfTimeCounter = 0; + info.mCalls = 0; + info.mLastCaller = NULL; + info.mMoveUpTree = false; + // update parent pointer in timer state struct + if (timer.mParent) + { + info.mParent = &timer.mParent->getFrameState(); + } + } + + //sTimerCycles = 0; + //sTimerCalls = 0; +} + +//static +void LLFastTimer::NamedTimer::reset() +{ + resetFrame(); // reset frame data + + // walk up stack of active timers and reset start times to current time + // effectively zeroing out any accumulated time + U32 cur_time = get_cpu_clock_count_32(); + + // root defined by parent pointing to self + CurTimerData* cur_data = &sCurTimerData; + LLFastTimer* cur_timer = cur_data->mCurTimer; + while(cur_timer->mLastTimerData.mCurTimer != cur_timer) + { + cur_timer->mStartTime = cur_time; + cur_data->mChildTime = 0; + + cur_data = &cur_timer->mLastTimerData; + cur_timer = cur_data->mCurTimer; + } + + // reset all history + for (NamedTimer::instance_iter it = NamedTimer::beginInstances(); + it != NamedTimer::endInstances(); + ++it) + { + NamedTimer& timer = *it; + if (&timer != NamedTimerFactory::instance().getRootTimer()) + { + timer.setParent(NamedTimerFactory::instance().getRootTimer()); + } + + timer.mCountAverage = 0; + timer.mCallAverage = 0; + memset(timer.mCountHistory, 0, sizeof(U32) * HISTORY_NUM); + memset(timer.mCallHistory, 0, sizeof(U32) * HISTORY_NUM); + } + + sLastFrameIndex = 0; + sCurFrameIndex = 0; +} + +//static +LLFastTimer::info_list_t& LLFastTimer::getFrameStateList() +{ + if (!sTimerInfos) + { + sTimerInfos = new info_list_t(); + } + return *sTimerInfos; +} + + +U32 LLFastTimer::NamedTimer::getHistoricalCount(S32 history_index) const +{ + S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM; + return mCountHistory[history_idx]; +} + +U32 LLFastTimer::NamedTimer::getHistoricalCalls(S32 history_index ) const +{ + S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM; + return mCallHistory[history_idx]; +} + +LLFastTimer::FrameState& LLFastTimer::NamedTimer::getFrameState() const +{ + llassert_always(mFrameStateIndex >= 0); + if (this == NamedTimerFactory::instance().getActiveRootTimer()) + { + return NamedTimerFactory::instance().getRootFrameState(); + } + return getFrameStateList()[mFrameStateIndex]; +} + +// static +LLFastTimer::NamedTimer& LLFastTimer::NamedTimer::getRootNamedTimer() +{ + return *NamedTimerFactory::instance().getActiveRootTimer(); +} + +std::vector::const_iterator LLFastTimer::NamedTimer::beginChildren() +{ + return mChildren.begin(); +} + +std::vector::const_iterator LLFastTimer::NamedTimer::endChildren() +{ + return mChildren.end(); +} + +std::vector& LLFastTimer::NamedTimer::getChildren() +{ + return mChildren; +} + +//static +void LLFastTimer::nextFrame() +{ + countsPerSecond(); // good place to calculate clock frequency + U64 frame_time = get_cpu_clock_count_64(); + if ((frame_time - sLastFrameTime) >> 8 > 0xffffffff) + { + llinfos << "Slow frame, fast timers inaccurate" << llendl; + } + + if (sPauseHistory) + { + sResetHistory = true; + } + else if (sResetHistory) + { + sLastFrameIndex = 0; + sCurFrameIndex = 0; + sResetHistory = false; + } + else // not paused + { + NamedTimer::processTimes(); + sLastFrameIndex = sCurFrameIndex++; + } + + // get ready for next frame + NamedTimer::resetFrame(); + sLastFrameTime = frame_time; +} + +//static +void LLFastTimer::dumpCurTimes() +{ + // accumulate timings, etc. + NamedTimer::processTimes(); + + F64 clock_freq = (F64)countsPerSecond(); + F64 iclock_freq = 1000.0 / clock_freq; // clock_ticks -> milliseconds + + // walk over timers in depth order and output timings + for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer()); + it != end_timer_tree(); + ++it) + { + NamedTimer* timerp = (*it); + F64 total_time_ms = ((F64)timerp->getHistoricalCount(0) * iclock_freq); + // Don't bother with really brief times, keep output concise + if (total_time_ms < 0.1) continue; + + std::ostringstream out_str; + for (S32 i = 0; i < timerp->getDepth(); i++) + { + out_str << "\t"; + } + + + out_str << timerp->getName() << " " + << std::setprecision(3) << total_time_ms << " ms, " + << timerp->getHistoricalCalls(0) << " calls"; + + llinfos << out_str.str() << llendl; + } +} + +//static +void LLFastTimer::reset() +{ + NamedTimer::reset(); +} + + +//static +void LLFastTimer::writeLog(std::ostream& os) +{ + while (!sLogQueue.empty()) + { + LLSD& sd = sLogQueue.front(); + LLSDSerialize::toXML(sd, os); + LLMutexLock lock(sLogLock); + sLogQueue.pop(); + } +} + +//static +const LLFastTimer::NamedTimer* LLFastTimer::getTimerByName(const std::string& name) +{ + return NamedTimerFactory::instance().getTimerByName(name); +} + +LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) +: mFrameState(state) +{ + U32 start_time = get_cpu_clock_count_32(); + mStartTime = start_time; + mFrameState->mActiveCount++; + LLFastTimer::sCurTimerData.mCurTimer = this; + LLFastTimer::sCurTimerData.mFrameState = mFrameState; + LLFastTimer::sCurTimerData.mChildTime = 0; + mLastTimerData = LLFastTimer::sCurTimerData; +} + + +////////////////////////////////////////////////////////////////////////////// diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h index 93d2d0494d..4cb0c2d04e 100644 --- a/indra/llcommon/llfasttimer_class.h +++ b/indra/llcommon/llfasttimer_class.h @@ -39,6 +39,12 @@ #define TIME_FAST_TIMERS 0 +#if LL_WINDOWS +#define LL_INLINE __forceinline +#else +#define LL_INLINE +#endif // LL_WINDOWS + class LLMutex; #include @@ -47,7 +53,6 @@ class LLMutex; class LL_COMMON_API LLFastTimer { public: - class NamedTimer; struct LL_COMMON_API FrameState @@ -163,11 +168,11 @@ public: : mFrameState(timer.mFrameState) { #if TIME_FAST_TIMERS - U64 timer_start = get_cpu_clock_count_64(); + U64 timer_start = getCPUClockCount64(); #endif #if FAST_TIMER_ON LLFastTimer::FrameState* frame_state = mFrameState; - mStartTime = get_cpu_clock_count_32(); + mStartTime = getCPUClockCount32(); frame_state->mActiveCount++; frame_state->mCalls++; @@ -181,7 +186,7 @@ public: cur_timer_data->mChildTime = 0; #endif #if TIME_FAST_TIMERS - U64 timer_end = get_cpu_clock_count_64(); + U64 timer_end = getCPUClockCount64(); sTimerCycles += timer_end - timer_start; #endif } @@ -189,11 +194,11 @@ public: LL_INLINE ~LLFastTimer() { #if TIME_FAST_TIMERS - U64 timer_start = get_cpu_clock_count_64(); + U64 timer_start = getCPUClockCount64(); #endif #if FAST_TIMER_ON LLFastTimer::FrameState* frame_state = mFrameState; - U32 total_time = get_cpu_clock_count_32() - mStartTime; + U32 total_time = getCPUClockCount32() - mStartTime; frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime; frame_state->mActiveCount--; @@ -208,7 +213,7 @@ public: LLFastTimer::sCurTimerData = mLastTimerData; #endif #if TIME_FAST_TIMERS - U64 timer_end = get_cpu_clock_count_64(); + U64 timer_end = getCPUClockCount64(); sTimerCycles += timer_end - timer_start; sTimerCalls++; #endif @@ -254,11 +259,14 @@ public: static CurTimerData sCurTimerData; private: + static U32 getCPUClockCount32(); + static U64 getCPUClockCount64(); + static U64 sClockResolution; + static S32 sCurFrameIndex; static S32 sLastFrameIndex; static U64 sLastFrameTime; static info_list_t* sTimerInfos; - static U64 sClockResolution; U32 mStartTime; LLFastTimer::FrameState* mFrameState; -- cgit v1.2.3 From 3d0ff2585eb32c67d503452ce9f8d2198be823c8 Mon Sep 17 00:00:00 2001 From: Tofu Linden Date: Mon, 25 Jan 2010 13:24:45 -0800 Subject: Final fix for fast timer reshuffle. --- indra/llcommon/llfasttimer.h | 25 +++++++++++-------------- indra/llcommon/llfasttimer_class.cpp | 10 +++++----- 2 files changed, 16 insertions(+), 19 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 0f3280023c..9f9e2ea945 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -37,7 +37,6 @@ #include "llfasttimer_class.h" #if LL_WINDOWS -#define LL_INLINE __forceinline // // Windows implementation of CPU clock // @@ -53,21 +52,21 @@ //#undef _interlockedbittestandset //#undef _interlockedbittestandreset -//inline U32 get_cpu_clock_count_32() +//inline U32 LLFastTimer::getCPUClockCount32() //{ // U64 time_stamp = __rdtsc(); // return (U32)(time_stamp >> 8); //} // //// return full timer value, *not* shifted by 8 bits -//inline U64 get_cpu_clock_count_64() +//inline U64 LLFastTimer::getCPUClockCount64() //{ // return __rdtsc(); //} // shift off lower 8 bits for lower resolution but longer term timing // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing -inline U32 get_cpu_clock_count_32() +inline U32 LLFastTimer::getCPUClockCount32() { U32 ret_val; __asm @@ -83,7 +82,7 @@ inline U32 get_cpu_clock_count_32() } // return full timer value, *not* shifted by 8 bits -inline U64 get_cpu_clock_count_64() +inline U64 LLFastTimer::getCPUClockCount64() { U64 ret_val; __asm @@ -97,8 +96,6 @@ inline U64 get_cpu_clock_count_64() } return ret_val; } -#else -#define LL_INLINE #endif @@ -111,7 +108,7 @@ inline U64 get_cpu_clock_count_64() // to synchronize this value between cores at kernel start. It should not be affected // by CPU frequency. If not available use the REALTIME clock, but this may be affected by // NTP adjustments or other user activity affecting the system time. -inline U64 get_cpu_clock_count_64() +inline U64 LLFastTimer::getCPUClockCount64() { struct timespec tp; @@ -123,9 +120,9 @@ inline U64 get_cpu_clock_count_64() return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; } -inline U32 get_cpu_clock_count_32() +inline U32 LLFastTimer::getCPUClockCount32() { - return (U32)get_cpu_clock_count_64(); + return (U32)LLFastTimer::getCPUClockCount64(); } #endif // (LL_LINUX || LL_SOLARIS)) @@ -133,14 +130,14 @@ inline U32 get_cpu_clock_count_32() #if (LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) // // Mac x86 implementation of CPU clock -inline U32 get_cpu_clock_count_32() +inline U32 LLFastTimer::getCPUClockCount32() { U64 x; __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); return (U32)x >> 8; } -inline U32 get_cpu_clock_count_64() +inline U32 LLFastTimer::getCPUClockCount64() { U64 x; __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); @@ -155,12 +152,12 @@ inline U32 get_cpu_clock_count_64() // // Just use gettimeofday implementation for now -inline U32 get_cpu_clock_count_32() +inline U32 LLFastTimer::getCPUClockCount32() { return (U32)get_clock_count(); } -inline U32 get_cpu_clock_count_64() +inline U32 LLFastTimer::getCPUClockCount64() { return get_clock_count(); } diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index 6dbd90f9fa..abcaee673e 100644 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -56,7 +56,7 @@ S32 LLFastTimer::sCurFrameIndex = -1; S32 LLFastTimer::sLastFrameIndex = -1; -U64 LLFastTimer::sLastFrameTime = get_cpu_clock_count_64(); +U64 LLFastTimer::sLastFrameTime = LLFastTimer::getCPUClockCount64(); bool LLFastTimer::sPauseHistory = 0; bool LLFastTimer::sResetHistory = 0; LLFastTimer::CurTimerData LLFastTimer::sCurTimerData; @@ -426,7 +426,7 @@ void LLFastTimer::NamedTimer::buildHierarchy() //static void LLFastTimer::NamedTimer::accumulateTimings() { - U32 cur_time = get_cpu_clock_count_32(); + U32 cur_time = getCPUClockCount32(); // walk up stack of active timers and accumulate current time while leaving timing structures active LLFastTimer* cur_timer = sCurTimerData.mCurTimer; @@ -556,7 +556,7 @@ void LLFastTimer::NamedTimer::reset() // walk up stack of active timers and reset start times to current time // effectively zeroing out any accumulated time - U32 cur_time = get_cpu_clock_count_32(); + U32 cur_time = getCPUClockCount32(); // root defined by parent pointing to self CurTimerData* cur_data = &sCurTimerData; @@ -649,7 +649,7 @@ std::vector& LLFastTimer::NamedTimer::getChildren() void LLFastTimer::nextFrame() { countsPerSecond(); // good place to calculate clock frequency - U64 frame_time = get_cpu_clock_count_64(); + U64 frame_time = getCPUClockCount64(); if ((frame_time - sLastFrameTime) >> 8 > 0xffffffff) { llinfos << "Slow frame, fast timers inaccurate" << llendl; @@ -738,7 +738,7 @@ const LLFastTimer::NamedTimer* LLFastTimer::getTimerByName(const std::string& na LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) : mFrameState(state) { - U32 start_time = get_cpu_clock_count_32(); + U32 start_time = getCPUClockCount32(); mStartTime = start_time; mFrameState->mActiveCount++; LLFastTimer::sCurTimerData.mCurTimer = this; -- cgit v1.2.3 From 18bdeb3d0e5ef4e5b014b41a8003569061e00910 Mon Sep 17 00:00:00 2001 From: Tofu Linden Date: Mon, 25 Jan 2010 13:32:49 -0800 Subject: DEV-45468 'SNOW-108: Fast timers are broken / badly-scaled on linux' ready to merge. legacy coding policy fix-up. --- indra/llcommon/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 05e45d6d8a..9ead183a9e 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -250,7 +250,7 @@ list(APPEND llcommon_SOURCE_FILES ${llcommon_HEADER_FILES}) if(LLCOMMON_LINK_SHARED) add_library (llcommon SHARED ${llcommon_SOURCE_FILES}) - ll_stage_sharedlib(llcommon) + ll_stage_sharedlib(llcommon) else(LLCOMMON_LINK_SHARED) add_library (llcommon ${llcommon_SOURCE_FILES}) endif(LLCOMMON_LINK_SHARED) -- cgit v1.2.3 From 2375afc4283e47a50516c1e003a6f699b0a2cfe1 Mon Sep 17 00:00:00 2001 From: Tofu Linden Date: Mon, 25 Jan 2010 13:49:13 -0800 Subject: Gosh, the mac prototypes for get_cpu_clock_count_64 have always been wrong, but the compiler didn't start caring until I made these proper member functions. fixed. --- indra/llcommon/llfasttimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 9f9e2ea945..32f3561616 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -137,7 +137,7 @@ inline U32 LLFastTimer::getCPUClockCount32() return (U32)x >> 8; } -inline U32 LLFastTimer::getCPUClockCount64() +inline U64 LLFastTimer::getCPUClockCount64() { U64 x; __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); @@ -157,7 +157,7 @@ inline U32 LLFastTimer::getCPUClockCount32() return (U32)get_clock_count(); } -inline U32 LLFastTimer::getCPUClockCount64() +inline U64 LLFastTimer::getCPUClockCount64() { return get_clock_count(); } -- cgit v1.2.3 From cbe647786f8631d32fc5f9226853950359207699 Mon Sep 17 00:00:00 2001 From: Tofu Linden Date: Mon, 25 Jan 2010 14:25:58 -0800 Subject: We already have a LL_FORCE_INLINE in llpreprocessor.h, don't re-invent it for llfasttimers.h. Also define LL_LIKELY/LL_UNLIKELY with a warning about its micro-optimizey nature, and use it to annotate llasserts (i.e. make llassert() lower-overhead and thus more attractive, even in inner-ish loops.) --- indra/llcommon/llerrorlegacy.h | 4 ++-- indra/llcommon/llfasttimer_class.h | 11 ++--------- indra/llcommon/llpreprocessor.h | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llerrorlegacy.h b/indra/llcommon/llerrorlegacy.h index 9920921a58..476d75380f 100644 --- a/indra/llcommon/llerrorlegacy.h +++ b/indra/llcommon/llerrorlegacy.h @@ -34,7 +34,7 @@ #ifndef LL_LLERRORLEGACY_H #define LL_LLERRORLEGACY_H - +#include "llpreprocessor.h" /* LEGACY -- DO NOT USE THIS STUFF ANYMORE @@ -107,7 +107,7 @@ const int LL_ERR_PRICE_MISMATCH = -23018; #define llwarning(msg, num) llwarns << "Warning # " << num << ": " << msg << llendl; -#define llassert_always(func) if (!(func)) llerrs << "ASSERT (" << #func << ")" << llendl; +#define llassert_always(func) if (LL_UNLIKELY(!(func))) llerrs << "ASSERT (" << #func << ")" << llendl; #ifdef SHOW_ASSERT #define llassert(func) llassert_always(func) diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h index 4cb0c2d04e..ddb1a74793 100644 --- a/indra/llcommon/llfasttimer_class.h +++ b/indra/llcommon/llfasttimer_class.h @@ -38,13 +38,6 @@ #define FAST_TIMER_ON 1 #define TIME_FAST_TIMERS 0 - -#if LL_WINDOWS -#define LL_INLINE __forceinline -#else -#define LL_INLINE -#endif // LL_WINDOWS - class LLMutex; #include @@ -164,7 +157,7 @@ public: public: LLFastTimer(LLFastTimer::FrameState* state); - LL_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer) + LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer) : mFrameState(timer.mFrameState) { #if TIME_FAST_TIMERS @@ -191,7 +184,7 @@ public: #endif } - LL_INLINE ~LLFastTimer() + LL_FORCE_INLINE ~LLFastTimer() { #if TIME_FAST_TIMERS U64 timer_start = getCPUClockCount64(); diff --git a/indra/llcommon/llpreprocessor.h b/indra/llcommon/llpreprocessor.h index 5eefa6a16b..1c1503ca7b 100644 --- a/indra/llcommon/llpreprocessor.h +++ b/indra/llcommon/llpreprocessor.h @@ -55,13 +55,28 @@ #define LL_BIG_ENDIAN 1 #endif + // Per-compiler switches + #ifdef __GNUC__ #define LL_FORCE_INLINE inline __attribute__((always_inline)) #else #define LL_FORCE_INLINE __forceinline #endif +// Mark-up expressions with branch prediction hints. Do NOT use +// this with reckless abandon - it's an obfuscating micro-optimization +// outside of inner loops or other places where you are OVERWHELMINGLY +// sure which way an expression almost-always evaluates. +#if __GNUC__ >= 3 +# define LL_LIKELY(EXPR) __builtin_expect (!!(EXPR), true) +# define LL_UNLIKELY(EXPR) __builtin_expect (!!(EXPR), false) +#else +# define LL_LIKELY(EXPR) (EXPR) +# define LL_UNLIKELY(EXPR) (EXPR) +#endif + + // Figure out differences between compilers #if defined(__GNUC__) #define GCC_VERSION (__GNUC__ * 10000 \ -- cgit v1.2.3