diff options
author | Richard Linden <none@none> | 2012-08-06 16:08:04 -0700 |
---|---|---|
committer | Richard Linden <none@none> | 2012-08-06 16:08:04 -0700 |
commit | a98c7e150b3404cbbb7324bfe2a5f547f613346b (patch) | |
tree | 1cd71cfb72cc342c74edfb0326aa56eb01240c54 | |
parent | 4d395a0de044328cf318598ef9b88eddde8e82af (diff) |
llfasttimer cleanup
removed unnecessary cache miss from fast timers
renamed llfasttimer_class back to llfasttimer
-rw-r--r-- | indra/llcommon/CMakeLists.txt | 3 | ||||
-rw-r--r-- | indra/llcommon/llfasttimer.cpp (renamed from indra/llcommon/llfasttimer_class.cpp) | 294 | ||||
-rw-r--r-- | indra/llcommon/llfasttimer.h | 362 | ||||
-rw-r--r-- | indra/llcommon/llfasttimer_class.h | 258 | ||||
-rw-r--r-- | indra/newview/llappviewer.cpp | 7 | ||||
-rw-r--r-- | indra/newview/llappviewer.h | 3 | ||||
-rw-r--r-- | indra/newview/lldrawpoolbump.cpp | 2 | ||||
-rw-r--r-- | indra/newview/llfasttimerview.cpp | 28 | ||||
-rw-r--r-- | indra/newview/llfasttimerview.h | 1 |
9 files changed, 406 insertions, 552 deletions
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index dd7b8c6eb8..7795d55d62 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -53,7 +53,7 @@ set(llcommon_SOURCE_FILES lleventfilter.cpp llevents.cpp lleventtimer.cpp - llfasttimer_class.cpp + llfasttimer.cpp llfile.cpp llfindlocale.cpp llfixedbuffer.cpp @@ -167,7 +167,6 @@ set(llcommon_HEADER_FILES lleventemitter.h llextendedstatus.h llfasttimer.h - llfasttimer_class.h llfile.h llfindlocale.h llfixedbuffer.h diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer.cpp index 449074dbfe..ff6806082c 100644 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer.cpp @@ -1,5 +1,5 @@ /** - * @file llfasttimer_class.cpp + * @file llfasttimer.cpp * @brief Implementation of the fast timer. * * $LicenseInfo:firstyear=2004&license=viewerlgpl$ @@ -64,16 +64,12 @@ BOOL LLFastTimer::sMetricLog = FALSE; LLMutex* LLFastTimer::sLogLock = NULL; std::queue<LLSD> LLFastTimer::sLogQueue; -#define USE_RDTSC 0 - #if LL_LINUX || LL_SOLARIS U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution #else U64 LLFastTimer::sClockResolution = 1000000; // Microsecond resolution #endif -std::vector<LLFastTimer::FrameState>* LLFastTimer::sTimerInfos = NULL; - // FIXME: move these declarations to the relevant modules // helper functions @@ -106,52 +102,41 @@ static timer_tree_dfs_iterator_t end_timer_tree() return timer_tree_dfs_iterator_t(); } - - // factory class that creates NamedTimers via static DeclareTimer objects class NamedTimerFactory : public LLSingleton<NamedTimerFactory> { public: NamedTimerFactory() - : mActiveTimerRoot(NULL), - mTimerRoot(NULL), - mAppTimer(NULL), - mRootFrameState(NULL) + : mTimerRoot(NULL) {} /*virtual */ void initSingleton() { mTimerRoot = new LLFastTimer::NamedTimer("root"); - - mActiveTimerRoot = new LLFastTimer::NamedTimer("Frame"); - mActiveTimerRoot->setCollapsed(false); - - mRootFrameState = new LLFastTimer::FrameState(mActiveTimerRoot); - mRootFrameState->mParent = &mTimerRoot->getFrameState(); - mActiveTimerRoot->setParent(mTimerRoot); - - mAppTimer = new LLFastTimer(mRootFrameState); + mRootFrameState.setNamedTimer(mTimerRoot); + mTimerRoot->setFrameState(&mRootFrameState); + mTimerRoot->mParent = mTimerRoot; + mRootFrameState.mParent = &mRootFrameState; } ~NamedTimerFactory() { std::for_each(mTimers.begin(), mTimers.end(), DeletePairedPointer()); - delete mAppTimer; - delete mActiveTimerRoot; delete mTimerRoot; - delete mRootFrameState; } - LLFastTimer::NamedTimer& createNamedTimer(const std::string& name) + LLFastTimer::NamedTimer& createNamedTimer(const std::string& name, LLFastTimer::FrameState* state) { timer_map_t::iterator found_it = mTimers.find(name); if (found_it != mTimers.end()) { + llerrs << "Duplicate timer declaration for: " << name << llendl; return *found_it->second; } LLFastTimer::NamedTimer* timer = new LLFastTimer::NamedTimer(name); + timer->setFrameState(state); timer->setParent(mTimerRoot); mTimers.insert(std::make_pair(name, timer)); @@ -168,10 +153,7 @@ public: return NULL; } - LLFastTimer::NamedTimer* getActiveRootTimer() { return mActiveTimerRoot; } LLFastTimer::NamedTimer* getRootTimer() { return mTimerRoot; } - const LLFastTimer* getAppTimer() { return mAppTimer; } - LLFastTimer::FrameState& getRootFrameState() { return *mRootFrameState; } typedef std::map<std::string, LLFastTimer::NamedTimer*> timer_map_t; timer_map_t::iterator beginTimers() { return mTimers.begin(); } @@ -181,55 +163,19 @@ public: private: timer_map_t mTimers; - LLFastTimer::NamedTimer* mActiveTimerRoot; LLFastTimer::NamedTimer* mTimerRoot; - LLFastTimer* mAppTimer; - LLFastTimer::FrameState* mRootFrameState; + LLFastTimer::FrameState mRootFrameState; }; -void update_cached_pointers_if_changed() -{ - // detect when elements have moved and update cached pointers - static LLFastTimer::FrameState* sFirstTimerAddress = NULL; - if (&*(LLFastTimer::getFrameStateList().begin()) != sFirstTimerAddress) - { - LLFastTimer::DeclareTimer::updateCachedPointers(); - } - sFirstTimerAddress = &*(LLFastTimer::getFrameStateList().begin()); -} - LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open ) -: mTimer(NamedTimerFactory::instance().createNamedTimer(name)) +: mTimer(NamedTimerFactory::instance().createNamedTimer(name, &mFrameState)) { mTimer.setCollapsed(!open); - mFrameState = &mTimer.getFrameState(); - update_cached_pointers_if_changed(); } LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name) -: mTimer(NamedTimerFactory::instance().createNamedTimer(name)) +: mTimer(NamedTimerFactory::instance().createNamedTimer(name, &mFrameState)) { - mFrameState = &mTimer.getFrameState(); - update_cached_pointers_if_changed(); -} - -// static -void LLFastTimer::DeclareTimer::updateCachedPointers() -{ - // propagate frame state pointers to timer declarations - for (instance_iter it = beginInstances(); it != endInstances(); ++it) - { - // update cached pointer - it->mFrameState = &it->mTimer.getFrameState(); - } - - // also update frame states of timers on stack - LLFastTimer* cur_timerp = LLFastTimer::sCurTimerData.mCurTimer; - while(cur_timerp->mLastTimerData.mCurTimer != cur_timerp) - { - cur_timerp->mFrameState = &cur_timerp->mFrameState->mTimer->getFrameState(); - cur_timerp = cur_timerp->mLastTimerData.mCurTimer; - } } //static @@ -241,7 +187,7 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer #else // windows or x86-mac or x86-linux or x86-solaris U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer { -#if USE_RDTSC || !LL_WINDOWS +#if LL_FASTTIMER_USE_RDTSC || !LL_WINDOWS //getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0); @@ -262,14 +208,13 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer } #endif -LLFastTimer::FrameState::FrameState(LLFastTimer::NamedTimer* timerp) +LLFastTimer::FrameState::FrameState() : mActiveCount(0), mCalls(0), mSelfTimeCounter(0), mParent(NULL), mLastCaller(NULL), - mMoveUpTree(false), - mTimer(timerp) + mMoveUpTree(false) {} @@ -280,12 +225,9 @@ LLFastTimer::NamedTimer::NamedTimer(const std::string& name) mTotalTimeCounter(0), mCountAverage(0), mCallAverage(0), - mNeedsSorting(false) + mNeedsSorting(false), + mFrameState(NULL) { - info_list_t& frame_state_list = getFrameStateList(); - mFrameStateIndex = frame_state_list.size(); - getFrameStateList().push_back(FrameState(this)); - mCountHistory = new U32[HISTORY_NUM]; memset(mCountHistory, 0, sizeof(U32) * HISTORY_NUM); mCallHistory = new U32[HISTORY_NUM]; @@ -366,15 +308,6 @@ void LLFastTimer::NamedTimer::processTimes() accumulateTimings(); } -// sort timer info structs by depth first traversal order -struct SortTimersDFS -{ - bool operator()(const LLFastTimer::FrameState& i1, const LLFastTimer::FrameState& i2) - { - return i1.mTimer->getFrameStateIndex() < i2.mTimer->getFrameStateIndex(); - } -}; - // sort child timers by name struct SortTimerByName { @@ -455,7 +388,7 @@ void LLFastTimer::NamedTimer::accumulateTimings() LLFastTimer* cur_timer = sCurTimerData.mCurTimer; // root defined by parent pointing to self CurTimerData* cur_data = &sCurTimerData; - while(cur_timer->mLastTimerData.mCurTimer != cur_timer) + while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) { U32 cumulative_time_delta = cur_time - cur_timer->mStartTime; U32 self_time_delta = cumulative_time_delta - cur_data->mChildTime; @@ -470,7 +403,7 @@ void LLFastTimer::NamedTimer::accumulateTimings() } // traverse tree in DFS post order, or bottom up - for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getActiveRootTimer()); + for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getRootTimer()); it != end_timer_tree_bottom_up(); ++it) { @@ -541,27 +474,6 @@ void LLFastTimer::NamedTimer::resetFrame() } } - - // tag timers by position in depth first traversal of tree - S32 index = 0; - for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer()); - it != end_timer_tree(); - ++it) - { - NamedTimer* timerp = (*it); - - timerp->mFrameStateIndex = index; - index++; - - llassert_always(timerp->mFrameStateIndex < (S32)getFrameStateList().size()); - } - - // sort timers by DFS traversal order to improve cache coherency - std::sort(getFrameStateList().begin(), getFrameStateList().end(), SortTimersDFS()); - - // update pointers into framestatelist now that we've sorted it - DeclareTimer::updateCachedPointers(); - // reset for next frame for (instance_iter it = beginInstances(); it != endInstances(); ++it) { @@ -592,7 +504,7 @@ void LLFastTimer::NamedTimer::reset() // root defined by parent pointing to self CurTimerData* cur_data = &sCurTimerData; LLFastTimer* cur_timer = cur_data->mCurTimer; - while(cur_timer->mLastTimerData.mCurTimer != cur_timer) + while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer) { cur_timer->mStartTime = cur_time; cur_data->mChildTime = 0; @@ -622,17 +534,6 @@ void LLFastTimer::NamedTimer::reset() sCurFrameIndex = 0; } -//static -LLFastTimer::info_list_t& LLFastTimer::getFrameStateList() -{ - if (!sTimerInfos) - { - sTimerInfos = new info_list_t(); - } - return *sTimerInfos; -} - - U32 LLFastTimer::NamedTimer::getHistoricalCount(S32 history_index) const { S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM; @@ -647,18 +548,7 @@ U32 LLFastTimer::NamedTimer::getHistoricalCalls(S32 history_index ) const LLFastTimer::FrameState& LLFastTimer::NamedTimer::getFrameState() const { - llassert_always(mFrameStateIndex >= 0); - if (this == NamedTimerFactory::instance().getActiveRootTimer()) - { - return NamedTimerFactory::instance().getRootFrameState(); - } - return getFrameStateList()[mFrameStateIndex]; -} - -// static -LLFastTimer::NamedTimer& LLFastTimer::NamedTimer::getRootNamedTimer() -{ - return *NamedTimerFactory::instance().getActiveRootTimer(); + return *mFrameState; } std::vector<LLFastTimer::NamedTimer*>::const_iterator LLFastTimer::NamedTimer::beginChildren() @@ -769,145 +659,3 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) } -////////////////////////////////////////////////////////////////////////////// -// -// Important note: These implementations must be FAST! -// - - -#if LL_WINDOWS -// -// Windows implementation of CPU clock -// - -// -// NOTE: put back in when we aren't using platform sdk anymore -// -// because MS has different signatures for these functions in winnt.h -// need to rename them to avoid conflicts -//#define _interlockedbittestandset _renamed_interlockedbittestandset -//#define _interlockedbittestandreset _renamed_interlockedbittestandreset -//#include <intrin.h> -//#undef _interlockedbittestandset -//#undef _interlockedbittestandreset - -//inline U32 LLFastTimer::getCPUClockCount32() -//{ -// U64 time_stamp = __rdtsc(); -// return (U32)(time_stamp >> 8); -//} -// -//// return full timer value, *not* shifted by 8 bits -//inline U64 LLFastTimer::getCPUClockCount64() -//{ -// return __rdtsc(); -//} - -// shift off lower 8 bits for lower resolution but longer term timing -// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing -#if USE_RDTSC -U32 LLFastTimer::getCPUClockCount32() -{ - U32 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - shr eax,8 - shl edx,24 - or eax, edx - mov dword ptr [ret_val], eax - } - return ret_val; -} - -// return full timer value, *not* shifted by 8 bits -U64 LLFastTimer::getCPUClockCount64() -{ - U64 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - mov eax,eax - mov edx,edx - mov dword ptr [ret_val+4], edx - mov dword ptr [ret_val], eax - } - return ret_val; -} - -std::string LLFastTimer::sClockType = "rdtsc"; - -#else -//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp -// These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures. -U32 LLFastTimer::getCPUClockCount32() -{ - return (U32)(get_clock_count()>>8); -} - -U64 LLFastTimer::getCPUClockCount64() -{ - return get_clock_count(); -} - -std::string LLFastTimer::sClockType = "QueryPerformanceCounter"; -#endif - -#endif - - -#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) -// -// Linux and Solaris implementation of CPU clock - non-x86. -// This is accurate but SLOW! Only use out of desperation. -// -// Try to use the MONOTONIC clock if available, this is a constant time counter -// with nanosecond resolution (but not necessarily accuracy) and attempts are -// made to synchronize this value between cores at kernel start. It should not -// be affected by CPU frequency. If not available use the REALTIME clock, but -// this may be affected by NTP adjustments or other user activity affecting -// the system time. -U64 LLFastTimer::getCPUClockCount64() -{ - struct timespec tp; - -#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? - if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME -#endif - clock_gettime(CLOCK_REALTIME,&tp); - - return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; -} - -U32 LLFastTimer::getCPUClockCount32() -{ - return (U32)(LLFastTimer::getCPUClockCount64() >> 8); -} - -std::string LLFastTimer::sClockType = "clock_gettime"; - -#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) - - -#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) -// -// Mac+Linux+Solaris FAST x86 implementation of CPU clock -U32 LLFastTimer::getCPUClockCount32() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return (U32)(x >> 8); -} - -U64 LLFastTimer::getCPUClockCount64() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return x; -} - -std::string LLFastTimer::sClockType = "rdtsc"; -#endif - diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 2b25f2fabb..e42e549df5 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -1,6 +1,6 @@ /** * @file llfasttimer.h - * @brief Inline implementations of fast timers. + * @brief Declaration of a fast timer. * * $LicenseInfo:firstyear=2004&license=viewerlgpl$ * Second Life Viewer Source Code @@ -27,9 +27,363 @@ #ifndef LL_FASTTIMER_H #define LL_FASTTIMER_H -// Implementation of getCPUClockCount32() and getCPUClockCount64 are now in llfastertimer_class.cpp. +#include "llinstancetracker.h" -// pull in the actual class definition -#include "llfasttimer_class.h" +#define FAST_TIMER_ON 1 +#define DEBUG_FAST_TIMER_THREADS 1 + +class LLMutex; + +#include <queue> +#include "llsd.h" + +#define LL_FASTTIMER_USE_RDTSC 1 + + +LL_COMMON_API void assert_main_thread(); + +class LL_COMMON_API LLFastTimer +{ +public: + class NamedTimer; + + struct LL_COMMON_API FrameState + { + FrameState(); + void setNamedTimer(NamedTimer* timerp) { mTimer = timerp; } + + U32 mSelfTimeCounter; + U32 mCalls; + FrameState* mParent; // info for caller timer + FrameState* mLastCaller; // used to bootstrap tree construction + NamedTimer* mTimer; + U16 mActiveCount; // number of timers with this ID active on stack + bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame + }; + + // stores a "named" timer instance to be reused via multiple LLFastTimer stack instances + class LL_COMMON_API NamedTimer + : public LLInstanceTracker<NamedTimer> + { + friend class DeclareTimer; + public: + ~NamedTimer(); + + enum { HISTORY_NUM = 300 }; + + const std::string& getName() const { return mName; } + NamedTimer* getParent() const { return mParent; } + void setParent(NamedTimer* parent); + S32 getDepth(); + std::string getToolTip(S32 history_index = -1); + + typedef std::vector<NamedTimer*>::const_iterator child_const_iter; + child_const_iter beginChildren(); + child_const_iter endChildren(); + std::vector<NamedTimer*>& getChildren(); + + void setCollapsed(bool collapsed) { mCollapsed = collapsed; } + bool getCollapsed() const { return mCollapsed; } + + U32 getCountAverage() const { return mCountAverage; } + U32 getCallAverage() const { return mCallAverage; } + + U32 getHistoricalCount(S32 history_index = 0) const; + U32 getHistoricalCalls(S32 history_index = 0) const; + + void setFrameState(FrameState* state) { mFrameState = state; state->setNamedTimer(this); } + FrameState& getFrameState() const; + + private: + friend class LLFastTimer; + friend class NamedTimerFactory; + + // + // methods + // + NamedTimer(const std::string& name); + // recursive call to gather total time from children + static void accumulateTimings(); + + // updates cumulative times and hierarchy, + // can be called multiple times in a frame, at any point + static void processTimes(); + + static void buildHierarchy(); + static void resetFrame(); + static void reset(); + + // + // members + // + FrameState* mFrameState; + + std::string mName; + + U32 mTotalTimeCounter; + + U32 mCountAverage; + U32 mCallAverage; + + U32* mCountHistory; + U32* mCallHistory; + + // tree structure + NamedTimer* mParent; // NamedTimer of caller(parent) + std::vector<NamedTimer*> mChildren; + bool mCollapsed; // don't show children + bool mNeedsSorting; // sort children whenever child added + }; + + // used to statically declare a new named timer + class LL_COMMON_API DeclareTimer + : public LLInstanceTracker<DeclareTimer> + { + friend class LLFastTimer; + public: + DeclareTimer(const std::string& name, bool open); + DeclareTimer(const std::string& name); + + NamedTimer& getNamedTimer() { return mTimer; } + + private: + FrameState mFrameState; + NamedTimer& mTimer; + }; + +public: + LLFastTimer(LLFastTimer::FrameState* state); + + LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer) + : mFrameState(&timer.mFrameState) + { +#if FAST_TIMER_ON + LLFastTimer::FrameState* frame_state = mFrameState; + mStartTime = getCPUClockCount32(); + + frame_state->mActiveCount++; + frame_state->mCalls++; + // keep current parent as long as it is active when we are + frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0); + + LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData; + mLastTimerData = *cur_timer_data; + cur_timer_data->mCurTimer = this; + cur_timer_data->mFrameState = frame_state; + cur_timer_data->mChildTime = 0; +#endif +#if DEBUG_FAST_TIMER_THREADS +#if !LL_RELEASE + assert_main_thread(); +#endif +#endif + } + + LL_FORCE_INLINE ~LLFastTimer() + { +#if FAST_TIMER_ON + LLFastTimer::FrameState* frame_state = mFrameState; + U32 total_time = getCPUClockCount32() - mStartTime; + + frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime; + frame_state->mActiveCount--; + + // store last caller to bootstrap tree creation + // do this in the destructor in case of recursion to get topmost caller + frame_state->mLastCaller = mLastTimerData.mFrameState; + + // we are only tracking self time, so subtract our total time delta from parents + mLastTimerData.mChildTime += total_time; + + LLFastTimer::sCurTimerData = mLastTimerData; +#endif + } + +public: + static LLMutex* sLogLock; + static std::queue<LLSD> sLogQueue; + static BOOL sLog; + static BOOL sMetricLog; + static std::string sLogName; + static bool sPauseHistory; + static bool sResetHistory; + + // call this once a frame to reset timers + static void nextFrame(); + + // dumps current cumulative frame stats to log + // call nextFrame() to reset timers + static void dumpCurTimes(); + + // call this to reset timer hierarchy, averages, etc. + static void reset(); + + static U64 countsPerSecond(); + static S32 getLastFrameIndex() { return sLastFrameIndex; } + static S32 getCurFrameIndex() { return sCurFrameIndex; } + + static void writeLog(std::ostream& os); + static const NamedTimer* getTimerByName(const std::string& name); + + struct CurTimerData + { + LLFastTimer* mCurTimer; + FrameState* mFrameState; + U32 mChildTime; + }; + static CurTimerData sCurTimerData; + +private: + + + ////////////////////////////////////////////////////////////////////////////// + // + // Important note: These implementations must be FAST! + // + + +#if LL_WINDOWS + // + // Windows implementation of CPU clock + // + + // + // NOTE: put back in when we aren't using platform sdk anymore + // + // because MS has different signatures for these functions in winnt.h + // need to rename them to avoid conflicts + //#define _interlockedbittestandset _renamed_interlockedbittestandset + //#define _interlockedbittestandreset _renamed_interlockedbittestandreset + //#include <intrin.h> + //#undef _interlockedbittestandset + //#undef _interlockedbittestandreset + + //inline U32 LLFastTimer::getCPUClockCount32() + //{ + // U64 time_stamp = __rdtsc(); + // return (U32)(time_stamp >> 8); + //} + // + //// return full timer value, *not* shifted by 8 bits + //inline U64 LLFastTimer::getCPUClockCount64() + //{ + // return __rdtsc(); + //} + + // shift off lower 8 bits for lower resolution but longer term timing + // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing +#if LL_FASTTIMER_USE_RDTSC + static U32 getCPUClockCount32() + { + U32 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + shr eax,8 + shl edx,24 + or eax, edx + mov dword ptr [ret_val], eax + } + return ret_val; + } + + // return full timer value, *not* shifted by 8 bits + static U64 getCPUClockCount64() + { + U64 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + mov eax,eax + mov edx,edx + mov dword ptr [ret_val+4], edx + mov dword ptr [ret_val], eax + } + return ret_val; + } + +#else + //LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp + // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures. + static U32 getCPUClockCount32() + { + return (U32)(get_clock_count()>>8); + } + + static U64 getCPUClockCount64() + { + return get_clock_count(); + } + +#endif + +#endif + + +#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) + // + // Linux and Solaris implementation of CPU clock - non-x86. + // This is accurate but SLOW! Only use out of desperation. + // + // Try to use the MONOTONIC clock if available, this is a constant time counter + // with nanosecond resolution (but not necessarily accuracy) and attempts are + // made to synchronize this value between cores at kernel start. It should not + // be affected by CPU frequency. If not available use the REALTIME clock, but + // this may be affected by NTP adjustments or other user activity affecting + // the system time. + static U64 getCPUClockCount64() + { + struct timespec tp; + +#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? + if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME +#endif + clock_gettime(CLOCK_REALTIME,&tp); + + return (tp.tv_sec*sClockResolution)+tp.tv_nsec; + } + + static U32 getCPUClockCount32() + { + return (U32)(getCPUClockCount64() >> 8); + } + +#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) + + +#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) + // + // Mac+Linux+Solaris FAST x86 implementation of CPU clock + static U32 getCPUClockCount32() + { + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return (U32)(x >> 8); + } + + static U64 getCPUClockCount64() + { + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return x; + } + +#endif + + static U64 sClockResolution; + + static S32 sCurFrameIndex; + static S32 sLastFrameIndex; + static U64 sLastFrameTime; + + U32 mStartTime; + LLFastTimer::FrameState* mFrameState; + LLFastTimer::CurTimerData mLastTimerData; + +}; + +typedef class LLFastTimer LLFastTimer; #endif // LL_LLFASTTIMER_H diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h deleted file mode 100644 index 8a12aa1372..0000000000 --- a/indra/llcommon/llfasttimer_class.h +++ /dev/null @@ -1,258 +0,0 @@ -/** - * @file llfasttimer_class.h - * @brief Declaration of a fast timer. - * - * $LicenseInfo:firstyear=2004&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2010, Linden Research, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA - * $/LicenseInfo$ - */ - -#ifndef LL_FASTTIMER_CLASS_H -#define LL_FASTTIMER_CLASS_H - -#include "llinstancetracker.h" - -#define FAST_TIMER_ON 1 -#define DEBUG_FAST_TIMER_THREADS 1 - -class LLMutex; - -#include <queue> -#include "llsd.h" - -LL_COMMON_API void assert_main_thread(); - -class LL_COMMON_API LLFastTimer -{ -public: - class NamedTimer; - - struct LL_COMMON_API FrameState - { - FrameState(NamedTimer* timerp); - - U32 mSelfTimeCounter; - U32 mCalls; - FrameState* mParent; // info for caller timer - FrameState* mLastCaller; // used to bootstrap tree construction - NamedTimer* mTimer; - U16 mActiveCount; // number of timers with this ID active on stack - bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame - }; - - // stores a "named" timer instance to be reused via multiple LLFastTimer stack instances - class LL_COMMON_API NamedTimer - : public LLInstanceTracker<NamedTimer> - { - friend class DeclareTimer; - public: - ~NamedTimer(); - - enum { HISTORY_NUM = 300 }; - - const std::string& getName() const { return mName; } - NamedTimer* getParent() const { return mParent; } - void setParent(NamedTimer* parent); - S32 getDepth(); - std::string getToolTip(S32 history_index = -1); - - typedef std::vector<NamedTimer*>::const_iterator child_const_iter; - child_const_iter beginChildren(); - child_const_iter endChildren(); - std::vector<NamedTimer*>& getChildren(); - - void setCollapsed(bool collapsed) { mCollapsed = collapsed; } - bool getCollapsed() const { return mCollapsed; } - - U32 getCountAverage() const { return mCountAverage; } - U32 getCallAverage() const { return mCallAverage; } - - U32 getHistoricalCount(S32 history_index = 0) const; - U32 getHistoricalCalls(S32 history_index = 0) const; - - static NamedTimer& getRootNamedTimer(); - - S32 getFrameStateIndex() const { return mFrameStateIndex; } - - FrameState& getFrameState() const; - - private: - friend class LLFastTimer; - friend class NamedTimerFactory; - - // - // methods - // - NamedTimer(const std::string& name); - // recursive call to gather total time from children - static void accumulateTimings(); - - // updates cumulative times and hierarchy, - // can be called multiple times in a frame, at any point - static void processTimes(); - - static void buildHierarchy(); - static void resetFrame(); - static void reset(); - - // - // members - // - S32 mFrameStateIndex; - - std::string mName; - - U32 mTotalTimeCounter; - - U32 mCountAverage; - U32 mCallAverage; - - U32* mCountHistory; - U32* mCallHistory; - - // tree structure - NamedTimer* mParent; // NamedTimer of caller(parent) - std::vector<NamedTimer*> mChildren; - bool mCollapsed; // don't show children - bool mNeedsSorting; // sort children whenever child added - }; - - // used to statically declare a new named timer - class LL_COMMON_API DeclareTimer - : public LLInstanceTracker<DeclareTimer> - { - friend class LLFastTimer; - public: - DeclareTimer(const std::string& name, bool open); - DeclareTimer(const std::string& name); - - static void updateCachedPointers(); - - private: - NamedTimer& mTimer; - FrameState* mFrameState; - }; - -public: - LLFastTimer(LLFastTimer::FrameState* state); - - LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer) - : mFrameState(timer.mFrameState) - { -#if FAST_TIMER_ON - LLFastTimer::FrameState* frame_state = mFrameState; - mStartTime = getCPUClockCount32(); - - frame_state->mActiveCount++; - frame_state->mCalls++; - // keep current parent as long as it is active when we are - frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0); - - LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData; - mLastTimerData = *cur_timer_data; - cur_timer_data->mCurTimer = this; - cur_timer_data->mFrameState = frame_state; - cur_timer_data->mChildTime = 0; -#endif -#if DEBUG_FAST_TIMER_THREADS -#if !LL_RELEASE - assert_main_thread(); -#endif -#endif - } - - LL_FORCE_INLINE ~LLFastTimer() - { -#if FAST_TIMER_ON - LLFastTimer::FrameState* frame_state = mFrameState; - U32 total_time = getCPUClockCount32() - mStartTime; - - frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime; - frame_state->mActiveCount--; - - // store last caller to bootstrap tree creation - // do this in the destructor in case of recursion to get topmost caller - frame_state->mLastCaller = mLastTimerData.mFrameState; - - // we are only tracking self time, so subtract our total time delta from parents - mLastTimerData.mChildTime += total_time; - - LLFastTimer::sCurTimerData = mLastTimerData; -#endif - } - -public: - static LLMutex* sLogLock; - static std::queue<LLSD> sLogQueue; - static BOOL sLog; - static BOOL sMetricLog; - static std::string sLogName; - static bool sPauseHistory; - static bool sResetHistory; - - typedef std::vector<FrameState> info_list_t; - static info_list_t& getFrameStateList(); - - - // call this once a frame to reset timers - static void nextFrame(); - - // dumps current cumulative frame stats to log - // call nextFrame() to reset timers - static void dumpCurTimes(); - - // call this to reset timer hierarchy, averages, etc. - static void reset(); - - static U64 countsPerSecond(); - static S32 getLastFrameIndex() { return sLastFrameIndex; } - static S32 getCurFrameIndex() { return sCurFrameIndex; } - - static void writeLog(std::ostream& os); - static const NamedTimer* getTimerByName(const std::string& name); - - struct CurTimerData - { - LLFastTimer* mCurTimer; - FrameState* mFrameState; - U32 mChildTime; - }; - static CurTimerData sCurTimerData; - static std::string sClockType; - -private: - static U32 getCPUClockCount32(); - static U64 getCPUClockCount64(); - static U64 sClockResolution; - - static S32 sCurFrameIndex; - static S32 sLastFrameIndex; - static U64 sLastFrameTime; - static info_list_t* sTimerInfos; - - U32 mStartTime; - LLFastTimer::FrameState* mFrameState; - LLFastTimer::CurTimerData mLastTimerData; - -}; - -typedef class LLFastTimer LLFastTimer; - -#endif // LL_LLFASTTIMER_CLASS_H diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 161cc418dd..c717a64501 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -1168,6 +1168,8 @@ static LLFastTimer::DeclareTimer FTM_SERVICE_CALLBACK("Callback"); static LLFastTimer::DeclareTimer FTM_AGENT_AUTOPILOT("Autopilot"); static LLFastTimer::DeclareTimer FTM_AGENT_UPDATE("Update"); +LLFastTimer::DeclareTimer FTM_FRAME("Frame"); + bool LLAppViewer::mainLoop() { LLMemType mt1(LLMemType::MTYPE_MAIN); @@ -1206,7 +1208,8 @@ bool LLAppViewer::mainLoop() // Handle messages while (!LLApp::isExiting()) { - LLFastTimer::nextFrame(); // Should be outside of any timer instances + LLFastTimer _(FTM_FRAME); + LLFastTimer::nextFrame(); //clear call stack records llclearcallstacks; @@ -3106,8 +3109,6 @@ void LLAppViewer::writeSystemInfo() LL_INFOS("SystemInfo") << "OS: " << getOSInfo().getOSStringSimple() << LL_ENDL; LL_INFOS("SystemInfo") << "OS info: " << getOSInfo() << LL_ENDL; - LL_INFOS("SystemInfo") << "Timers: " << LLFastTimer::sClockType << LL_ENDL; - writeDebugInfo(); // Save out debug_info.log early, in case of crash. } diff --git a/indra/newview/llappviewer.h b/indra/newview/llappviewer.h index f7d019ccba..304b084d39 100644 --- a/indra/newview/llappviewer.h +++ b/indra/newview/llappviewer.h @@ -41,6 +41,9 @@ class LLTextureFetch; class LLWatchdogTimeout; class LLUpdaterService; +extern LLFastTimer::DeclareTimer FTM_FRAME; + + class LLAppViewer : public LLApp { public: diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp index 6f71e6ebc8..a340f43594 100644 --- a/indra/newview/lldrawpoolbump.cpp +++ b/indra/newview/lldrawpoolbump.cpp @@ -1192,7 +1192,7 @@ static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_MIN_MAX("Min/Max"); static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_RGB2LUM("RGB to Luminance"); static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_RESCALE("Rescale"); static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_GEN_NORMAL("Generate Normal"); -static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_CREATE("Create"); +static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_CREATE("Bump Source Create"); // static void LLBumpImageList::onSourceLoaded( BOOL success, LLViewerTexture *src_vi, LLImageRaw* src, LLUUID& source_asset_id, EBumpEffect bump_code ) diff --git a/indra/newview/llfasttimerview.cpp b/indra/newview/llfasttimerview.cpp index c032d5d049..59bf70f488 100644 --- a/indra/newview/llfasttimerview.cpp +++ b/indra/newview/llfasttimerview.cpp @@ -160,7 +160,7 @@ LLFastTimer::NamedTimer* LLFastTimerView::getLegendID(S32 y) BOOL LLFastTimerView::handleDoubleClick(S32 x, S32 y, MASK mask) { - for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer()); + for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer()); it != end_timer_tree(); ++it) { @@ -257,7 +257,7 @@ BOOL LLFastTimerView::handleHover(S32 x, S32 y, MASK mask) } S32 i = 0; - for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer()); + for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer()); it != end_timer_tree(); ++it, ++i) { @@ -419,11 +419,11 @@ void LLFastTimerView::draw() y -= (texth + 2); - sTimerColors[&LLFastTimer::NamedTimer::getRootNamedTimer()] = LLColor4::grey; + sTimerColors[&getFrameTimer()] = LLColor4::grey; F32 hue = 0.f; - for (timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer()); + for (timer_tree_iterator_t it = begin_timer_tree(getFrameTimer()); it != timer_tree_iterator_t(); ++it) { @@ -448,7 +448,7 @@ void LLFastTimerView::draw() S32 cur_line = 0; ft_display_idx.clear(); std::map<LLFastTimer::NamedTimer*, S32> display_line; - for (timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer()); + for (timer_tree_iterator_t it = begin_timer_tree(getFrameTimer()); it != timer_tree_iterator_t(); ++it) { @@ -558,7 +558,7 @@ void LLFastTimerView::draw() U64 totalticks; if (!LLFastTimer::sPauseHistory) { - U64 ticks = LLFastTimer::NamedTimer::getRootNamedTimer().getHistoricalCount(mScrollIndex); + U64 ticks = getFrameTimer().getHistoricalCount(mScrollIndex); if (LLFastTimer::getCurFrameIndex() >= 10) { @@ -598,7 +598,7 @@ void LLFastTimerView::draw() totalticks = 0; for (S32 j=0; j<histmax; j++) { - U64 ticks = LLFastTimer::NamedTimer::getRootNamedTimer().getHistoricalCount(j); + U64 ticks = getFrameTimer().getHistoricalCount(j); if (ticks > totalticks) totalticks = ticks; @@ -704,7 +704,7 @@ void LLFastTimerView::draw() LLFastTimer::NamedTimer* prev_id = NULL; S32 i = 0; - for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer()); + for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer()); it != end_timer_tree(); ++it, ++i) { @@ -867,7 +867,7 @@ void LLFastTimerView::draw() } U64 cur_max = 0; - for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer()); + for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer()); it != end_timer_tree(); ++it) { @@ -968,7 +968,7 @@ void LLFastTimerView::draw() { std::string legend_stat; bool first = true; - for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer()); + for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer()); it != end_timer_tree(); ++it) { @@ -990,7 +990,7 @@ void LLFastTimerView::draw() std::string timer_stat; first = true; - for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer()); + for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer()); it != end_timer_tree(); ++it) { @@ -1551,3 +1551,9 @@ void LLFastTimerView::onClickCloseBtn() setVisible(false); } +LLFastTimer::NamedTimer& LLFastTimerView::getFrameTimer() +{ + return FTM_FRAME.getNamedTimer(); +} + + diff --git a/indra/newview/llfasttimerview.h b/indra/newview/llfasttimerview.h index 1fda91f173..5766cfa0b0 100644 --- a/indra/newview/llfasttimerview.h +++ b/indra/newview/llfasttimerview.h @@ -46,6 +46,7 @@ private: static LLSD analyzePerformanceLogDefault(std::istream& is) ; static void exportCharts(const std::string& base, const std::string& target); void onPause(); + LLFastTimer::NamedTimer& getFrameTimer(); public: |