summaryrefslogtreecommitdiff
path: root/indra/llcommon/llfasttimer_class.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llfasttimer_class.cpp')
-rw-r--r--indra/llcommon/llfasttimer_class.cpp226
1 files changed, 191 insertions, 35 deletions
diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp
index bce87ada96..463f558c2c 100644
--- a/indra/llcommon/llfasttimer_class.cpp
+++ b/indra/llcommon/llfasttimer_class.cpp
@@ -35,10 +35,13 @@
#include <boost/bind.hpp>
+
#if LL_WINDOWS
+#include "lltimer.h"
#elif LL_LINUX || LL_SOLARIS
#include <sys/time.h>
#include <sched.h>
+#include "lltimer.h"
#elif LL_DARWIN
#include <sys/time.h>
#include "lltimer.h" // get_clock_count()
@@ -61,6 +64,8 @@ BOOL LLFastTimer::sMetricLog = FALSE;
LLMutex* LLFastTimer::sLogLock = NULL;
std::queue<LLSD> LLFastTimer::sLogQueue;
+#define USE_RDTSC 0
+
#if LL_LINUX || LL_SOLARIS
U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution
#else
@@ -214,15 +219,20 @@ LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name)
// static
void LLFastTimer::DeclareTimer::updateCachedPointers()
{
- DeclareTimer::LLInstanceTrackerScopedGuard guard;
// propagate frame state pointers to timer declarations
- for (DeclareTimer::instance_iter it = guard.beginInstances();
- it != guard.endInstances();
- ++it)
+ for (instance_iter it = beginInstances(); it != endInstances(); ++it)
{
// update cached pointer
it->mFrameState = &it->mTimer.getFrameState();
}
+
+ // also update frame states of timers on stack
+ LLFastTimer* cur_timerp = LLFastTimer::sCurTimerData.mCurTimer;
+ while(cur_timerp->mLastTimerData.mCurTimer != cur_timerp)
+ {
+ cur_timerp->mFrameState = &cur_timerp->mFrameState->mTimer->getFrameState();
+ cur_timerp = cur_timerp->mLastTimerData.mCurTimer;
+ }
}
//static
@@ -234,10 +244,23 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
#else // windows or x86-mac or x86-linux or x86-solaris
U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
{
+#if USE_RDTSC || !LL_WINDOWS
//getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0);
// we drop the low-order byte in our timers, so report a lower frequency
+#else
+ // If we're not using RDTSC, each fasttimer tick is just a performance counter tick.
+ // Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency())
+ // since that would change displayed MHz stats for CPUs
+ static bool firstcall = true;
+ static U64 sCPUClockFrequency;
+ if (firstcall)
+ {
+ QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency);
+ firstcall = false;
+ }
+#endif
return sCPUClockFrequency >> 8;
}
#endif
@@ -280,14 +303,15 @@ LLFastTimer::NamedTimer::~NamedTimer()
std::string LLFastTimer::NamedTimer::getToolTip(S32 history_idx)
{
+ F64 ms_multiplier = 1000.0 / (F64)LLFastTimer::countsPerSecond();
if (history_idx < 0)
{
- // by default, show average number of calls
- return llformat("%s (%d calls)", getName().c_str(), (S32)getCallAverage());
+ // by default, show average number of call
+ return llformat("%s (%d ms, %d calls)", getName().c_str(), (S32)(getCountAverage() * ms_multiplier), (S32)getCallAverage());
}
else
{
- return llformat("%s (%d calls)", getName().c_str(), (S32)getHistoricalCalls(history_idx));
+ return llformat("%s (%d ms, %d calls)", getName().c_str(), (S32)(getHistoricalCount(history_idx) * ms_multiplier), (S32)getHistoricalCalls(history_idx));
}
}
@@ -370,10 +394,7 @@ void LLFastTimer::NamedTimer::buildHierarchy()
// set up initial tree
{
- NamedTimer::LLInstanceTrackerScopedGuard guard;
- for (instance_iter it = guard.beginInstances();
- it != guard.endInstances();
- ++it)
+ for (instance_iter it = beginInstances(); it != endInstances(); ++it)
{
NamedTimer& timer = *it;
if (&timer == NamedTimerFactory::instance().getRootTimer()) continue;
@@ -482,16 +503,26 @@ void LLFastTimer::NamedTimer::resetFrame()
{
if (sLog)
{ //output current frame counts to performance log
+
+ static S32 call_count = 0;
+ if (call_count % 100 == 0)
+ {
+ llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl;
+ llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl;
+ llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl;
+ llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl;
+ llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl;
+ llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl;
+ }
+ call_count++;
+
F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency
F64 total_time = 0;
LLSD sd;
{
- NamedTimer::LLInstanceTrackerScopedGuard guard;
- for (NamedTimer::instance_iter it = guard.beginInstances();
- it != guard.endInstances();
- ++it)
+ for (instance_iter it = beginInstances(); it != endInstances(); ++it)
{
NamedTimer& timer = *it;
FrameState& info = timer.getFrameState();
@@ -528,7 +559,7 @@ void LLFastTimer::NamedTimer::resetFrame()
llassert_always(timerp->mFrameStateIndex < (S32)getFrameStateList().size());
}
- // sort timers by dfs traversal order to improve cache coherency
+ // sort timers by DFS traversal order to improve cache coherency
std::sort(getFrameStateList().begin(), getFrameStateList().end(), SortTimersDFS());
// update pointers into framestatelist now that we've sorted it
@@ -536,10 +567,7 @@ void LLFastTimer::NamedTimer::resetFrame()
// reset for next frame
{
- NamedTimer::LLInstanceTrackerScopedGuard guard;
- for (NamedTimer::instance_iter it = guard.beginInstances();
- it != guard.endInstances();
- ++it)
+ for (instance_iter it = beginInstances(); it != endInstances(); ++it)
{
NamedTimer& timer = *it;
@@ -583,10 +611,7 @@ void LLFastTimer::NamedTimer::reset()
// reset all history
{
- NamedTimer::LLInstanceTrackerScopedGuard guard;
- for (NamedTimer::instance_iter it = guard.beginInstances();
- it != guard.endInstances();
- ++it)
+ for (instance_iter it = beginInstances(); it != endInstances(); ++it)
{
NamedTimer& timer = *it;
if (&timer != NamedTimerFactory::instance().getRootTimer())
@@ -669,17 +694,7 @@ void LLFastTimer::nextFrame()
llinfos << "Slow frame, fast timers inaccurate" << llendl;
}
- if (sPauseHistory)
- {
- sResetHistory = true;
- }
- else if (sResetHistory)
- {
- sLastFrameIndex = 0;
- sCurFrameIndex = 0;
- sResetHistory = false;
- }
- else // not paused
+ if (!sPauseHistory)
{
NamedTimer::processTimes();
sLastFrameIndex = sCurFrameIndex++;
@@ -763,3 +778,144 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
//////////////////////////////////////////////////////////////////////////////
+//
+// Important note: These implementations must be FAST!
+//
+
+
+#if LL_WINDOWS
+//
+// Windows implementation of CPU clock
+//
+
+//
+// NOTE: put back in when we aren't using platform sdk anymore
+//
+// because MS has different signatures for these functions in winnt.h
+// need to rename them to avoid conflicts
+//#define _interlockedbittestandset _renamed_interlockedbittestandset
+//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
+//#include <intrin.h>
+//#undef _interlockedbittestandset
+//#undef _interlockedbittestandreset
+
+//inline U32 LLFastTimer::getCPUClockCount32()
+//{
+// U64 time_stamp = __rdtsc();
+// return (U32)(time_stamp >> 8);
+//}
+//
+//// return full timer value, *not* shifted by 8 bits
+//inline U64 LLFastTimer::getCPUClockCount64()
+//{
+// return __rdtsc();
+//}
+
+// shift off lower 8 bits for lower resolution but longer term timing
+// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
+#if USE_RDTSC
+U32 LLFastTimer::getCPUClockCount32()
+{
+ U32 ret_val;
+ __asm
+ {
+ _emit 0x0f
+ _emit 0x31
+ shr eax,8
+ shl edx,24
+ or eax, edx
+ mov dword ptr [ret_val], eax
+ }
+ return ret_val;
+}
+
+// return full timer value, *not* shifted by 8 bits
+U64 LLFastTimer::getCPUClockCount64()
+{
+ U64 ret_val;
+ __asm
+ {
+ _emit 0x0f
+ _emit 0x31
+ mov eax,eax
+ mov edx,edx
+ mov dword ptr [ret_val+4], edx
+ mov dword ptr [ret_val], eax
+ }
+ return ret_val;
+}
+
+std::string LLFastTimer::sClockType = "rdtsc";
+
+#else
+//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
+// These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
+U32 LLFastTimer::getCPUClockCount32()
+{
+ return (U32)(get_clock_count()>>8);
+}
+
+U64 LLFastTimer::getCPUClockCount64()
+{
+ return get_clock_count();
+}
+
+std::string LLFastTimer::sClockType = "QueryPerformanceCounter";
+#endif
+
+#endif
+
+
+#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+//
+// Linux and Solaris implementation of CPU clock - non-x86.
+// This is accurate but SLOW! Only use out of desperation.
+//
+// Try to use the MONOTONIC clock if available, this is a constant time counter
+// with nanosecond resolution (but not necessarily accuracy) and attempts are
+// made to synchronize this value between cores at kernel start. It should not
+// be affected by CPU frequency. If not available use the REALTIME clock, but
+// this may be affected by NTP adjustments or other user activity affecting
+// the system time.
+U64 LLFastTimer::getCPUClockCount64()
+{
+ struct timespec tp;
+
+#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
+ if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
+#endif
+ clock_gettime(CLOCK_REALTIME,&tp);
+
+ return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;
+}
+
+U32 LLFastTimer::getCPUClockCount32()
+{
+ return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
+}
+
+std::string LLFastTimer::sClockType = "clock_gettime";
+
+#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+
+
+#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
+//
+// Mac+Linux+Solaris FAST x86 implementation of CPU clock
+U32 LLFastTimer::getCPUClockCount32()
+{
+ U64 x;
+ __asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+ return (U32)(x >> 8);
+}
+
+U64 LLFastTimer::getCPUClockCount64()
+{
+ U64 x;
+ __asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+ return x;
+}
+
+std::string LLFastTimer::sClockType = "rdtsc";
+#endif
+