From 90e3d83a5cb35e98a02a3017dd79ebc272bbfe85 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Tue, 21 Sep 2010 13:26:52 -0400 Subject: Fix for build failures - disabling tcmalloc for now --- indra/llcommon/llfasttimer_class.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 indra/llcommon/llfasttimer_class.cpp (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp old mode 100644 new mode 100755 -- cgit v1.2.3 From a5619d16f74863168f45b04b37cc6383e1a92263 Mon Sep 17 00:00:00 2001 From: Oz Linden Date: Wed, 13 Oct 2010 07:24:37 -0400 Subject: correct licenses (fix problem with license change merge) --- indra/llcommon/llfasttimer_class.cpp | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index e9ac709254..c45921cdec 100644 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -2,30 +2,25 @@ * @file llfasttimer_class.cpp * @brief Implementation of the fast timer. * - * $LicenseInfo:firstyear=2004&license=viewergpl$ - * - * Copyright (c) 2004-2007, Linden Research, Inc. - * + * $LicenseInfo:firstyear=2004&license=viewerlgpl$ * Second Life Viewer Source Code - * The source code in this file ("Source Code") is provided by Linden Lab - * to you under the terms of the GNU General Public License, version 2.0 - * ("GPL"), unless you have obtained a separate licensing agreement - * ("Other License"), formally executed by you and Linden Lab. Terms of - * the GPL can be found in doc/GPL-license.txt in this distribution, or - * online at http://secondlife.com/developers/opensource/gplv2 + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. * - * There are special exceptions to the terms and conditions of the GPL as - * it is applied to this Source Code. View the full text of the exception - * in the file doc/FLOSS-exception.txt in this software distribution, or - * online at http://secondlife.com/developers/opensource/flossexception + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * By copying, modifying or distributing this software, you acknowledge - * that you have read and understood your obligations described above, - * and agree to abide by those obligations. + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * - * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS."LINDEN LAB MAKES NO - * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, - * COMPLETENESS OR PERFORMANCE. + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ #include "linden_common.h" -- cgit v1.2.3 From 1e49e9ae323395721ef2d845979b0cec5abb9b75 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 1 Dec 2010 13:08:09 -0500 Subject: Moving timer code for AMD issue diagnosis --- indra/llcommon/llfasttimer_class.cpp | 164 +++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) mode change 100644 => 100755 indra/llcommon/llfasttimer_class.cpp (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp old mode 100644 new mode 100755 index c45921cdec..a3e006d70b --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -35,7 +35,9 @@ #include + #if LL_WINDOWS +#include "lltimer.h" #elif LL_LINUX || LL_SOLARIS #include #include @@ -481,6 +483,19 @@ void LLFastTimer::NamedTimer::resetFrame() { if (sLog) { //output current frame counts to performance log + + static S32 call_count = 0; + if (call_count % 100 == 0) + { + llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl; + llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl; + llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl; + llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl; + llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl; + llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl; + } + call_count++; + F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency F64 total_time = 0; @@ -762,3 +777,152 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) ////////////////////////////////////////////////////////////////////////////// +// +// Important note: These implementations must be FAST! +// + + +#if LL_WINDOWS +// +// Windows implementation of CPU clock +// + +// +// NOTE: put back in when we aren't using platform sdk anymore +// +// because MS has different signatures for these functions in winnt.h +// need to rename them to avoid conflicts +//#define _interlockedbittestandset _renamed_interlockedbittestandset +//#define _interlockedbittestandreset _renamed_interlockedbittestandreset +//#include +//#undef _interlockedbittestandset +//#undef _interlockedbittestandreset + +//inline U32 LLFastTimer::getCPUClockCount32() +//{ +// U64 time_stamp = __rdtsc(); +// return (U32)(time_stamp >> 8); +//} +// +//// return full timer value, *not* shifted by 8 bits +//inline U64 LLFastTimer::getCPUClockCount64() +//{ +// return __rdtsc(); +//} + +// shift off lower 8 bits for lower resolution but longer term timing +// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing +#ifdef USE_RDTSC +inline U32 LLFastTimer::getCPUClockCount32() +{ + U32 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + shr eax,8 + shl edx,24 + or eax, edx + mov dword ptr [ret_val], eax + } + return ret_val; +} + +// return full timer value, *not* shifted by 8 bits +inline U64 LLFastTimer::getCPUClockCount64() +{ + U64 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + mov eax,eax + mov edx,edx + mov dword ptr [ret_val+4], edx + mov dword ptr [ret_val], eax + } + return ret_val; +} +#else +//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp +// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. +inline U32 LLFastTimer::getCPUClockCount32() +{ + return (U32)(get_clock_count()>>8); +} + +inline U64 LLFastTimer::getCPUClockCount64() +{ + return get_clock_count(); +} +#endif + +#endif + + +#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) +// +// Linux and Solaris implementation of CPU clock - non-x86. +// This is accurate but SLOW! Only use out of desperation. +// +// Try to use the MONOTONIC clock if available, this is a constant time counter +// with nanosecond resolution (but not necessarily accuracy) and attempts are +// made to synchronize this value between cores at kernel start. It should not +// be affected by CPU frequency. If not available use the REALTIME clock, but +// this may be affected by NTP adjustments or other user activity affecting +// the system time. +inline U64 LLFastTimer::getCPUClockCount64() +{ + struct timespec tp; + +#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? + if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME +#endif + clock_gettime(CLOCK_REALTIME,&tp); + + return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; +} + +inline U32 LLFastTimer::getCPUClockCount32() +{ + return (U32)(LLFastTimer::getCPUClockCount64() >> 8); +} +#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) + + +#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) +// +// Mac+Linux+Solaris FAST x86 implementation of CPU clock +inline U32 LLFastTimer::getCPUClockCount32() +{ + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return (U32)(x >> 8); +} + +inline U64 LLFastTimer::getCPUClockCount64() +{ + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return x; +} +#endif + + +#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) +// +// Mac PPC (deprecated) implementation of CPU clock +// +// Just use gettimeofday implementation for now + +inline U32 LLFastTimer::getCPUClockCount32() +{ + return (U32)(get_clock_count()>>8); +} + +inline U64 LLFastTimer::getCPUClockCount64() +{ + return get_clock_count(); +} +#endif + -- cgit v1.2.3 From 3a40bdbe522994036bfb56937644eb323f97a269 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 1 Dec 2010 13:22:45 -0500 Subject: Fixing linux build failure --- indra/llcommon/llfasttimer_class.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index a3e006d70b..f48ecda8ce 100755 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -41,6 +41,7 @@ #elif LL_LINUX || LL_SOLARIS #include #include +#include "lltimer.h" #elif LL_DARWIN #include #include "lltimer.h" // get_clock_count() -- cgit v1.2.3 From 447e697e33bc6b0643524faa68614e087e936187 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 1 Dec 2010 15:01:06 -0500 Subject: Still fixing non-windows builds --- indra/llcommon/llfasttimer_class.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index f48ecda8ce..59f8547a12 100755 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -814,7 +814,7 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) // shift off lower 8 bits for lower resolution but longer term timing // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing #ifdef USE_RDTSC -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { U32 ret_val; __asm @@ -830,7 +830,7 @@ inline U32 LLFastTimer::getCPUClockCount32() } // return full timer value, *not* shifted by 8 bits -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { U64 ret_val; __asm @@ -847,12 +847,12 @@ inline U64 LLFastTimer::getCPUClockCount64() #else //LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp // These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { return (U32)(get_clock_count()>>8); } -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { return get_clock_count(); } @@ -872,7 +872,7 @@ inline U64 LLFastTimer::getCPUClockCount64() // be affected by CPU frequency. If not available use the REALTIME clock, but // this may be affected by NTP adjustments or other user activity affecting // the system time. -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { struct timespec tp; @@ -884,7 +884,7 @@ inline U64 LLFastTimer::getCPUClockCount64() return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; } -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { return (U32)(LLFastTimer::getCPUClockCount64() >> 8); } @@ -894,14 +894,14 @@ inline U32 LLFastTimer::getCPUClockCount32() #if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) // // Mac+Linux+Solaris FAST x86 implementation of CPU clock -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { U64 x; __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); return (U32)(x >> 8); } -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { U64 x; __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); @@ -916,12 +916,12 @@ inline U64 LLFastTimer::getCPUClockCount64() // // Just use gettimeofday implementation for now -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { return (U32)(get_clock_count()>>8); } -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { return get_clock_count(); } -- cgit v1.2.3 From 20737614b06766458242752c9eee595b124323d8 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Thu, 2 Dec 2010 13:12:20 -0500 Subject: Avoiding more RDTSC dependencies in fast timers --- indra/llcommon/llfasttimer_class.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index 59f8547a12..055660e85b 100755 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -63,6 +63,8 @@ BOOL LLFastTimer::sMetricLog = FALSE; LLMutex* LLFastTimer::sLogLock = NULL; std::queue LLFastTimer::sLogQueue; +#define USE_RDTSC 0 + #if LL_LINUX || LL_SOLARIS U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution #else @@ -236,10 +238,20 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer #else // windows or x86-mac or x86-linux or x86-solaris U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer { +#if USE_RDTSC || !LL_WINDOWS //getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0); // we drop the low-order byte in our timers, so report a lower frequency +#else + static bool firstcall = true; + static U64 sCPUClockFrequency; + if (firstcall) + { + QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency); + firstcall = false; + } +#endif return sCPUClockFrequency >> 8; } #endif @@ -813,7 +825,7 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) // shift off lower 8 bits for lower resolution but longer term timing // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing -#ifdef USE_RDTSC +#if USE_RDTSC U32 LLFastTimer::getCPUClockCount32() { U32 ret_val; -- cgit v1.2.3 From f0bca5f0683db1540b640191ab2780224aceb800 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Thu, 2 Dec 2010 16:58:46 -0500 Subject: Cleaning up fast timer fixes --- indra/llcommon/llfasttimer_class.cpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index 055660e85b..558a4dc005 100755 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -244,6 +244,9 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer // we drop the low-order byte in our timers, so report a lower frequency #else + // If we're not using RDTSC, each fasttimer tick is just a performance counter tick. + // Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency()) + // since that would change displayed MHz stats for CPUs static bool firstcall = true; static U64 sCPUClockFrequency; if (firstcall) @@ -497,18 +500,6 @@ void LLFastTimer::NamedTimer::resetFrame() if (sLog) { //output current frame counts to performance log - static S32 call_count = 0; - if (call_count % 100 == 0) - { - llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl; - llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl; - llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl; - llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl; - llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl; - llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl; - } - call_count++; - F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency F64 total_time = 0; -- cgit v1.2.3 From 2f4a3197bfdb5200d4472343b729fe7ff7eb9225 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Fri, 3 Dec 2010 15:40:42 -0500 Subject: Added timer-related logging during logperformance --- indra/llcommon/llfasttimer_class.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) mode change 100644 => 100755 indra/llcommon/llfasttimer_class.cpp (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp old mode 100644 new mode 100755 index c084166ccc..0828635881 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -501,6 +501,18 @@ void LLFastTimer::NamedTimer::resetFrame() if (sLog) { //output current frame counts to performance log + static S32 call_count = 0; + if (call_count % 100 == 0) + { + llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl; + llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl; + llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl; + llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl; + llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl; + llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl; + } + call_count++; + F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency F64 total_time = 0; -- cgit v1.2.3 From 28b628ab7b92243a6e40fcdf87f4e2c5b44150e7 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 29 Dec 2010 10:03:33 -0500 Subject: allow run-time query of which timer function is being used --- indra/llcommon/llfasttimer_class.cpp | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) mode change 100644 => 100755 indra/llcommon/llfasttimer_class.cpp (limited to 'indra/llcommon/llfasttimer_class.cpp') diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp old mode 100644 new mode 100755 index 0828635881..bd594b06cf --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -860,6 +860,9 @@ U64 LLFastTimer::getCPUClockCount64() } return ret_val; } + +std::string LLFastTimer::sClockType = "rdtsc"; + #else //LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp // These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. @@ -872,6 +875,8 @@ U64 LLFastTimer::getCPUClockCount64() { return get_clock_count(); } + +std::string LLFastTimer::sClockType = "QueryPerformanceCounter"; #endif #endif @@ -904,6 +909,9 @@ U32 LLFastTimer::getCPUClockCount32() { return (U32)(LLFastTimer::getCPUClockCount64() >> 8); } + +std::string LLFastTimer::sClockType = "clock_gettime"; + #endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) @@ -923,23 +931,7 @@ U64 LLFastTimer::getCPUClockCount64() __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); return x; } -#endif - - -#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) -// -// Mac PPC (deprecated) implementation of CPU clock -// -// Just use gettimeofday implementation for now -U32 LLFastTimer::getCPUClockCount32() -{ - return (U32)(get_clock_count()>>8); -} - -U64 LLFastTimer::getCPUClockCount64() -{ - return get_clock_count(); -} +std::string LLFastTimer::sClockType = "rdtsc"; #endif -- cgit v1.2.3