From 1e49e9ae323395721ef2d845979b0cec5abb9b75 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 1 Dec 2010 13:08:09 -0500 Subject: Moving timer code for AMD issue diagnosis --- indra/llcommon/llfasttimer.h | 151 +------------------------------- indra/llcommon/llfasttimer_class.cpp | 164 +++++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 148 deletions(-) mode change 100644 => 100755 indra/llcommon/llfasttimer_class.cpp diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 5c2df877b0..c177027f4e 100755 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -27,155 +27,10 @@ #ifndef LL_FASTTIMER_H #define LL_FASTTIMER_H +// Temporarily(?) de-inlined these functions to simplify diagnosis of problems. +// Implementations of getCPUClockCount32() and getCPUClockCount64() are now in llfasttimer_class.cpp. + // pull in the actual class definition #include "llfasttimer_class.h" -// -// Important note: These implementations must be FAST! 
-// - -#if LL_WINDOWS -// -// Windows implementation of CPU clock -// - -// -// NOTE: put back in when we aren't using platform sdk anymore -// -// because MS has different signatures for these functions in winnt.h -// need to rename them to avoid conflicts -//#define _interlockedbittestandset _renamed_interlockedbittestandset -//#define _interlockedbittestandreset _renamed_interlockedbittestandreset -//#include <intrin.h> -//#undef _interlockedbittestandset -//#undef _interlockedbittestandreset - -//inline U32 LLFastTimer::getCPUClockCount32() -//{ -// U64 time_stamp = __rdtsc(); -// return (U32)(time_stamp >> 8); -//} -// -//// return full timer value, *not* shifted by 8 bits -//inline U64 LLFastTimer::getCPUClockCount64() -//{ -// return __rdtsc(); -//} - -// shift off lower 8 bits for lower resolution but longer term timing -// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing -#ifdef USE_RDTSC -inline U32 LLFastTimer::getCPUClockCount32() -{ - U32 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - shr eax,8 - shl edx,24 - or eax, edx - mov dword ptr [ret_val], eax - } - return ret_val; -} - -// return full timer value, *not* shifted by 8 bits -inline U64 LLFastTimer::getCPUClockCount64() -{ - U64 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - mov eax,eax - mov edx,edx - mov dword ptr [ret_val+4], edx - mov dword ptr [ret_val], eax - } - return ret_val; -} -#else -LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp -// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. -inline U32 LLFastTimer::getCPUClockCount32() -{ - return (U32)(get_clock_count()>>8); -} - -inline U64 LLFastTimer::getCPUClockCount64() -{ - return get_clock_count(); -} -#endif - -#endif - - -#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) -// -// Linux and Solaris implementation of CPU clock - non-x86. -// This is accurate but SLOW! Only use out of desperation. 
-// -// Try to use the MONOTONIC clock if available, this is a constant time counter -// with nanosecond resolution (but not necessarily accuracy) and attempts are -// made to synchronize this value between cores at kernel start. It should not -// be affected by CPU frequency. If not available use the REALTIME clock, but -// this may be affected by NTP adjustments or other user activity affecting -// the system time. -inline U64 LLFastTimer::getCPUClockCount64() -{ - struct timespec tp; - -#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? - if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME -#endif - clock_gettime(CLOCK_REALTIME,&tp); - - return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; -} - -inline U32 LLFastTimer::getCPUClockCount32() -{ - return (U32)(LLFastTimer::getCPUClockCount64() >> 8); -} -#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) - - -#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) -// -// Mac+Linux+Solaris FAST x86 implementation of CPU clock -inline U32 LLFastTimer::getCPUClockCount32() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return (U32)(x >> 8); -} - -inline U64 LLFastTimer::getCPUClockCount64() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return x; -} -#endif - - -#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) -// -// Mac PPC (deprecated) implementation of CPU clock -// -// Just use gettimeofday implementation for now - -inline U32 LLFastTimer::getCPUClockCount32() -{ - return (U32)(get_clock_count()>>8); -} - -inline U64 LLFastTimer::getCPUClockCount64() -{ - return get_clock_count(); -} -#endif - #endif // LL_LLFASTTIMER_H diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp old mode 100644 new mode 100755 index c45921cdec..a3e006d70b --- a/indra/llcommon/llfasttimer_class.cpp +++ 
b/indra/llcommon/llfasttimer_class.cpp @@ -35,7 +35,9 @@ #include + #if LL_WINDOWS +#include "lltimer.h" #elif LL_LINUX || LL_SOLARIS #include #include @@ -481,6 +483,19 @@ void LLFastTimer::NamedTimer::resetFrame() { if (sLog) { //output current frame counts to performance log + + static S32 call_count = 0; + if (call_count % 100 == 0) + { + llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl; + llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl; + llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl; + llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl; + llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl; + llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl; + } + call_count++; + F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency F64 total_time = 0; @@ -762,3 +777,152 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) ////////////////////////////////////////////////////////////////////////////// +// +// Important note: These implementations must be FAST! 
+// + + +#if LL_WINDOWS +// +// Windows implementation of CPU clock +// + +// +// NOTE: put back in when we aren't using platform sdk anymore +// +// because MS has different signatures for these functions in winnt.h +// need to rename them to avoid conflicts +//#define _interlockedbittestandset _renamed_interlockedbittestandset +//#define _interlockedbittestandreset _renamed_interlockedbittestandreset +//#include <intrin.h> +//#undef _interlockedbittestandset +//#undef _interlockedbittestandreset + +//inline U32 LLFastTimer::getCPUClockCount32() +//{ +// U64 time_stamp = __rdtsc(); +// return (U32)(time_stamp >> 8); +//} +// +//// return full timer value, *not* shifted by 8 bits +//inline U64 LLFastTimer::getCPUClockCount64() +//{ +// return __rdtsc(); +//} + +// shift off lower 8 bits for lower resolution but longer term timing +// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing +#ifdef USE_RDTSC +inline U32 LLFastTimer::getCPUClockCount32() +{ + U32 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + shr eax,8 + shl edx,24 + or eax, edx + mov dword ptr [ret_val], eax + } + return ret_val; +} + +// return full timer value, *not* shifted by 8 bits +inline U64 LLFastTimer::getCPUClockCount64() +{ + U64 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + mov eax,eax + mov edx,edx + mov dword ptr [ret_val+4], edx + mov dword ptr [ret_val], eax + } + return ret_val; +} +#else +//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp +// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. +inline U32 LLFastTimer::getCPUClockCount32() +{ + return (U32)(get_clock_count()>>8); +} + +inline U64 LLFastTimer::getCPUClockCount64() +{ + return get_clock_count(); +} +#endif + +#endif + + +#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) +// +// Linux and Solaris implementation of CPU clock - non-x86. +// This is accurate but SLOW! Only use out of desperation. 
+// +// Try to use the MONOTONIC clock if available, this is a constant time counter +// with nanosecond resolution (but not necessarily accuracy) and attempts are +// made to synchronize this value between cores at kernel start. It should not +// be affected by CPU frequency. If not available use the REALTIME clock, but +// this may be affected by NTP adjustments or other user activity affecting +// the system time. +inline U64 LLFastTimer::getCPUClockCount64() +{ + struct timespec tp; + +#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? + if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME +#endif + clock_gettime(CLOCK_REALTIME,&tp); + + return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; +} + +inline U32 LLFastTimer::getCPUClockCount32() +{ + return (U32)(LLFastTimer::getCPUClockCount64() >> 8); +} +#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) + + +#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) +// +// Mac+Linux+Solaris FAST x86 implementation of CPU clock +inline U32 LLFastTimer::getCPUClockCount32() +{ + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return (U32)(x >> 8); +} + +inline U64 LLFastTimer::getCPUClockCount64() +{ + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return x; +} +#endif + + +#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) +// +// Mac PPC (deprecated) implementation of CPU clock +// +// Just use gettimeofday implementation for now + +inline U32 LLFastTimer::getCPUClockCount32() +{ + return (U32)(get_clock_count()>>8); +} + +inline U64 LLFastTimer::getCPUClockCount64() +{ + return get_clock_count(); +} +#endif + -- cgit v1.2.3 From 3a40bdbe522994036bfb56937644eb323f97a269 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 1 Dec 2010 13:22:45 -0500 Subject: Fixing linux build failure --- indra/llcommon/llfasttimer_class.cpp | 1 + 
1 file changed, 1 insertion(+) diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index a3e006d70b..f48ecda8ce 100755 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -41,6 +41,7 @@ #elif LL_LINUX || LL_SOLARIS #include #include +#include "lltimer.h" #elif LL_DARWIN #include #include "lltimer.h" // get_clock_count() -- cgit v1.2.3 From 447e697e33bc6b0643524faa68614e087e936187 Mon Sep 17 00:00:00 2001 From: "Brad Payne (Vir Linden)" Date: Wed, 1 Dec 2010 15:01:06 -0500 Subject: Still fixing non-windows builds --- indra/llcommon/llfasttimer_class.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index f48ecda8ce..59f8547a12 100755 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -814,7 +814,7 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) // shift off lower 8 bits for lower resolution but longer term timing // on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing #ifdef USE_RDTSC -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { U32 ret_val; __asm @@ -830,7 +830,7 @@ inline U32 LLFastTimer::getCPUClockCount32() } // return full timer value, *not* shifted by 8 bits -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { U64 ret_val; __asm @@ -847,12 +847,12 @@ inline U64 LLFastTimer::getCPUClockCount64() #else //LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp // These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. 
-inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { return (U32)(get_clock_count()>>8); } -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { return get_clock_count(); } @@ -872,7 +872,7 @@ inline U64 LLFastTimer::getCPUClockCount64() // be affected by CPU frequency. If not available use the REALTIME clock, but // this may be affected by NTP adjustments or other user activity affecting // the system time. -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { struct timespec tp; @@ -884,7 +884,7 @@ inline U64 LLFastTimer::getCPUClockCount64() return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; } -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { return (U32)(LLFastTimer::getCPUClockCount64() >> 8); } @@ -894,14 +894,14 @@ inline U32 LLFastTimer::getCPUClockCount32() #if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) // // Mac+Linux+Solaris FAST x86 implementation of CPU clock -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { U64 x; __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); return (U32)(x >> 8); } -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { U64 x; __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); @@ -916,12 +916,12 @@ inline U64 LLFastTimer::getCPUClockCount64() // // Just use gettimeofday implementation for now -inline U32 LLFastTimer::getCPUClockCount32() +U32 LLFastTimer::getCPUClockCount32() { return (U32)(get_clock_count()>>8); } -inline U64 LLFastTimer::getCPUClockCount64() +U64 LLFastTimer::getCPUClockCount64() { return get_clock_count(); } -- cgit v1.2.3 From 7738548af89fd0f343fa90f890ad7598be70d47c Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Wed, 1 Dec 2010 16:59:43 -0600 Subject: Add GTX 580 to GPU table. 
--- indra/newview/gpu_table.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/indra/newview/gpu_table.txt b/indra/newview/gpu_table.txt index da888bc64d..bf604d6805 100644 --- a/indra/newview/gpu_table.txt +++ b/indra/newview/gpu_table.txt @@ -207,6 +207,7 @@ NVIDIA GTX 280 .*NVIDIA.*GeForce GTX 28.* 3 1 NVIDIA GTX 290 .*NVIDIA.*GeForce GTX 29.* 3 1 NVIDIA GTX 470 .*NVIDIA.*GeForce GTX 47.* 3 1 NVIDIA GTX 480 .*NVIDIA.*GeForce GTX 48.* 3 1 +NVIDIA GTX 580 .*NVIDIA.*GeForce GTX 58.* 3 1 NVIDIA C51 .*NVIDIA.*C51.* 0 1 NVIDIA G72 .*NVIDIA.*G72.* 1 1 NVIDIA G73 .*NVIDIA.*G73.* 1 1 -- cgit v1.2.3 From 398b392dc06f10b643d9930ffc6fd50f611b570b Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Wed, 1 Dec 2010 17:00:22 -0600 Subject: SH-391 Do some bounds checking on texture entry indices when aligning planar faces. --- indra/newview/llpanelface.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/indra/newview/llpanelface.cpp b/indra/newview/llpanelface.cpp index bce496cbad..07c7f35989 100644 --- a/indra/newview/llpanelface.cpp +++ b/indra/newview/llpanelface.cpp @@ -376,6 +376,11 @@ struct LLPanelFaceSetAlignedTEFunctor : public LLSelectedTEFunctor return true; } + if (facep->getViewerObject()->getVolume()->getNumVolumeFaces() <= te) + { + return true; + } + bool set_aligned = true; if (facep == mCenterFace) { @@ -418,6 +423,12 @@ struct LLPanelFaceGetIsAlignedTEFunctor : public LLSelectedTEFunctor { return false; } + + if (facep->getViewerObject()->getVolume()->getNumVolumeFaces() <= te) + { //volume face does not exist, can't be aligned + return false; + } + if (facep == mCenterFace) { return true; -- cgit v1.2.3 From d06c9fba902b16db6906790bab349c6a01fc04ad Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 2 Dec 2010 01:23:46 -0600 Subject: SH-437 Fix for crash when setting RenderGlow to FALSE when deferred rendering is on. 
--- indra/newview/pipeline.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 78d1044ff3..b538969164 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -740,7 +740,11 @@ void LLPipeline::updateRenderDeferred() gSavedSettings.getBOOL("WindLightUseAtmosShaders")) ? TRUE : FALSE) && !gUseWireframe; - sRenderDeferred = deferred; + sRenderDeferred = deferred; + if (deferred) + { //must render glow when rendering deferred since post effect pass is needed to present any lighting at all + sRenderGlow = TRUE; + } } void LLPipeline::releaseGLBuffers() @@ -833,7 +837,6 @@ void LLPipeline::createGLBuffers() allocateScreenBuffer(resX,resY); mScreenWidth = 0; mScreenHeight = 0; - } if (sRenderDeferred) -- cgit v1.2.3