diff options
Diffstat (limited to 'indra')
| -rwxr-xr-x | indra/llcommon/llfasttimer.h | 151 | ||||
| -rw-r--r-- | indra/llcommon/llfasttimer_class.cpp | 165 | ||||
| -rw-r--r-- | indra/newview/gpu_table.txt | 1 | ||||
| -rw-r--r-- | indra/newview/llpanelface.cpp | 11 | ||||
| -rw-r--r-- | indra/newview/pipeline.cpp | 7 | 
5 files changed, 185 insertions, 150 deletions
| diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 5c2df877b0..c177027f4e 100755 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -27,155 +27,10 @@  #ifndef LL_FASTTIMER_H  #define LL_FASTTIMER_H +// Temporarily(?) de-inlined these functions to simplify diagnosis of problems. +// Implementation of getCPUClockCount32() and getCPUClockCount64 are now in llfastertimer_class.cpp. +  // pull in the actual class definition  #include "llfasttimer_class.h" -// -// Important note: These implementations must be FAST! -// - -#if LL_WINDOWS -// -// Windows implementation of CPU clock -// - -// -// NOTE: put back in when we aren't using platform sdk anymore -// -// because MS has different signatures for these functions in winnt.h -// need to rename them to avoid conflicts -//#define _interlockedbittestandset _renamed_interlockedbittestandset -//#define _interlockedbittestandreset _renamed_interlockedbittestandreset -//#include <intrin.h> -//#undef _interlockedbittestandset -//#undef _interlockedbittestandreset - -//inline U32 LLFastTimer::getCPUClockCount32() -//{ -//	U64 time_stamp = __rdtsc(); -//	return (U32)(time_stamp >> 8); -//} -// -//// return full timer value, *not* shifted by 8 bits -//inline U64 LLFastTimer::getCPUClockCount64() -//{ -//	return __rdtsc(); -//} - -// shift off lower 8 bits for lower resolution but longer term timing -// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing -#ifdef USE_RDTSC -inline U32 LLFastTimer::getCPUClockCount32() -{ -	U32 ret_val; -	__asm -	{ -        _emit   0x0f -        _emit   0x31 -		shr eax,8 -		shl edx,24 -		or eax, edx -		mov dword ptr [ret_val], eax -	} -    return ret_val; -} - -// return full timer value, *not* shifted by 8 bits -inline U64 LLFastTimer::getCPUClockCount64() -{ -	U64 ret_val; -	__asm -	{ -        _emit   0x0f -        _emit   0x31 -		mov eax,eax -		mov edx,edx -		mov dword ptr [ret_val+4], edx -		mov dword ptr [ret_val], eax -	} -    return ret_val; -} -#else -LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp -// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. -inline U32 LLFastTimer::getCPUClockCount32() -{ -	return (U32)(get_clock_count()>>8); -} - -inline U64 LLFastTimer::getCPUClockCount64() -{ -	return get_clock_count(); -} -#endif - -#endif - - -#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) -// -// Linux and Solaris implementation of CPU clock - non-x86. -// This is accurate but SLOW!  Only use out of desperation. -// -// Try to use the MONOTONIC clock if available, this is a constant time counter -// with nanosecond resolution (but not necessarily accuracy) and attempts are -// made to synchronize this value between cores at kernel start. It should not -// be affected by CPU frequency. If not available use the REALTIME clock, but -// this may be affected by NTP adjustments or other user activity affecting -// the system time. -inline U64 LLFastTimer::getCPUClockCount64() -{ -	struct timespec tp; -	 -#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? -	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME -#endif -		clock_gettime(CLOCK_REALTIME,&tp); - -	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;         -} - -inline U32 LLFastTimer::getCPUClockCount32() -{ -	return (U32)(LLFastTimer::getCPUClockCount64() >> 8); -} -#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) - - -#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) -// -// Mac+Linux+Solaris FAST x86 implementation of CPU clock -inline U32 LLFastTimer::getCPUClockCount32() -{ -	U64 x; -	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); -	return (U32)(x >> 8); -} - -inline U64 LLFastTimer::getCPUClockCount64() -{ -	U64 x; -	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); -	return x; -} -#endif - - -#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) -// -// Mac PPC (deprecated) implementation of CPU clock -// -// Just use gettimeofday implementation for now - -inline U32 LLFastTimer::getCPUClockCount32() -{ -	return (U32)(get_clock_count()>>8); -} - -inline U64 LLFastTimer::getCPUClockCount64() -{ -	return get_clock_count(); -} -#endif -  #endif // LL_LLFASTTIMER_H diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index bce87ada96..fba8a3bb57 100644 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -35,10 +35,13 @@  #include <boost/bind.hpp> +  #if LL_WINDOWS +#include "lltimer.h"  #elif LL_LINUX || LL_SOLARIS  #include <sys/time.h>  #include <sched.h> +#include "lltimer.h"  #elif LL_DARWIN  #include <sys/time.h>  #include "lltimer.h"	// get_clock_count() @@ -482,6 +485,19 @@ void LLFastTimer::NamedTimer::resetFrame()  {  	if (sLog)  	{ //output current frame counts to performance log + +		static S32 call_count = 0; +		if (call_count % 100 == 0) +		{ +			llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl; +			llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl; +			llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl; +			llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl; +			llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl; +			llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl; +		} +		call_count++; +		  		F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency  		F64 total_time = 0; @@ -763,3 +779,152 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)  ////////////////////////////////////////////////////////////////////////////// +// +// Important note: These implementations must be FAST! +// + + +#if LL_WINDOWS +// +// Windows implementation of CPU clock +// + +// +// NOTE: put back in when we aren't using platform sdk anymore +// +// because MS has different signatures for these functions in winnt.h +// need to rename them to avoid conflicts +//#define _interlockedbittestandset _renamed_interlockedbittestandset +//#define _interlockedbittestandreset _renamed_interlockedbittestandreset +//#include <intrin.h> +//#undef _interlockedbittestandset +//#undef _interlockedbittestandreset + +//inline U32 LLFastTimer::getCPUClockCount32() +//{ +//	U64 time_stamp = __rdtsc(); +//	return (U32)(time_stamp >> 8); +//} +// +//// return full timer value, *not* shifted by 8 bits +//inline U64 LLFastTimer::getCPUClockCount64() +//{ +//	return __rdtsc(); +//} + +// shift off lower 8 bits for lower resolution but longer term timing +// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing +#ifdef USE_RDTSC +U32 LLFastTimer::getCPUClockCount32() +{ +	U32 ret_val; +	__asm +	{ +        _emit   0x0f +        _emit   0x31 +		shr eax,8 +		shl edx,24 +		or eax, edx +		mov dword ptr [ret_val], eax +	} +    return ret_val; +} + +// return full timer value, *not* shifted by 8 bits +U64 LLFastTimer::getCPUClockCount64() +{ +	U64 ret_val; +	__asm +	{ +        _emit   0x0f +        _emit   0x31 +		mov eax,eax +		mov edx,edx +		mov dword ptr [ret_val+4], edx +		mov dword ptr [ret_val], eax +	} +    return ret_val; +} +#else +//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp +// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. +U32 LLFastTimer::getCPUClockCount32() +{ +	return (U32)(get_clock_count()>>8); +} + +U64 LLFastTimer::getCPUClockCount64() +{ +	return get_clock_count(); +} +#endif + +#endif + + +#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) +// +// Linux and Solaris implementation of CPU clock - non-x86. +// This is accurate but SLOW!  Only use out of desperation. +// +// Try to use the MONOTONIC clock if available, this is a constant time counter +// with nanosecond resolution (but not necessarily accuracy) and attempts are +// made to synchronize this value between cores at kernel start. It should not +// be affected by CPU frequency. If not available use the REALTIME clock, but +// this may be affected by NTP adjustments or other user activity affecting +// the system time. +U64 LLFastTimer::getCPUClockCount64() +{ +	struct timespec tp; +	 +#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? +	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME +#endif +		clock_gettime(CLOCK_REALTIME,&tp); + +	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;         +} + +U32 LLFastTimer::getCPUClockCount32() +{ +	return (U32)(LLFastTimer::getCPUClockCount64() >> 8); +} +#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) + + +#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) +// +// Mac+Linux+Solaris FAST x86 implementation of CPU clock +U32 LLFastTimer::getCPUClockCount32() +{ +	U64 x; +	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); +	return (U32)(x >> 8); +} + +U64 LLFastTimer::getCPUClockCount64() +{ +	U64 x; +	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); +	return x; +} +#endif + + +#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) +// +// Mac PPC (deprecated) implementation of CPU clock +// +// Just use gettimeofday implementation for now + +U32 LLFastTimer::getCPUClockCount32() +{ +	return (U32)(get_clock_count()>>8); +} + +U64 LLFastTimer::getCPUClockCount64() +{ +	return get_clock_count(); +} +#endif + diff --git a/indra/newview/gpu_table.txt b/indra/newview/gpu_table.txt index da888bc64d..bf604d6805 100644 --- a/indra/newview/gpu_table.txt +++ b/indra/newview/gpu_table.txt @@ -207,6 +207,7 @@ NVIDIA GTX 280					.*NVIDIA.*GeForce GTX 28.*			3		1  NVIDIA GTX 290					.*NVIDIA.*GeForce GTX 29.*			3		1  NVIDIA GTX 470					.*NVIDIA.*GeForce GTX 47.*			3		1  NVIDIA GTX 480					.*NVIDIA.*GeForce GTX 48.*			3		1 +NVIDIA GTX 580					.*NVIDIA.*GeForce GTX 58.*			3		1  NVIDIA C51						.*NVIDIA.*C51.*						0		1  NVIDIA G72						.*NVIDIA.*G72.*						1		1  NVIDIA G73						.*NVIDIA.*G73.*						1		1 diff --git a/indra/newview/llpanelface.cpp b/indra/newview/llpanelface.cpp index bce496cbad..07c7f35989 100644 --- a/indra/newview/llpanelface.cpp +++ b/indra/newview/llpanelface.cpp @@ -376,6 +376,11 @@ struct LLPanelFaceSetAlignedTEFunctor : public LLSelectedTEFunctor  			return true;  		} +		if (facep->getViewerObject()->getVolume()->getNumVolumeFaces() <= te) +		{ +			return true; +		} +  		bool set_aligned = true;  		if (facep == mCenterFace)  		{ @@ -418,6 +423,12 @@ struct LLPanelFaceGetIsAlignedTEFunctor : public LLSelectedTEFunctor  		{  			return false;  		} + +		if (facep->getViewerObject()->getVolume()->getNumVolumeFaces() <= te) +		{ //volume face does not exist, can't be aligned +			return false; +		} +  		if (facep == mCenterFace)  		{  			return true; diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 022a3afc6a..751d14b27d 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -738,7 +738,11 @@ void LLPipeline::updateRenderDeferred()  					 gSavedSettings.getBOOL("WindLightUseAtmosShaders")) ? TRUE : FALSE) &&  					!gUseWireframe; -	sRenderDeferred = deferred;			 +	sRenderDeferred = deferred;	 +	if (deferred) +	{ //must render glow when rendering deferred since post effect pass is needed to present any lighting at all +		sRenderGlow = TRUE; +	}  }  void LLPipeline::releaseGLBuffers() @@ -831,7 +835,6 @@ void LLPipeline::createGLBuffers()  		allocateScreenBuffer(resX,resY);  		mScreenWidth = 0;  		mScreenHeight = 0; -  	}  	if (sRenderDeferred) | 
