5 files changed, 185 insertions, 150 deletions
diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h
index 5c2df877b0..c177027f4e 100755
--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@@ -27,155 +27,10 @@
 #ifndef LL_FASTTIMER_H
 #define LL_FASTTIMER_H
 
+// Temporarily(?) de-inlined these functions to simplify diagnosis of problems.
+// Implementation of getCPUClockCount32() and getCPUClockCount64 are now in llfastertimer_class.cpp.
+
 // pull in the actual class definition
 #include "llfasttimer_class.h"
 
-//
-// Important note: These implementations must be FAST!
-//
-
-#if LL_WINDOWS
-//
-// Windows implementation of CPU clock
-//
-
-//
-// NOTE: put back in when we aren't using platform sdk anymore
-//
-// because MS has different signatures for these functions in winnt.h
-// need to rename them to avoid conflicts
-//#define _interlockedbittestandset _renamed_interlockedbittestandset
-//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
-//#include <intrin.h>
-//#undef _interlockedbittestandset
-//#undef _interlockedbittestandreset
-
-//inline U32 LLFastTimer::getCPUClockCount32()
-//{
-//	U64 time_stamp = __rdtsc();
-//	return (U32)(time_stamp >> 8);
-//}
-//
-//// return full timer value, *not* shifted by 8 bits
-//inline U64 LLFastTimer::getCPUClockCount64()
-//{
-//	return __rdtsc();
-//}
-
-// shift off lower 8 bits for lower resolution but longer term timing
-// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
-#ifdef USE_RDTSC
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	U32 ret_val;
-	__asm
-	{
-        _emit   0x0f
-        _emit   0x31
-		shr eax,8
-		shl edx,24
-		or eax, edx
-		mov dword ptr [ret_val], eax
-	}
-    return ret_val;
-}
-
-// return full timer value, *not* shifted by 8 bits
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	U64 ret_val;
-	__asm
-	{
-        _emit   0x0f
-        _emit   0x31
-		mov eax,eax
-		mov edx,edx
-		mov dword ptr [ret_val+4], edx
-		mov dword ptr [ret_val], eax
-	}
-    return ret_val;
-}
-#else
-LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
-// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures.
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	return (U32)(get_clock_count()>>8);
-}
-
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	return get_clock_count();
-}
-#endif
-
-#endif
-
-
-#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
-//
-// Linux and Solaris implementation of CPU clock - non-x86.
-// This is accurate but SLOW!  Only use out of desperation.
-//
-// Try to use the MONOTONIC clock if available, this is a constant time counter
-// with nanosecond resolution (but not necessarily accuracy) and attempts are
-// made to synchronize this value between cores at kernel start. It should not
-// be affected by CPU frequency. If not available use the REALTIME clock, but
-// this may be affected by NTP adjustments or other user activity affecting
-// the system time.
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	struct timespec tp;
-	
-#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
-	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
-#endif
-		clock_gettime(CLOCK_REALTIME,&tp);
-
-	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
-}
-
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
-}
-#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
-
-
-#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
-//
-// Mac+Linux+Solaris FAST x86 implementation of CPU clock
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
-	return (U32)(x >> 8);
-}
-
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	U64 x;
-	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
-	return x;
-}
-#endif
-
-
-#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)))
-//
-// Mac PPC (deprecated) implementation of CPU clock
-//
-// Just use gettimeofday implementation for now
-
-inline U32 LLFastTimer::getCPUClockCount32()
-{
-	return (U32)(get_clock_count()>>8);
-}
-
-inline U64 LLFastTimer::getCPUClockCount64()
-{
-	return get_clock_count();
-}
-#endif
-
 #endif // LL_LLFASTTIMER_H
diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp
index bce87ada96..fba8a3bb57 100644
--- a/indra/llcommon/llfasttimer_class.cpp
+++ b/indra/llcommon/llfasttimer_class.cpp
@@ -35,10 +35,13 @@
 
 #include <boost/bind.hpp>
 
+
 #if LL_WINDOWS
+#include "lltimer.h"
 #elif LL_LINUX || LL_SOLARIS
 #include <sys/time.h>
 #include <sched.h>
+#include "lltimer.h"
 #elif LL_DARWIN
 #include <sys/time.h>
 #include "lltimer.h"	// get_clock_count()
@@ -482,6 +485,19 @@ void LLFastTimer::NamedTimer::resetFrame()
 {
 	if (sLog)
 	{ //output current frame counts to performance log
+
+		static S32 call_count = 0;
+		if (call_count % 100 == 0)
+		{
+			llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl;
+			llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl;
+			llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl;
+			llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl;
+			llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl;
+			llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl;
+		}
+		call_count++;
+		
 		F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency
 
 		F64 total_time = 0;
@@ -763,3 +779,152 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
 
 
 //////////////////////////////////////////////////////////////////////////////
+//
+// Important note: These implementations must be FAST!
+//
+
+
+#if LL_WINDOWS
+//
+// Windows implementation of CPU clock
+//
+
+//
+// NOTE: put back in when we aren't using platform sdk anymore
+//
+// because MS has different signatures for these functions in winnt.h
+// need to rename them to avoid conflicts
+//#define _interlockedbittestandset _renamed_interlockedbittestandset
+//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
+//#include <intrin.h>
+//#undef _interlockedbittestandset
+//#undef _interlockedbittestandreset
+
+//inline U32 LLFastTimer::getCPUClockCount32()
+//{
+//	U64 time_stamp = __rdtsc();
+//	return (U32)(time_stamp >> 8);
+//}
+//
+//// return full timer value, *not* shifted by 8 bits
+//inline U64 LLFastTimer::getCPUClockCount64()
+//{
+//	return __rdtsc();
+//}
+
+// shift off lower 8 bits for lower resolution but longer term timing
+// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
+#ifdef USE_RDTSC
+U32 LLFastTimer::getCPUClockCount32()
+{
+	U32 ret_val;
+	__asm
+	{
+        _emit   0x0f
+        _emit   0x31
+		shr eax,8
+		shl edx,24
+		or eax, edx
+		mov dword ptr [ret_val], eax
+	}
+    return ret_val;
+}
+
+// return full timer value, *not* shifted by 8 bits
+U64 LLFastTimer::getCPUClockCount64()
+{
+	U64 ret_val;
+	__asm
+	{
+        _emit   0x0f
+        _emit   0x31
+		mov eax,eax
+		mov edx,edx
+		mov dword ptr [ret_val+4], edx
+		mov dword ptr [ret_val], eax
+	}
+    return ret_val;
+}
+#else
+//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
+// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures.
+U32 LLFastTimer::getCPUClockCount32()
+{
+	return (U32)(get_clock_count()>>8);
+}
+
+U64 LLFastTimer::getCPUClockCount64()
+{
+	return get_clock_count();
+}
+#endif
+
+#endif
+
+
+#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+//
+// Linux and Solaris implementation of CPU clock - non-x86.
+// This is accurate but SLOW!  Only use out of desperation.
+//
+// Try to use the MONOTONIC clock if available, this is a constant time counter
+// with nanosecond resolution (but not necessarily accuracy) and attempts are
+// made to synchronize this value between cores at kernel start. It should not
+// be affected by CPU frequency. If not available use the REALTIME clock, but
+// this may be affected by NTP adjustments or other user activity affecting
+// the system time.
+U64 LLFastTimer::getCPUClockCount64()
+{
+	struct timespec tp;
+	
+#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
+	if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
+#endif
+		clock_gettime(CLOCK_REALTIME,&tp);
+
+	return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;        
+}
+
+U32 LLFastTimer::getCPUClockCount32()
+{
+	return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
+}
+#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
+
+
+#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
+//
+// Mac+Linux+Solaris FAST x86 implementation of CPU clock
+U32 LLFastTimer::getCPUClockCount32()
+{
+	U64 x;
+	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+	return (U32)(x >> 8);
+}
+
+U64 LLFastTimer::getCPUClockCount64()
+{
+	U64 x;
+	__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
+	return x;
+}
+#endif
+
+
+#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__)))
+//
+// Mac PPC (deprecated) implementation of CPU clock
+//
+// Just use gettimeofday implementation for now
+
+U32 LLFastTimer::getCPUClockCount32()
+{
+	return (U32)(get_clock_count()>>8);
+}
+
+U64 LLFastTimer::getCPUClockCount64()
+{
+	return get_clock_count();
+}
+#endif
+
diff --git a/indra/newview/gpu_table.txt b/indra/newview/gpu_table.txt
index da888bc64d..bf604d6805 100644
--- a/indra/newview/gpu_table.txt
+++ b/indra/newview/gpu_table.txt
@@ -207,6 +207,7 @@ NVIDIA GTX 280					.*NVIDIA.*GeForce GTX 28.*			3		1
 NVIDIA GTX 290					.*NVIDIA.*GeForce GTX 29.*			3		1
 NVIDIA GTX 470					.*NVIDIA.*GeForce GTX 47.*			3		1
 NVIDIA GTX 480					.*NVIDIA.*GeForce GTX 48.*			3		1
+NVIDIA GTX 580					.*NVIDIA.*GeForce GTX 58.*			3		1
 NVIDIA C51						.*NVIDIA.*C51.*						0		1
 NVIDIA G72						.*NVIDIA.*G72.*						1		1
 NVIDIA G73						.*NVIDIA.*G73.*						1		1
diff --git a/indra/newview/llpanelface.cpp b/indra/newview/llpanelface.cpp
index bce496cbad..07c7f35989 100644
--- a/indra/newview/llpanelface.cpp
+++ b/indra/newview/llpanelface.cpp
@@ -376,6 +376,11 @@ struct LLPanelFaceSetAlignedTEFunctor : public LLSelectedTEFunctor
 			return true;
 		}
 
+		if (facep->getViewerObject()->getVolume()->getNumVolumeFaces() <= te)
+		{
+			return true;
+		}
+
 		bool set_aligned = true;
 		if (facep == mCenterFace)
 		{
@@ -418,6 +423,12 @@ struct LLPanelFaceGetIsAlignedTEFunctor : public LLSelectedTEFunctor
 		{
 			return false;
 		}
+
+		if (facep->getViewerObject()->getVolume()->getNumVolumeFaces() <= te)
+		{ //volume face does not exist, can't be aligned
+			return false;
+		}
+
 		if (facep == mCenterFace)
 		{
 			return true;
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 022a3afc6a..751d14b27d 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -738,7 +738,11 @@ void LLPipeline::updateRenderDeferred()
 					 gSavedSettings.getBOOL("WindLightUseAtmosShaders")) ? TRUE : FALSE) &&
 					!gUseWireframe;
 
-	sRenderDeferred = deferred;			
+	sRenderDeferred = deferred;	
+	if (deferred)
+	{ //must render glow when rendering deferred since post effect pass is needed to present any lighting at all
+		sRenderGlow = TRUE;
+	}
 }
 
 void LLPipeline::releaseGLBuffers()
@@ -831,7 +835,6 @@ void LLPipeline::createGLBuffers()
 		allocateScreenBuffer(resX,resY);
 		mScreenWidth = 0;
 		mScreenHeight = 0;
-
 	}
 	
 	if (sRenderDeferred)