6 files changed, 96 insertions, 50 deletions
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 7c52ffef21..558fe09323 100755
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -409,6 +409,26 @@ inline void LLVector4a::normalize3fast()
 	mQ = _mm_mul_ps( mQ, approxRsqrt );
 }
 
+// Normalize x,y,z in place using the fast approximate reciprocal square root.
+// On non-finite or degenerate input, assign *d instead (or (0,1,0,1) when d is NULL).
+inline void LLVector4a::normalize3fast_checked(LLVector4a* d)
+{
+	if (isFinite3())
+	{
+		LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
+		const F32 len_sq = lenSqrd.getF32ptr()[0];
+
+		// The upper bound rejects a squared length that overflowed to +inf:
+		// rsqrt(+inf) is 0 and would collapse a finite but huge vector to zero.
+		if (len_sq > FLT_EPSILON && len_sq <= FLT_MAX)
+		{
+			mQ = _mm_mul_ps( mQ, _mm_rsqrt_ps(lenSqrd.mQ) );
+			return;
+		}
+	}
+	*this = d ? *d : LLVector4a(0,1,0,1);
+}
+
 // Return true if this vector is normalized with respect to x,y,z up to tolerance
 inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const
 {
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 14cebfe5aa..58cac57e7f 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -4472,6 +4472,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 					continue; //skip degenerate face
 				}
 
+				LLVector4a default_norm;
+				default_norm.set(0,1,0,1);
+
 				//for each edge
 				for (S32 k = 0; k < 3; k++) {
 					S32 index = face.mEdge[j*3+k];
@@ -4493,14 +4496,14 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 
 						norm_mat.rotate(n[v1], t);
 
-						t.normalize3fast();
+						t.normalize3fast_checked(&default_norm);
 						normals.push_back(LLVector3(t[0], t[1], t[2]));
 
 						mat.affineTransform(v[v2], t);
 						vertices.push_back(LLVector3(t[0], t[1], t[2]));
 						
 						norm_mat.rotate(n[v2], t);
-						t.normalize3fast();
+						t.normalize3fast_checked(&default_norm);
 						normals.push_back(LLVector3(t[0], t[1], t[2]));
 					}
 				}		
@@ -6096,6 +6099,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 	{
 		VertexData	corners[4];
 		VertexData baseVert;
+		LLVector4a default_norm;
+		default_norm.set(0,1,0,1);
+
 		for(S32 t = 0; t < 4; t++)
 		{
 			corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV);
@@ -6108,8 +6114,8 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 			lhs.setSub(corners[1].getPosition(), corners[0].getPosition());
 			LLVector4a rhs;
 			rhs.setSub(corners[2].getPosition(), corners[1].getPosition());
-			baseVert.getNormal().setCross3(lhs, rhs); 
-			baseVert.getNormal().normalize3fast();
+			baseVert.getNormal().setCross3(lhs, rhs);
+			baseVert.getNormal().normalize3fast_checked(&default_norm);
 		}
 
 		if(!(mTypeMask & TOP_MASK))
@@ -6559,17 +6565,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]);
 
 	LLVector4a normal;
-	normal.setCross3(d0,d1);
-
-	if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO)
-	{
-		normal.normalize3fast();
-	}
-	else
-	{ //degenerate, make up a value
-		normal.set(0,0,1);
-	}
+	LLVector4a default_norm;
+	default_norm.set(0,1,0,1);
 
+	normal.setCross3(d0,d1);
+	normal.normalize3fast_checked(&default_norm);
+	
 	llassert(llfinite(normal.getF32ptr()[0]));
 	llassert(llfinite(normal.getF32ptr()[1]));
 	llassert(llfinite(normal.getF32ptr()[2]));
@@ -6611,11 +6612,13 @@ void LLVolumeFace::createTangents()
 		CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents);
 
 		//normalize tangents
+		LLVector4a default_norm;
+		default_norm.set(0,1,0,1);
 		for (U32 i = 0; i < mNumVertices; i++) 
 		{
 			//binorm[i].normalize3fast();
 			//bump map/planar projection code requires normals to be normalized
-			mNormals[i].normalize3fast();
+			mNormals[i].normalize3fast_checked(&default_norm);
 		}
 	}
 }
@@ -6793,6 +6796,9 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	mat.loadu(mat_in);
 	norm_mat.loadu(norm_mat_in);
 
+	LLVector4a default_norm;
+	default_norm.set(0,1,0,1);
+
 	for (U32 i = 0; i < face.mNumVertices; ++i)
 	{
 		//transform appended face position and store
@@ -6800,7 +6806,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 
 		//transform appended face normal and store
 		norm_mat.rotate(src_norm[i], dst_norm[i]);
-		dst_norm[i].normalize3fast();
+		dst_norm[i].normalize3fast_checked(&default_norm);
 
 		//copy appended face texture coordinate
 		dst_tc[i] = src_tc[i];
@@ -7213,42 +7219,41 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal,
         const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent)
 {
-    //LLVector4a *tan1 = new LLVector4a[vertexCount * 2];
 	LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a));
 
-    LLVector4a* tan2 = tan1 + vertexCount;
+ LLVector4a* tan2 = tan1 + vertexCount;
 
 	memset(tan1, 0, vertexCount*2*sizeof(LLVector4a));
         
-    for (U32 a = 0; a < triangleCount; a++)
-    {
-        U32 i1 = *index_array++;
-        U32 i2 = *index_array++;
-        U32 i3 = *index_array++;
+   for (U32 a = 0; a < triangleCount; a++)
+   {
+      U32 i1 = *index_array++;
+      U32 i2 = *index_array++;
+      U32 i3 = *index_array++;
         
-        const LLVector4a& v1 = vertex[i1];
-        const LLVector4a& v2 = vertex[i2];
-        const LLVector4a& v3 = vertex[i3];
+      const LLVector4a& v1 = vertex[i1];
+      const LLVector4a& v2 = vertex[i2];
+      const LLVector4a& v3 = vertex[i3];
         
-        const LLVector2& w1 = texcoord[i1];
-        const LLVector2& w2 = texcoord[i2];
-        const LLVector2& w3 = texcoord[i3];
+      const LLVector2& w1 = texcoord[i1];
+      const LLVector2& w2 = texcoord[i2];
+      const LLVector2& w3 = texcoord[i3];
         
 		const F32* v1ptr = v1.getF32ptr();
 		const F32* v2ptr = v2.getF32ptr();
 		const F32* v3ptr = v3.getF32ptr();
 		
-        float x1 = v2ptr[0] - v1ptr[0];
-        float x2 = v3ptr[0] - v1ptr[0];
-        float y1 = v2ptr[1] - v1ptr[1];
-        float y2 = v3ptr[1] - v1ptr[1];
-        float z1 = v2ptr[2] - v1ptr[2];
-        float z2 = v3ptr[2] - v1ptr[2];
+      float x1 = v2ptr[0] - v1ptr[0];
+      float x2 = v3ptr[0] - v1ptr[0];
+      float y1 = v2ptr[1] - v1ptr[1];
+      float y2 = v3ptr[1] - v1ptr[1];
+      float z1 = v2ptr[2] - v1ptr[2];
+      float z2 = v3ptr[2] - v1ptr[2];
         
-        float s1 = w2.mV[0] - w1.mV[0];
-        float s2 = w3.mV[0] - w1.mV[0];
-        float t1 = w2.mV[1] - w1.mV[1];
-        float t2 = w3.mV[1] - w1.mV[1];
+      float s1 = w2.mV[0] - w1.mV[0];
+      float s2 = w3.mV[0] - w1.mV[0];
+      float t1 = w2.mV[1] - w1.mV[1];
+      float t2 = w3.mV[1] - w1.mV[1];
         
 		F32 rd = s1*t2-s2*t1;
 
diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp
index 6e22712b94..09ef780424 100755
--- a/indra/llrender/llrendertarget.cpp
+++ b/indra/llrender/llrendertarget.cpp
@@ -572,8 +572,10 @@ void LLRenderTarget::copyContentsToFramebuffer(LLRenderTarget& source, S32 srcX0
 {
 	if (!source.mFBO)
 	{
-		llerrs << "Cannot copy framebuffer contents for non FBO render targets." << llendl;
+		llwarns << "Cannot copy framebuffer contents for non FBO render targets." << llendl;
+		return;
 	}
+
 	{
 		GLboolean write_depth = mask & GL_DEPTH_BUFFER_BIT ? TRUE : FALSE;
 
diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml
index 344079b640..ebb01fb330 100755
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -8592,6 +8592,18 @@
     </array>
   </map>
 
+  <key>RenderSpecularPrecision</key>
+  <map>
+    <key>Comment</key>
+    <string>Force 32-bit floating point LUT</string>
+    <key>Persist</key>
+    <integer>1</integer>
+    <key>Type</key>
+    <string>U32</string>
+    <key>Value</key>
+    <real>0</real>
+  </map>
+  
   <key>RenderSpecularResX</key>
   <map>
     <key>Comment</key>
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index f021f4ed0f..700b31f8d3 100755
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -817,12 +817,6 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f,
 			size.mul(scale);
 		}
 
-		// Catch potential badness from normalization before it happens
-		//
-		llassert(mat_normal.mMatrix[0].isFinite3() && (mat_normal.mMatrix[0].dot3(mat_normal.mMatrix[0]).getF32() > F_APPROXIMATELY_ZERO));
-		llassert(mat_normal.mMatrix[1].isFinite3() && (mat_normal.mMatrix[1].dot3(mat_normal.mMatrix[1]).getF32() > F_APPROXIMATELY_ZERO));
-		llassert(mat_normal.mMatrix[2].isFinite3() && (mat_normal.mMatrix[2].dot3(mat_normal.mMatrix[2]).getF32() > F_APPROXIMATELY_ZERO));
-
 		mat_normal.mMatrix[0].normalize3fast();
 		mat_normal.mMatrix[1].normalize3fast();
 		mat_normal.mMatrix[2].normalize3fast();
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index d37b086ae3..442b0b6d2d 100755
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -1201,7 +1201,15 @@ void LLPipeline::releaseLUTBuffers()
 {
 	if (mLightFunc)
 	{
-		LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, GL_R16F, 0, 1, &mLightFunc);
+		U32 use_high_precision = gSavedSettings.getU32("RenderSpecularPrecision");
+
+		U32 pix_format = use_high_precision ? GL_R32F : GL_R16F;
+
+#if LL_DARWIN
+		pix_format = GL_R32F;
+#endif
+
+		LLImageGL::deleteTextures(LLTexUnit::TT_TEXTURE, pix_format, 0, 1, &mLightFunc);
 		mLightFunc = 0;
 	}
 }
@@ -1400,13 +1408,18 @@ void LLPipeline::createLUTBuffers()
 					ls[y*lightResX+x] = k;*/
 				}
 			}
-			
-			U32 pix_format = GL_R16F;
-#if LL_DARWIN
 			// Need to work around limited precision with 10.6.8 and older drivers
 			//
+			U32 use_high_precision = gSavedSettings.getU32("RenderSpecularPrecision");
+				
+			U32 pix_format = use_high_precision ? GL_R32F : GL_R16F;
+
+#if LL_DARWIN
+			// Always forced on Mac to work around 10.6.8 driver bugs seen on nearly every GPU
+			//
 			pix_format = GL_R32F;
 #endif
+
 			LLImageGL::generateTextures(LLTexUnit::TT_TEXTURE, pix_format, 1, &mLightFunc);
 			gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mLightFunc);
 			LLImageGL::setManualImage(LLTexUnit::getInternalType(LLTexUnit::TT_TEXTURE), 0, pix_format, lightResX, lightResY, GL_RED, GL_FLOAT, ls, false);