From c3f14b915c38a4978745f12f1f816572cce4b5a0 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Mon, 3 Jun 2013 12:50:48 -0500
Subject: NORSPEC-229 Fix for bad binormals on mirrored surfaces (use tangent
 calculator instead of binormal calculator, convert binormal-centric code to
 tangent-centric)

---
 indra/llmath/llvolume.cpp       | 327 ++++++++++++++++++++++------------------
 indra/llmath/llvolume.h         |  36 ++---
 indra/llmath/llvolumeoctree.cpp |  66 +++++---
 indra/llmath/llvolumeoctree.h   |   8 +-
 4 files changed, 234 insertions(+), 203 deletions(-)

(limited to 'indra/llmath')
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 3f06e6b99e..bf03c971cd 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -2079,9 +2079,9 @@ void LLVolume::regen()
 	createVolumeFaces();
 }
 
-void LLVolume::genBinormals(S32 face)
+void LLVolume::genTangents(S32 face)
 {
-	mVolumeFaces[face].createBinormals();
+	mVolumeFaces[face].createTangents();
 }
 
 LLVolume::~LLVolume()
@@ -4392,7 +4392,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 				segments.push_back(vertices.size());
 #if DEBUG_SILHOUETTE_BINORMALS
 				vertices.push_back(face.mVertices[j].getPosition());
-				vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f);
+				vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mTangent*0.1f);
 				normals.push_back(LLVector3(0,0,1));
 				normals.push_back(LLVector3(0,0,1));
 				segments.push_back(vertices.size());
@@ -4508,22 +4508,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 	}
 }
 
-S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, 
-								   S32 face,
-								   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal)
-{
-	LLVector4a starta, enda;
-	starta.load3(start.mV);
-	enda.load3(end.mV);
-
-	return lineSegmentIntersect(starta, enda, face, intersection, tex_coord, normal, bi_normal);
-
-}
-
-
 S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, 
 								   S32 face,
-								   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal)
+								   LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent_out)
 {
 	S32 hit_face = -1;
 	
@@ -4561,9 +4548,9 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en
 
         if (LLLineSegmentBoxIntersect(start, end, box_center, box_size))
 		{
-			if (bi_normal != NULL) // if the caller wants binormals, we may need to generate them
+			if (tangent_out != NULL) // if the caller wants tangents, we may need to generate them
 			{
-				genBinormals(i);
+				genTangents(i);
 			}
 
 			if (isUnique())
@@ -4597,7 +4584,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en
 								LLVector4a intersect = dir;
 								intersect.mul(closest_t);
 								intersect.add(start);
-								intersection->set(intersect.getF32ptr());
+								*intersection = intersect;
 							}
 
 
@@ -4612,19 +4599,42 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en
 
 							if (normal!= NULL)
 							{
-								LLVector4* norm = (LLVector4*) face.mNormals;
-
-								*normal		= ((1.f - a - b)  * LLVector3(norm[idx0]) + 
-									a              * LLVector3(norm[idx1]) +
-									b              * LLVector3(norm[idx2]));
+								LLVector4a* norm = face.mNormals;
+								
+								LLVector4a n1,n2,n3;
+								n1 = norm[idx0];
+								n1.mul(1.f-a-b);
+								
+								n2 = norm[idx1];
+								n2.mul(a);
+								
+								n3 = norm[idx2];
+								n3.mul(b);
+
+								n1.add(n2);
+								n1.add(n3);
+								
+								*normal		= n1; 
 							}
 
-							if (bi_normal != NULL)
+							if (tangent_out != NULL)
 							{
-								LLVector4* binormal = (LLVector4*) face.mBinormals;
-								*bi_normal = ((1.f - a - b)  * LLVector3(binormal[idx0]) + 
-										a              * LLVector3(binormal[idx1]) +
-										b              * LLVector3(binormal[idx2]));
+								LLVector4a* tangents = face.mTangents;
+								
+								LLVector4a t1,t2,t3;
+								t1 = tangents[idx0];
+								t1.mul(1.f-a-b);
+								
+								t2 = tangents[idx1];
+								t2.mul(a);
+								
+								t3 = tangents[idx2];
+								t3.mul(b);
+
+								t1.add(t2);
+								t1.add(t3);
+								
+								*tangent_out = t1; 
 							}
 						}
 					}
@@ -4637,7 +4647,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en
 					face.createOctree();
 				}
 			
-				LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal);
+				LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, tangent_out);
 				intersect.traverse(face.mOctree);
 				if (intersect.mHitFace)
 				{
@@ -5183,7 +5193,7 @@ LLVolumeFace::LLVolumeFace() :
 	mNumIndices(0),
 	mPositions(NULL),
 	mNormals(NULL),
-	mBinormals(NULL),
+	mTangents(NULL),
 	mTexCoords(NULL),
 	mIndices(NULL),
 	mWeights(NULL),
@@ -5206,7 +5216,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
 	mNumIndices(0),
 	mPositions(NULL),
 	mNormals(NULL),
-	mBinormals(NULL),
+	mTangents(NULL),
 	mTexCoords(NULL),
 	mIndices(NULL),
 	mWeights(NULL),
@@ -5264,15 +5274,15 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 		}
 
 
-		if (src.mBinormals)
+		if (src.mTangents)
 		{
-			allocateBinormals(src.mNumVertices);
-			LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) src.mBinormals, vert_size);
+			allocateTangents(src.mNumVertices);
+			LLVector4a::memcpyNonAliased16((F32*) mTangents, (F32*) src.mTangents, vert_size);
 		}
 		else
 		{
-			ll_aligned_free_16(mBinormals);
-			mBinormals = NULL;
+			ll_aligned_free_16(mTangents);
+			mTangents = NULL;
 		}
 
 		if (src.mWeights)
@@ -5316,8 +5326,8 @@ void LLVolumeFace::freeData()
 	mTexCoords = NULL;
 	ll_aligned_free_16(mIndices);
 	mIndices = NULL;
-	ll_aligned_free_16(mBinormals);
-	mBinormals = NULL;
+	ll_aligned_free_16(mTangents);
+	mTangents = NULL;
 	ll_aligned_free_16(mWeights);
 	mWeights = NULL;
 
@@ -5897,7 +5907,7 @@ void LLVolumeFace::cacheOptimize()
 	}
 
 	LLVector4a* binorm = NULL;
-	if (mBinormals)
+	if (mTangents)
 	{
 		binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
 	}
@@ -5922,9 +5932,9 @@ void LLVolumeFace::cacheOptimize()
 			{
 				wght[cur_idx] = mWeights[idx];
 			}
-			if (mBinormals)
+			if (mTangents)
 			{
-				binorm[cur_idx] = mBinormals[idx];
+				binorm[cur_idx] = mTangents[idx];
 			}
 
 			cur_idx++;
@@ -5940,13 +5950,13 @@ void LLVolumeFace::cacheOptimize()
 	ll_aligned_free_16(mNormals);
 	ll_aligned_free_16(mTexCoords);
 	ll_aligned_free_16(mWeights);
-	ll_aligned_free_16(mBinormals);
+	ll_aligned_free_16(mTangents);
 
 	mPositions = pos;
 	mNormals = norm;
 	mTexCoords = tc;
 	mWeights = wght;
-	mBinormals = binorm;
+	mTangents = binorm;
 
 	//std::string result = llformat("ACMR pre/post: %.3f/%.3f  --  %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks);
 	//llinfos << result << llendl;
@@ -6027,7 +6037,7 @@ void LLVolumeFace::swapData(LLVolumeFace& rhs)
 {
 	llswap(rhs.mPositions, mPositions);
 	llswap(rhs.mNormals, mNormals);
-	llswap(rhs.mBinormals, mBinormals);
+	llswap(rhs.mTangents, mTangents);
 	llswap(rhs.mTexCoords, mTexCoords);
 	llswap(rhs.mIndices,mIndices);
 	llswap(rhs.mNumVertices, mNumVertices);
@@ -6116,22 +6126,11 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 			corners[2].mTexCoord=swap;
 		}
 
-		LLVector4a binormal;
-		
-		calc_binormal_from_triangle( binormal,
-			corners[0].getPosition(), corners[0].mTexCoord,
-			corners[1].getPosition(), corners[1].mTexCoord,
-			corners[2].getPosition(), corners[2].mTexCoord);
-		
-		binormal.normalize3fast();
-
 		S32 size = (grid_size+1)*(grid_size+1);
 		resizeVertices(size);
-		allocateBinormals(size);
-
+		
 		LLVector4a* pos = (LLVector4a*) mPositions;
 		LLVector4a* norm = (LLVector4a*) mNormals;
-		LLVector4a* binorm = (LLVector4a*) mBinormals;
 		LLVector2* tc = (LLVector2*) mTexCoords;
 
 		for(int gx = 0;gx<grid_size+1;gx++)
@@ -6150,8 +6149,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 				*pos++ = newVert.getPosition();
 				*norm++ = baseVert.getNormal();
 				*tc++ = newVert.mTexCoord;
-				*binorm++ = binormal;
-
+				
 				if (gx == 0 && gy == 0)
 				{
 					min = newVert.getPosition();
@@ -6227,8 +6225,7 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK))
 	{
 		resizeVertices(num_vertices+1);
-		allocateBinormals(num_vertices+1);	
-
+		
 		if (!partial_build)
 		{
 			resizeIndices(num_indices+3);
@@ -6237,8 +6234,7 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	else
 	{
 		resizeVertices(num_vertices);
-		allocateBinormals(num_vertices);
-
+		
 		if (!partial_build)
 		{
 			resizeIndices(num_indices);
@@ -6272,8 +6268,7 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	LLVector2* tc = (LLVector2*) mTexCoords;
 	LLVector4a* pos = (LLVector4a*) mPositions;
 	LLVector4a* norm = (LLVector4a*) mNormals;
-	LLVector4a* binorm = (LLVector4a*) mBinormals;
-
+	
 	// Copy the vertices into the array
 	for (S32 i = 0; i < num_vertices; i++)
 	{
@@ -6309,13 +6304,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 
 	cuv = (min_uv + max_uv)*0.5f;
 
-	LLVector4a binormal;
-	calc_binormal_from_triangle(binormal,
-		*mCenter, cuv,
-		pos[0], tc[0],
-		pos[1], tc[1]);
-	binormal.normalize3fast();
-
 	LLVector4a normal;
 	LLVector4a d0, d1;
 	
@@ -6347,7 +6335,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 		
 	for (S32 i = 0; i < num_vertices; i++)
 	{
-		binorm[i].load4a(binormal.getF32ptr());
 		norm[i].load4a(normal.getF32ptr());
 	}
 
@@ -6589,59 +6576,65 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	return TRUE;
 }
 
-void LLVolumeFace::createBinormals()
+void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal,
+        const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent);
+
+void LLVolumeFace::createTangents()
 {
-	if (!mBinormals)
+	if (!mTangents)
 	{
-		allocateBinormals(mNumVertices);
+		allocateTangents(mNumVertices);
 
-		//generate binormals
-		LLVector4a* pos = mPositions;
-		LLVector2* tc = (LLVector2*) mTexCoords;
-		LLVector4a* binorm = (LLVector4a*) mBinormals;
+		//generate tangents
+		//LLVector4a* pos = mPositions;
+		//LLVector2* tc = (LLVector2*) mTexCoords;
+		LLVector4a* binorm = (LLVector4a*) mTangents;
 
-		LLVector4a* end = mBinormals+mNumVertices;
+		LLVector4a* end = mTangents+mNumVertices;
 		while (binorm < end)
 		{
 			(*binorm++).clear();
 		}
 
-		binorm = mBinormals;
+		binorm = mTangents;
+
+		CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents);
 
-		for (U32 i = 0; i < mNumIndices/3; i++) 
+		/*for (U32 i = 0; i < mNumIndices/3; i++) 
 		{	//for each triangle
 			const U16& i0 = mIndices[i*3+0];
 			const U16& i1 = mIndices[i*3+1];
 			const U16& i2 = mIndices[i*3+2];
 						
-			//calculate binormal
-			LLVector4a binormal;
-			calc_binormal_from_triangle(binormal,
+			//calculate tangent
+			LLVector4a tangent;
+			calc_tangent_from_triangle(tangent,
 										pos[i0], tc[i0],
 										pos[i1], tc[i1],
 										pos[i2], tc[i2]);
 
 
 			//add triangle normal to vertices
-			binorm[i0].add(binormal);
-			binorm[i1].add(binormal);
-			binorm[i2].add(binormal);
+			binorm[i0].add(tangent);
+			binorm[i1].add(tangent);
+			binorm[i2].add(tangent);
 
 			//even out quad contributions
 			if (i % 2 == 0) 
 			{
-				binorm[i2].add(binormal);
+				binorm[i2].add(tangent);
 			}
 			else 
 			{
-				binorm[i1].add(binormal);
+				binorm[i1].add(tangent);
 			}
-		}
+		}*/
+
 
-		//normalize binormals
+		//normalize tangents
 		for (U32 i = 0; i < mNumVertices; i++) 
 		{
-			binorm[i].normalize3fast();
+			//binorm[i].normalize3fast();
 			//bump map/planar projection code requires normals to be normalized
 			mNormals[i].normalize3fast();
 		}
@@ -6652,10 +6645,10 @@ void LLVolumeFace::resizeVertices(S32 num_verts)
 {
 	ll_aligned_free_16(mPositions);
 	ll_aligned_free_16(mNormals);
-	ll_aligned_free_16(mBinormals);
+	ll_aligned_free_16(mTangents);
 	ll_aligned_free_16(mTexCoords);
 
-	mBinormals = NULL;
+	mTangents = NULL;
 
 	if (num_verts)
 	{
@@ -6705,9 +6698,9 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 	ll_assert_aligned(mTexCoords,16);
 	
 
-	//just clear binormals
-	ll_aligned_free_16(mBinormals);
-	mBinormals = NULL;
+	//just clear tangents
+	ll_aligned_free_16(mTangents);
+	mTangents = NULL;
 
 	mPositions[mNumVertices] = pos;
 	mNormals[mNumVertices] = norm;
@@ -6716,10 +6709,10 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 	mNumVertices++;	
 }
 
-void LLVolumeFace::allocateBinormals(S32 num_verts)
+void LLVolumeFace::allocateTangents(S32 num_verts)
 {
-	ll_aligned_free_16(mBinormals);
-	mBinormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
+	ll_aligned_free_16(mTangents);
+	mTangents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
 }
 
 void LLVolumeFace::allocateWeights(S32 num_verts)
@@ -7231,53 +7224,87 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 	return TRUE;
 }
 
-// Finds binormal based on three vertices with texture coordinates.
-// Fills in dummy values if the triangle has degenerate texture coordinates.
-void calc_binormal_from_triangle(LLVector4a& binormal,
-
-	const LLVector4a& pos0,
-	const LLVector2& tex0,
-	const LLVector4a& pos1,
-	const LLVector2& tex1,
-	const LLVector4a& pos2,
-	const LLVector2& tex2)
-{
-	LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] );
-	LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] );
-	LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] );
-	
-	LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] );
-	LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] );
-	LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] );
-
-	LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] );
-	LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] );
-	LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] );
-	
-	LLVector4a lhs, rhs;
-
-	LLVector4a r0; 
-	lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2);
-	r0.setCross3(lhs, rhs);
+//adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html
+void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal,
+        const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent)
+{
+    //LLVector4a *tan1 = new LLVector4a[vertexCount * 2];
+	LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a));
+
+    LLVector4a* tan2 = tan1 + vertexCount;
+
+	memset(tan1, 0, vertexCount*2*sizeof(LLVector4a));
+        
+    for (U32 a = 0; a < triangleCount; a++)
+    {
+        U32 i1 = *index_array++;
+        U32 i2 = *index_array++;
+        U32 i3 = *index_array++;
+        
+        const LLVector4a& v1 = vertex[i1];
+        const LLVector4a& v2 = vertex[i2];
+        const LLVector4a& v3 = vertex[i3];
+        
+        const LLVector2& w1 = texcoord[i1];
+        const LLVector2& w2 = texcoord[i2];
+        const LLVector2& w3 = texcoord[i3];
+        
+		const F32* v1ptr = v1.getF32ptr();
+		const F32* v2ptr = v2.getF32ptr();
+		const F32* v3ptr = v3.getF32ptr();
 		
-	LLVector4a r1;
-	lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2);
-	r1.setCross3(lhs, rhs);
-
-	LLVector4a r2;
-	lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2);
-	r2.setCross3(lhs, rhs);
+        float x1 = v2ptr[0] - v1ptr[0];
+        float x2 = v3ptr[0] - v1ptr[0];
+        float y1 = v2ptr[1] - v1ptr[1];
+        float y2 = v3ptr[1] - v1ptr[1];
+        float z1 = v2ptr[2] - v1ptr[2];
+        float z2 = v3ptr[2] - v1ptr[2];
+        
+        float s1 = w2.mV[0] - w1.mV[0];
+        float s2 = w3.mV[0] - w1.mV[0];
+        float t1 = w2.mV[1] - w1.mV[1];
+        float t2 = w3.mV[1] - w1.mV[1];
+        
+        float r = 1.0F / (s1 * t2 - s2 * t1);
+        LLVector4a sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r,
+                (t2 * z1 - t1 * z2) * r);
+        LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r,
+                (s1 * z2 - s2 * z1) * r);
+        
+        tan1[i1].add(sdir);
+        tan1[i2].add(sdir);
+        tan1[i3].add(sdir);
+        
+        tan2[i1].add(tdir);
+        tan2[i2].add(tdir);
+        tan2[i3].add(tdir);
+    }
+    
+    for (U32 a = 0; a < vertexCount; a++)
+    {
+        LLVector4a n = normal[a];
+        const LLVector4a& t = tan1[a];
+        
+		LLVector4a ncrosst;
+		ncrosst.setCross3(n,t);
+
+        // Gram-Schmidt orthogonalize
+        n.mul(n.dot3(t).getF32());
+
+		LLVector4a tsubn;
+		tsubn.setSub(t,n);
+
+		tsubn.normalize3fast();
+		
+        // Calculate handedness
+		F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f;
+		
+		tsubn.getF32ptr()[3] = handedness;
 
-	if( r0[VX] && r1[VX] && r2[VX] )
-	{
-		binormal.set(
-				-r0[VZ] / r0[VX],
-				-r1[VZ] / r1[VX],
-				-r2[VZ] / r2[VX]);
-		// binormal.normVec();
-	}
-	else
-	{
-		binormal.set( 0, 1 , 0 );
-	}
+        tangent[a] = tsubn;
+    }
+    
+	ll_aligned_free_16(tan1);
 }
+
+
diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h
index c845556557..164b8d6652 100755
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@@ -844,12 +844,12 @@ private:
 public:
 
 	BOOL create(LLVolume* volume, BOOL partial_build = FALSE);
-	void createBinormals();
+	void createTangents();
 	
 	void appendFace(const LLVolumeFace& face, LLMatrix4& transform, LLMatrix4& normal_tranform);
 
 	void resizeVertices(S32 num_verts);
-	void allocateBinormals(S32 num_verts);
+	void allocateTangents(S32 num_verts);
 	void allocateWeights(S32 num_verts);
 	void resizeIndices(S32 num_indices);
 	void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx);
@@ -916,7 +916,7 @@ public:
 
 	LLVector4a* mPositions;
 	LLVector4a* mNormals;
-	LLVector4a* mBinormals;
+	LLVector4a* mTangents;
 	LLVector2*  mTexCoords;
 	U16* mIndices;
 
@@ -980,7 +980,7 @@ public:
 	void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); }
 
 	void regen();
-	void genBinormals(S32 face);
+	void genTangents(S32 face);
 
 	BOOL isConvex() const;
 	BOOL isCap(S32 face);
@@ -1008,21 +1008,14 @@ public:
 	//get the face index of the face that intersects with the given line segment at the point 
 	//closest to start.  Moves end to the point of intersection.  Returns -1 if no intersection.
 	//Line segment must be in volume space.
-	S32 lineSegmentIntersect(const LLVector3& start, const LLVector3& end,
+	S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end,
 							 S32 face = -1,                          // which face to check, -1 = ALL_SIDES
-							 LLVector3* intersection = NULL,         // return the intersection point
+							 LLVector4a* intersection = NULL,         // return the intersection point
 							 LLVector2* tex_coord = NULL,            // return the texture coordinates of the intersection point
-							 LLVector3* normal = NULL,               // return the surface normal at the intersection point
-							 LLVector3* bi_normal = NULL             // return the surface bi-normal at the intersection point
+							 LLVector4a* normal = NULL,               // return the surface normal at the intersection point
+							 LLVector4a* tangent = NULL             // return the surface tangent at the intersection point
 		);
 
-	S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, 
-								   S32 face = 1,
-								   LLVector3* intersection = NULL,
-								   LLVector2* tex_coord = NULL,
-								   LLVector3* normal = NULL,
-								   LLVector3* bi_normal = NULL);
-	
 	LLFaceID generateFaceMask();
 
 	BOOL isFaceMaskValid(LLFaceID face_mask);
@@ -1081,21 +1074,12 @@ public:
 
 std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params);
 
-void calc_binormal_from_triangle(
-		LLVector4a& binormal,
-		const LLVector4a& pos0,
-		const LLVector2& tex0,
-		const LLVector4a& pos1,
-		const LLVector2& tex1,
-		const LLVector4a& pos2,
-		const LLVector2& tex2);
-
 BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size);
 BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size);
 BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size);
 
-BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir,
-							F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided);
+//BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir,
+//							F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided);
 
 BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir,
 							F32& intersection_a, F32& intersection_b, F32& intersection_t);
diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp
index cc83cb7235..0728b49c1f 100755
--- a/indra/llmath/llvolumeoctree.cpp
+++ b/indra/llmath/llvolumeoctree.cpp
@@ -94,14 +94,14 @@ void LLVolumeOctreeListener::handleChildAddition(const LLOctreeNode<LLVolumeTria
 
 LLOctreeTriangleRayIntersect::LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, 
 							   const LLVolumeFace* face, F32* closest_t,
-							   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal)
+							   LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent)
    : mFace(face),
      mStart(start),
 	 mDir(dir),
 	 mIntersection(intersection),
 	 mTexCoord(tex_coord),
 	 mNormal(normal),
-	 mBinormal(bi_normal),
+	 mTangent(tangent),
 	 mClosestT(closest_t),
 	 mHitFace(false)
 {
@@ -112,13 +112,7 @@ void LLOctreeTriangleRayIntersect::traverse(const LLOctreeNode<LLVolumeTriangle>
 {
 	LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) node->getListener(0);
 
-	/*const F32* start = mStart.getF32();
-	const F32* end = mEnd.getF32();
-	const F32* center = vl->mBounds[0].getF32();
-	const F32* size = vl->mBounds[1].getF32();*/
-
-	//if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1]))
-	if (LLLineSegmentBoxIntersect(mStart.getF32ptr(), mEnd.getF32ptr(), vl->mBounds[0].getF32ptr(), vl->mBounds[1].getF32ptr()))
+	if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1]))
 	{
 		node->accept(this);
 		for (S32 i = 0; i < node->getChildCount(); ++i)
@@ -152,34 +146,60 @@ void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode<LLVolumeTriangle>* n
 					LLVector4a intersect = mDir;
 					intersect.mul(*mClosestT);
 					intersect.add(mStart);
-					mIntersection->set(intersect.getF32ptr());
+					*mIntersection = intersect;
 				}
 
+				U32 idx0 = tri->mIndex[0];
+				U32 idx1 = tri->mIndex[1];
+				U32 idx2 = tri->mIndex[2];
 
 				if (mTexCoord != NULL)
 				{
 					LLVector2* tc = (LLVector2*) mFace->mTexCoords;
-					*mTexCoord = ((1.f - a - b)  * tc[tri->mIndex[0]] +
-						a              * tc[tri->mIndex[1]] +
-						b              * tc[tri->mIndex[2]]);
+					*mTexCoord = ((1.f - a - b)  * tc[idx0] +
+						a              * tc[idx1] +
+						b              * tc[idx2]);
 
 				}
 
 				if (mNormal != NULL)
 				{
-					LLVector4* norm = (LLVector4*) mFace->mNormals;
-
-					*mNormal    = ((1.f - a - b)  * LLVector3(norm[tri->mIndex[0]]) + 
-						a              * LLVector3(norm[tri->mIndex[1]]) +
-						b              * LLVector3(norm[tri->mIndex[2]]));
+					LLVector4a* norm = mFace->mNormals;
+								
+					LLVector4a n1,n2,n3;
+					n1 = norm[idx0];
+					n1.mul(1.f-a-b);
+								
+					n2 = norm[idx1];
+					n2.mul(a);
+								
+					n3 = norm[idx2];
+					n3.mul(b);
+
+					n1.add(n2);
+					n1.add(n3);
+								
+					*mNormal		= n1; 
 				}
 
-				if (mBinormal != NULL)
+				if (mTangent != NULL)
 				{
-					LLVector4* binormal = (LLVector4*) mFace->mBinormals;
-					*mBinormal = ((1.f - a - b)  * LLVector3(binormal[tri->mIndex[0]]) + 
-							a              * LLVector3(binormal[tri->mIndex[1]]) +
-							b              * LLVector3(binormal[tri->mIndex[2]]));
+					LLVector4a* tangents = mFace->mTangents;
+								
+					LLVector4a t1,t2,t3;
+					t1 = tangents[idx0];
+					t1.mul(1.f-a-b);
+								
+					t2 = tangents[idx1];
+					t2.mul(a);
+								
+					t3 = tangents[idx2];
+					t3.mul(b);
+
+					t1.add(t2);
+					t1.add(t3);
+								
+					*mTangent = t1; 
 				}
 			}
 		}
diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h
index 9ae34a0c4e..80d6ced36d 100755
--- a/indra/llmath/llvolumeoctree.h
+++ b/indra/llmath/llvolumeoctree.h
@@ -137,16 +137,16 @@ public:
 	LLVector4a mStart;
 	LLVector4a mDir;
 	LLVector4a mEnd;
-	LLVector3* mIntersection;
+	LLVector4a* mIntersection;
 	LLVector2* mTexCoord;
-	LLVector3* mNormal;
-	LLVector3* mBinormal;
+	LLVector4a* mNormal;
+	LLVector4a* mTangent;
 	F32* mClosestT;
 	bool mHitFace;
 
 	LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, 
 								   const LLVolumeFace* face, F32* closest_t,
-								   LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal);
+								   LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent);
 
 	void traverse(const LLOctreeNode<LLVolumeTriangle>* node);
 
-- 
cgit v1.3


From ddf15867e4dd1e3506f0cfd975d9e7aa8f7aab66 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Fri, 7 Jun 2013 16:18:37 -0500
Subject: NORSPEC-242 Fix for infinity and NaN sometimes showing up in
 tangents and texture coordinates.

---
 indra/llmath/llvolume.cpp | 135 ++++++++++++++++++++--------------------------
 1 file changed, 58 insertions(+), 77 deletions(-)

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index bf03c971cd..c3528349eb 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -1392,7 +1392,7 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en
 	pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t);
 	pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t);
 	pt->mTexT  = t;
-	
+
 	// Twist rotates the path along the x,y plane (I think) - DJS 04/05/02
 	twist.setQuat  (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1);
 	// Rotate the point around the circle's center.
@@ -1446,7 +1446,7 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en
 	pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t);
 	pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t);
 	pt->mTexT  = t;
-	
+
 	// Twist rotates the path along the x,y plane (I think) - DJS 04/05/02
 	twist.setQuat  (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1);
 	// Rotate the point around the circle's center.
@@ -1594,7 +1594,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
 			S32 sides = (S32)llfloor(llfloor((MIN_DETAIL_FACES * detail + twist_mag * 3.5f * (detail-0.5f))) * params.getRevolutions());
 
 			if (is_sculpted)
-				sides = sculpt_size;
+				sides = llmax(sculpt_size, 1);
 			
 			genNGon(params, sides);
 		}
@@ -1644,6 +1644,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
 			mPath[i].mScale.mV[0] = lerp(1,params.getScale().mV[0],t);
 			mPath[i].mScale.mV[1] = lerp(1,params.getScale().mV[1],t);
 			mPath[i].mTexT  = t;
+
 			mPath[i].mRot.setQuat(F_PI * params.getTwist() * t,1,0,0);
 		}
 
@@ -2442,6 +2443,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 			LLVector4a pos_range;
 			pos_range.setSub(max_pos, min_pos);
 			LLVector2 tc_range2 = max_tc - min_tc;
+
 			LLVector4a tc_range;
 			tc_range.set(tc_range2[0], tc_range2[1], tc_range2[0], tc_range2[1]);
 			LLVector4a min_tc4(min_tc[0], min_tc[1], min_tc[0], min_tc[1]);
@@ -6304,24 +6306,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 
 	cuv = (min_uv + max_uv)*0.5f;
 
-	LLVector4a normal;
-	LLVector4a d0, d1;
-	
-
-	d0.setSub(*mCenter, pos[0]);
-	d1.setSub(*mCenter, pos[1]);
-
-	if (mTypeMask & TOP_MASK)
-	{
-		normal.setCross3(d0, d1);
-	}
-	else
-	{
-		normal.setCross3(d1, d0);
-	}
-
-	normal.normalize3fast();
-
 	VertexData vd;
 	vd.setPosition(*mCenter);
 	vd.mTexCoord = cuv;
@@ -6330,14 +6314,10 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	{
 		pos[num_vertices] = *mCenter;
 		tc[num_vertices] = cuv;
+
 		num_vertices++;
 	}
 		
-	for (S32 i = 0; i < num_vertices; i++)
-	{
-		norm[i].load4a(normal.getF32ptr());
-	}
-
 	if (partial_build)
 	{
 		return TRUE;
@@ -6572,7 +6552,22 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 
 
 	}
-		
+
+	LLVector4a d0,d1;
+
+	d0.setSub(mPositions[mIndices[1]], mPositions[mIndices[0]]);
+	d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]);
+
+	LLVector4a normal;
+	normal.setCross3(d0,d1);
+
+	normal.normalize3fast();
+
+	for (S32 i = 0; i < num_vertices; i++)
+	{
+		norm[i].load4a(normal.getF32ptr());
+	}
+
 	return TRUE;
 }
 
@@ -6600,37 +6595,6 @@ void LLVolumeFace::createTangents()
 
 		CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents);
 
-		/*for (U32 i = 0; i < mNumIndices/3; i++) 
-		{	//for each triangle
-			const U16& i0 = mIndices[i*3+0];
-			const U16& i1 = mIndices[i*3+1];
-			const U16& i2 = mIndices[i*3+2];
-						
-			//calculate tangent
-			LLVector4a tangent;
-			calc_tangent_from_triangle(tangent,
-										pos[i0], tc[i0],
-										pos[i1], tc[i1],
-										pos[i2], tc[i2]);
-
-
-			//add triangle normal to vertices
-			binorm[i0].add(tangent);
-			binorm[i1].add(tangent);
-			binorm[i2].add(tangent);
-
-			//even out quad contributions
-			if (i % 2 == 0) 
-			{
-				binorm[i2].add(tangent);
-			}
-			else 
-			{
-				binorm[i1].add(tangent);
-			}
-		}*/
-
-
 		//normalize tangents
 		for (U32 i = 0; i < mNumVertices; i++) 
 		{
@@ -6949,7 +6913,6 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 
 			if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0)
 			{
-
 				pos[cur_vertex].load3(mesh[i].mPos.mV);
 				tc[cur_vertex] = LLVector2(ss,tt);
 			
@@ -6980,7 +6943,6 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 		}
 	}
 	
-
 	//get bounding box for this side
 	LLVector4a& face_min = mExtents[0];
 	LLVector4a& face_max = mExtents[1];
@@ -7265,26 +7227,36 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
         float t1 = w2.mV[1] - w1.mV[1];
         float t2 = w3.mV[1] - w1.mV[1];
         
-        float r = 1.0F / (s1 * t2 - s2 * t1);
-        LLVector4a sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r,
-                (t2 * z1 - t1 * z2) * r);
-        LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r,
-                (s1 * z2 - s2 * z1) * r);
+		F32 rd = s1*t2-s2*t1;
+
+		float r = rd*rd > 0.f ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero
+
+		llassert(llfinite(r));
+		llassert(!llisnan(r));
+
+		LLVector4a sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r,
+				(t2 * z1 - t1 * z2) * r);
+		LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r,
+				(s1 * z2 - s2 * z1) * r);
         
-        tan1[i1].add(sdir);
-        tan1[i2].add(sdir);
-        tan1[i3].add(sdir);
+		tan1[i1].add(sdir);
+		tan1[i2].add(sdir);
+		tan1[i3].add(sdir);
         
-        tan2[i1].add(tdir);
-        tan2[i2].add(tdir);
-        tan2[i3].add(tdir);
+		tan2[i1].add(tdir);
+		tan2[i2].add(tdir);
+		tan2[i3].add(tdir);
     }
     
     for (U32 a = 0; a < vertexCount; a++)
     {
         LLVector4a n = normal[a];
-        const LLVector4a& t = tan1[a];
+
+		const LLVector4a& t = tan1[a];
         
+		llassert(tan1[a].getLength3().getF32() >= 0.f);
+		llassert(tan2[a].getLength3().getF32() >= 0.f);
+
 		LLVector4a ncrosst;
 		ncrosst.setCross3(n,t);
 
@@ -7294,14 +7266,23 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 		LLVector4a tsubn;
 		tsubn.setSub(t,n);
 
-		tsubn.normalize3fast();
+		if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO)
+		{
+			tsubn.normalize3fast();
 		
-        // Calculate handedness
-		F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f;
+			// Calculate handedness
+			F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f;
 		
-		tsubn.getF32ptr()[3] = handedness;
+			tsubn.getF32ptr()[3] = handedness;
+
+			tangent[a] = tsubn;
 
-        tangent[a] = tsubn;
+			llassert(tangent[a].getLength3().getF32() > 0.f);
+		}
+		else
+		{ //degenerate, make up a value
+			tangent[a].set(0,0,1,1);
+		}
     }
     
 	ll_aligned_free_16(tan1);
-- 
cgit v1.3


From d09f1e71176f32a23be73b53559979727c242b4a Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Mon, 10 Jun 2013 16:01:53 -0500
Subject: NORSPEC-242 Followup, fix a couple more sources of NaN and infinity.

---
 indra/llmath/llvolume.cpp | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index c3528349eb..f9dd843b92 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -6561,8 +6561,23 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	LLVector4a normal;
 	normal.setCross3(d0,d1);
 
-	normal.normalize3fast();
+	if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO)
+	{
+		normal.normalize3fast();
+	}
+	else
+	{ //degenerate, make up a value
+		normal.set(0,0,1);
+	}
+
+	llassert(llfinite(normal.getF32ptr()[0]));
+	llassert(llfinite(normal.getF32ptr()[1]));
+	llassert(llfinite(normal.getF32ptr()[2]));
 
+	llassert(!llisnan(normal.getF32ptr()[0]));
+	llassert(!llisnan(normal.getF32ptr()[1]));
+	llassert(!llisnan(normal.getF32ptr()[2]));
+	
 	for (S32 i = 0; i < num_vertices; i++)
 	{
 		norm[i].load4a(normal.getF32ptr());
@@ -7048,6 +7063,14 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 		n[1]->add(c);
 		n[2]->add(c);
 		
+		llassert(llfinite(c.getF32ptr()[0]));
+		llassert(llfinite(c.getF32ptr()[1]));
+		llassert(llfinite(c.getF32ptr()[2]));
+
+		llassert(!llisnan(c.getF32ptr()[0]));
+		llassert(!llisnan(c.getF32ptr()[1]));
+		llassert(!llisnan(c.getF32ptr()[2]));
+
 		//even out quad contributions
 		n[i%2+1]->add(c);
 	}
@@ -7277,7 +7300,13 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 
 			tangent[a] = tsubn;
 
-			llassert(tangent[a].getLength3().getF32() > 0.f);
+			llassert(llfinite(tangent[a].getF32ptr()[0]));
+			llassert(llfinite(tangent[a].getF32ptr()[1]));
+			llassert(llfinite(tangent[a].getF32ptr()[2]));
+
+			llassert(!llisnan(tangent[a].getF32ptr()[0]));
+			llassert(!llisnan(tangent[a].getF32ptr()[1]));
+			llassert(!llisnan(tangent[a].getF32ptr()[2]));
 		}
 		else
 		{ //degenerate, make up a value
-- 
cgit v1.3


From 01f595d8716f7b80ab0088d93434fa77cc812c16 Mon Sep 17 00:00:00 2001
From: Graham Madarasz <graham@lindenlab.com>
Date: Tue, 11 Jun 2013 12:02:54 -0700
Subject: Speculative fix for tangent calc asserts on Mac

---
 indra/llmath/llvolume.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index f9dd843b92..1932272afb 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -7252,7 +7252,7 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
         
 		F32 rd = s1*t2-s2*t1;
 
-		float r = rd*rd > 0.f ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero
+		float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero
 
 		llassert(llfinite(r));
 		llassert(!llisnan(r));
@@ -7276,7 +7276,7 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
         LLVector4a n = normal[a];
 
 		const LLVector4a& t = tan1[a];
-        
+
 		llassert(tan1[a].getLength3().getF32() >= 0.f);
 		llassert(tan2[a].getLength3().getF32() >= 0.f);
 
-- 
cgit v1.3


From 48324a93833cee8aca7559588ee5f2b4afa250fa Mon Sep 17 00:00:00 2001
From: Graham Madarasz <graham@lindenlab.com>
Date: Wed, 12 Jun 2013 08:09:29 -0700
Subject: Fix issues with NaNs in tangent data from using normalize3fast on
 zero-length vectors and other data conditioning; also added assert to
 normalize3fast to make finding these problems easier in the future

---
 indra/llmath/llvector4a.inl |   9 ++++
 indra/llmath/llvolume.cpp   | 128 +++++++++++++++++++++++++++++++++-----------
 2 files changed, 106 insertions(+), 31 deletions(-)

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 7c52ffef21..4589bac9fb 100755
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -331,6 +331,9 @@ inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const
 // Note that this does not consider zero length vectors!
 inline void LLVector4a::normalize3()
 {
+	// find out about bad math before it takes two man-days to track down
+	llassert(isFinite3() && !equals3(getZero()));
+
 	// lenSqrd = a dot a
 	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
 	// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 }
@@ -379,6 +382,9 @@ inline void LLVector4a::normalize4()
 // Note that this does not consider zero length vectors!
 inline LLSimdScalar LLVector4a::normalize3withLength()
 {
+	// find out about bad math before it takes two man-days to track down
+	llassert(isFinite3() && !equals3(getZero()));
+
 	// lenSqrd = a dot a
 	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
 	// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 }
@@ -404,6 +410,9 @@ inline LLSimdScalar LLVector4a::normalize3withLength()
 // Note that this does not consider zero length vectors!
 inline void LLVector4a::normalize3fast()
 {
+	// find out about bad math before it takes two man-days to track down
+	llassert(isFinite3() && !equals3(getZero()));
+
 	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
 	const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
 	mQ = _mm_mul_ps( mQ, approxRsqrt );
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 1932272afb..bc2572375a 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -7209,46 +7209,53 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 	return TRUE;
 }
 
+#define TANGENTIAL_PARANOIA_ASSERTS 1
+
+#if TANGENTIAL_PARANOIA_ASSERTS
+	#define tangential_paranoia(a) llassert(a)
+#else
+	#define tangential_paranoia(a)
+#endif
+
 //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html
 void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal,
         const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent)
 {
-    //LLVector4a *tan1 = new LLVector4a[vertexCount * 2];
 	LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a));
 
-    LLVector4a* tan2 = tan1 + vertexCount;
+   LLVector4a* tan2 = tan1 + vertexCount;
 
 	memset(tan1, 0, vertexCount*2*sizeof(LLVector4a));
         
-    for (U32 a = 0; a < triangleCount; a++)
-    {
-        U32 i1 = *index_array++;
-        U32 i2 = *index_array++;
-        U32 i3 = *index_array++;
+   for (U32 a = 0; a < triangleCount; a++)
+   {
+      U32 i1 = *index_array++;
+      U32 i2 = *index_array++;
+      U32 i3 = *index_array++;
         
-        const LLVector4a& v1 = vertex[i1];
-        const LLVector4a& v2 = vertex[i2];
-        const LLVector4a& v3 = vertex[i3];
+      const LLVector4a& v1 = vertex[i1];
+      const LLVector4a& v2 = vertex[i2];
+      const LLVector4a& v3 = vertex[i3];
         
-        const LLVector2& w1 = texcoord[i1];
-        const LLVector2& w2 = texcoord[i2];
-        const LLVector2& w3 = texcoord[i3];
+      const LLVector2& w1 = texcoord[i1];
+      const LLVector2& w2 = texcoord[i2];
+      const LLVector2& w3 = texcoord[i3];
         
 		const F32* v1ptr = v1.getF32ptr();
 		const F32* v2ptr = v2.getF32ptr();
 		const F32* v3ptr = v3.getF32ptr();
 		
-        float x1 = v2ptr[0] - v1ptr[0];
-        float x2 = v3ptr[0] - v1ptr[0];
-        float y1 = v2ptr[1] - v1ptr[1];
-        float y2 = v3ptr[1] - v1ptr[1];
-        float z1 = v2ptr[2] - v1ptr[2];
-        float z2 = v3ptr[2] - v1ptr[2];
+      float x1 = v2ptr[0] - v1ptr[0];
+      float x2 = v3ptr[0] - v1ptr[0];
+      float y1 = v2ptr[1] - v1ptr[1];
+      float y2 = v3ptr[1] - v1ptr[1];
+      float z1 = v2ptr[2] - v1ptr[2];
+      float z2 = v3ptr[2] - v1ptr[2];
         
-        float s1 = w2.mV[0] - w1.mV[0];
-        float s2 = w3.mV[0] - w1.mV[0];
-        float t1 = w2.mV[1] - w1.mV[1];
-        float t2 = w3.mV[1] - w1.mV[1];
+      float s1 = w2.mV[0] - w1.mV[0];
+      float s2 = w3.mV[0] - w1.mV[0];
+      float t1 = w2.mV[1] - w1.mV[1];
+      float t2 = w3.mV[1] - w1.mV[1];
         
 		F32 rd = s1*t2-s2*t1;
 
@@ -7262,18 +7269,67 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 		LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r,
 				(s1 * z2 - s2 * z1) * r);
         
+		
 		tan1[i1].add(sdir);
 		tan1[i2].add(sdir);
 		tan1[i3].add(sdir);
-        
+      
+		tangential_paranoia(tan1[i1].isFinite3());
+		tangential_paranoia(tan1[i2].isFinite3());
+		tangential_paranoia(tan1[i3].isFinite3());
+
 		tan2[i1].add(tdir);
 		tan2[i2].add(tdir);
 		tan2[i3].add(tdir);
-    }
-    
-    for (U32 a = 0; a < vertexCount; a++)
-    {
-        LLVector4a n = normal[a];
+
+		tangential_paranoia(tan2[i1].isFinite3());
+		tangential_paranoia(tan2[i2].isFinite3());
+		tangential_paranoia(tan2[i3].isFinite3());
+   }
+
+	// These appear to come out of the summing above distinctly non-unit-length
+	//
+	for (U32 a = 0; a < vertexCount; a++)
+	{
+		// Conditioning required by assets which don't necessarily reference every vert index
+		// (i.e. some of the tangents can end up uninitialized and therefore indeterminate/INF)
+		// and protection against zero length vectors which are not handled by normalize3fast.
+		//
+		if (!tan1[a].isFinite3() || tan1[a].equals3(LLVector4a::getZero()))
+		{
+			tan1[a].set(0,0,1,1);
+		}
+		else
+		{
+			tan1[a].normalize3fast();	
+		}
+
+		if (!tan2[a].isFinite3() || tan2[a].equals3(LLVector4a::getZero()))
+		{
+			tan2[a].set(0,0,1,1);
+		}
+		else
+		{
+			tan2[a].normalize3fast();
+		}		
+
+		const F32 cefgw = 0.03f;
+		tangential_paranoia(tan1[a].isFinite3());
+		tangential_paranoia(tan2[a].isFinite3());		
+		tangential_paranoia(tan1[a].isNormalized3(cefgw));
+		tangential_paranoia(tan2[a].isNormalized3(cefgw));	
+	}
+
+   for (U32 a = 0; a < vertexCount; a++)
+	{
+		LLVector4a n = normal[a];
+
+		if (!n.isFinite3() || n.equals3(LLVector4a::getZero()))
+		{
+			n.set(0,1,0,1);
+		}
+
+		n.normalize3fast();
 
 		const LLVector4a& t = tan1[a];
 
@@ -7283,12 +7339,20 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 		LLVector4a ncrosst;
 		ncrosst.setCross3(n,t);
 
-        // Gram-Schmidt orthogonalize
-        n.mul(n.dot3(t).getF32());
+		F32 n_dot_t = n.dot3(t).getF32();
+
+		tangential_paranoia(llfinite(n_dot_t) && !llisnan(n_dot_t));
+
+		// Gram-Schmidt orthogonalize
+      n.mul(n_dot_t);
+
+		tangential_paranoia(n.isFinite3());
 
 		LLVector4a tsubn;
 		tsubn.setSub(t,n);
 
+		tangential_paranoia(tsubn.isFinite3());
+
 		if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO)
 		{
 			tsubn.normalize3fast();
@@ -7300,6 +7364,8 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 
 			tangent[a] = tsubn;
 
+			tangential_paranoia(tangent[a].isNormalized3(0.1f));
+
 			llassert(llfinite(tangent[a].getF32ptr()[0]));
 			llassert(llfinite(tangent[a].getF32ptr()[1]));
 			llassert(llfinite(tangent[a].getF32ptr()[2]));
-- 
cgit v1.3


From d2b253f1f6072beead770519849ad3b18a1a4359 Mon Sep 17 00:00:00 2001
From: Graham Madarasz <graham@lindenlab.com>
Date: Wed, 12 Jun 2013 09:16:19 -0700
Subject: Changes to protect against use of normalize3fast on degenerate
 vectors

---
 indra/llappearance/llpolymorph.cpp   |  26 ++++++--
 indra/llmath/llvector4a.h            |   5 ++
 indra/llmath/llvector4a.inl          |  22 ++++++-
 indra/llmath/llvolume.cpp            | 116 +++++++++++++++--------------------
 indra/newview/llface.cpp             |  31 +++++++---
 indra/newview/llspatialpartition.cpp |   5 +-
 indra/newview/llvopartgroup.cpp      |  10 +++
 indra/newview/llvovolume.cpp         |   5 +-
 indra/newview/pipeline.cpp           |   2 +
 9 files changed, 139 insertions(+), 83 deletions(-)

(limited to 'indra/llmath')

diff --git a/indra/llappearance/llpolymorph.cpp b/indra/llappearance/llpolymorph.cpp
index 8a17819083..5e5813b9ac 100644
--- a/indra/llappearance/llpolymorph.cpp
+++ b/indra/llappearance/llpolymorph.cpp
@@ -568,6 +568,12 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )
 
 		F32 *maskWeightArray = (mVertMask) ? mVertMask->getMorphMaskWeights() : NULL;
 
+		LLVector4a default_norm;
+		LLVector4a default_binorm;
+
+		default_norm.set(0,1,0,1);
+		default_binorm.set(1,0,0,1);
+
 		for(U32 vert_index_morph = 0; vert_index_morph < mMorphData->mNumIndices; vert_index_morph++)
 		{
 			S32 vert_index_mesh = mMorphData->mVertexIndices[vert_index_morph];
@@ -597,19 +603,31 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )
 			norm.mul(delta_weight*maskWeight*NORMAL_SOFTEN_FACTOR);
 			scaled_normals[vert_index_mesh].add(norm);
 			norm = scaled_normals[vert_index_mesh];
-			norm.normalize3fast();
+
+			// guard against degenerate input data before we create NaNs below!
+			//
+			norm.normalize3fast_checked(&default_norm);
 			normals[vert_index_mesh] = norm;
 
 			// calculate new binormals
 			LLVector4a binorm = mMorphData->mBinormals[vert_index_morph];
+
+			// guard against degenerate input data before we create NaNs below!
+			//
+			if (!binorm.isFinite3() || (binorm.dot3(binorm).getF32() <= F_APPROXIMATELY_ZERO))
+			{
+				binorm.set(1,0,0,1);
+			}
+
 			binorm.mul(delta_weight*maskWeight*NORMAL_SOFTEN_FACTOR);
 			scaled_binormals[vert_index_mesh].add(binorm);
 			LLVector4a tangent;
 			tangent.setCross3(scaled_binormals[vert_index_mesh], norm);
 			LLVector4a& normalized_binormal = binormals[vert_index_mesh];
-			normalized_binormal.setCross3(norm, tangent); 
-			normalized_binormal.normalize3fast();
-			
+
+			normalized_binormal.setCross3(norm, tangent); 			
+			normalized_binormal.normalize3fast_checked(&default_binorm);
+
 			tex_coords[vert_index_mesh] += mMorphData->mTexCoords[vert_index_morph] * delta_weight * maskWeight;
 		}
 
diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 0526793d3a..94a61f2b1d 100755
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -236,6 +236,11 @@ public:
 	// Note that this does not consider zero length vectors!
 	inline void normalize3fast();
 
+	// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed
+	// Same as above except substitutes default vector contents if the vector is non-finite or degenerate due to zero length.
+	//
+	inline void normalize3fast_checked(LLVector4a* default = NULL);
+
 	// Return true if this vector is normalized with respect to x,y,z up to tolerance
 	inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const;
 
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 4589bac9fb..6860252a75 100755
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -410,8 +410,26 @@ inline LLSimdScalar LLVector4a::normalize3withLength()
 // Note that this does not consider zero length vectors!
 inline void LLVector4a::normalize3fast()
 {
-	// find out about bad math before it takes two man-days to track down
-	llassert(isFinite3() && !equals3(getZero()));
+	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
+	const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
+	mQ = _mm_mul_ps( mQ, approxRsqrt );
+}
+
+// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed
+// Note that this does not consider zero length vectors!
+inline void LLVector4a::normalize3fast_checked(LLVector4a* default)
+{
+	// handle bogus inputs before NaNs are generated below
+	//
+	if (!isFinite3() || (dot3(*this).getF32() < F_APPROXIMATELY_ZERO))
+	{
+		if (default)
+			*this = *default;
+		else
+			set(0,1,0,1);
+
+		return;
+	}
 
 	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
 	const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index bc2572375a..15621c2625 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -4472,6 +4472,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 					continue; //skip degenerate face
 				}
 
+				LLVector4a default_norm;
+				default_norm.set(0,1,0,1);
+
 				//for each edge
 				for (S32 k = 0; k < 3; k++) {
 					S32 index = face.mEdge[j*3+k];
@@ -4493,14 +4496,14 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 
 						norm_mat.rotate(n[v1], t);
 
-						t.normalize3fast();
+						t.normalize3fast_checked(&default_norm);
 						normals.push_back(LLVector3(t[0], t[1], t[2]));
 
 						mat.affineTransform(v[v2], t);
 						vertices.push_back(LLVector3(t[0], t[1], t[2]));
 						
 						norm_mat.rotate(n[v2], t);
-						t.normalize3fast();
+						t.normalize3fast_checked(&default_norm);
 						normals.push_back(LLVector3(t[0], t[1], t[2]));
 					}
 				}		
@@ -6096,6 +6099,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 	{
 		VertexData	corners[4];
 		VertexData baseVert;
+		LLVector4a default_norm;
+		default_norm.set(0,1,0,1);
+
 		for(S32 t = 0; t < 4; t++)
 		{
 			corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV);
@@ -6108,8 +6114,8 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 			lhs.setSub(corners[1].getPosition(), corners[0].getPosition());
 			LLVector4a rhs;
 			rhs.setSub(corners[2].getPosition(), corners[1].getPosition());
-			baseVert.getNormal().setCross3(lhs, rhs); 
-			baseVert.getNormal().normalize3fast();
+			baseVert.getNormal().setCross3(lhs, rhs);
+			baseVert.getNormal().normalize3fast_checked(&default_norm);
 		}
 
 		if(!(mTypeMask & TOP_MASK))
@@ -6559,17 +6565,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]);
 
 	LLVector4a normal;
-	normal.setCross3(d0,d1);
-
-	if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO)
-	{
-		normal.normalize3fast();
-	}
-	else
-	{ //degenerate, make up a value
-		normal.set(0,0,1);
-	}
+	LLVector4a default_norm;
+	default_norm.set(0,1,0,1);
 
+	normal.setCross3(d0,d1);
+	normal.normalize3fast_checked(&default_norm);
+	
 	llassert(llfinite(normal.getF32ptr()[0]));
 	llassert(llfinite(normal.getF32ptr()[1]));
 	llassert(llfinite(normal.getF32ptr()[2]));
@@ -6611,11 +6612,13 @@ void LLVolumeFace::createTangents()
 		CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents);
 
 		//normalize tangents
+		LLVector4a default_norm;
+		default_norm.set(0,1,0,1);
 		for (U32 i = 0; i < mNumVertices; i++) 
 		{
 			//binorm[i].normalize3fast();
 			//bump map/planar projection code requires normals to be normalized
-			mNormals[i].normalize3fast();
+			mNormals[i].normalize3fast_checked(&default_norm);
 		}
 	}
 }
@@ -6793,6 +6796,9 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	mat.loadu(mat_in);
 	norm_mat.loadu(norm_mat_in);
 
+	LLVector4a default_norm;
+	default_norm.set(0,1,0,1);
+
 	for (U32 i = 0; i < face.mNumVertices; ++i)
 	{
 		//transform appended face position and store
@@ -6800,7 +6806,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 
 		//transform appended face normal and store
 		norm_mat.rotate(src_norm[i], dst_norm[i]);
-		dst_norm[i].normalize3fast();
+		dst_norm[i].normalize3fast_checked(&default_norm);
 
 		//copy appended face texture coordinate
 		dst_tc[i] = src_tc[i];
@@ -7209,7 +7215,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 	return TRUE;
 }
 
-#define TANGENTIAL_PARANOIA_ASSERTS 1
+#define TANGENTIAL_PARANOIA_ASSERTS 0
 
 #if TANGENTIAL_PARANOIA_ASSERTS
 	#define tangential_paranoia(a) llassert(a)
@@ -7289,47 +7295,28 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 
 	// These appear to come out of the summing above distinctly non-unit-length
 	//
+	LLVector4a default_norm;
+	default_norm.set(0,1,0,1);
+
 	for (U32 a = 0; a < vertexCount; a++)
 	{
-		// Conditioning required by assets which don't necessarily reference every vert index
-		// (i.e. some of the tangents can end up uninitialized and therefore indeterminate/INF)
-		// and protection against zero length vectors which are not handled by normalize3fast.
-		//
-		if (!tan1[a].isFinite3() || tan1[a].equals3(LLVector4a::getZero()))
-		{
-			tan1[a].set(0,0,1,1);
-		}
-		else
-		{
-			tan1[a].normalize3fast();	
-		}
+		tan1[a].normalize3fast_checked(&default_norm);
+		tan2[a].normalize3fast_checked(&default_norm);
 
-		if (!tan2[a].isFinite3() || tan2[a].equals3(LLVector4a::getZero()))
-		{
-			tan2[a].set(0,0,1,1);
-		}
-		else
-		{
-			tan2[a].normalize3fast();
-		}		
-
-		const F32 cefgw = 0.03f;
 		tangential_paranoia(tan1[a].isFinite3());
 		tangential_paranoia(tan2[a].isFinite3());		
-		tangential_paranoia(tan1[a].isNormalized3(cefgw));
-		tangential_paranoia(tan2[a].isNormalized3(cefgw));	
+		tangential_paranoia(tan1[a].isNormalized3(0.03f));
+		tangential_paranoia(tan2[a].isNormalized3(0.03f));	
 	}
 
+	LLVector4a default_tangent;
+	default_tangent.set(0,0,1,1);
+
    for (U32 a = 0; a < vertexCount; a++)
 	{
 		LLVector4a n = normal[a];
-
-		if (!n.isFinite3() || n.equals3(LLVector4a::getZero()))
-		{
-			n.set(0,1,0,1);
-		}
-
-		n.normalize3fast();
+		
+		n.normalize3fast_checked(&default_norm);
 
 		const LLVector4a& t = tan1[a];
 
@@ -7353,34 +7340,27 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 
 		tangential_paranoia(tsubn.isFinite3());
 
-		if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO)
-		{
-			tsubn.normalize3fast();
+		tsubn.normalize3fast_checked(&default_tangent);
 		
-			// Calculate handedness
-			F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f;
+		// Calculate handedness
+		F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f;
 		
-			tsubn.getF32ptr()[3] = handedness;
+		tsubn.getF32ptr()[3] = handedness;
 
-			tangent[a] = tsubn;
+		tangent[a] = tsubn;
 
-			tangential_paranoia(tangent[a].isNormalized3(0.1f));
+		tangential_paranoia(tangent[a].isNormalized3(0.1f));
 
-			llassert(llfinite(tangent[a].getF32ptr()[0]));
-			llassert(llfinite(tangent[a].getF32ptr()[1]));
-			llassert(llfinite(tangent[a].getF32ptr()[2]));
+		llassert(llfinite(tangent[a].getF32ptr()[0]));
+		llassert(llfinite(tangent[a].getF32ptr()[1]));
+		llassert(llfinite(tangent[a].getF32ptr()[2]));
 
-			llassert(!llisnan(tangent[a].getF32ptr()[0]));
-			llassert(!llisnan(tangent[a].getF32ptr()[1]));
-			llassert(!llisnan(tangent[a].getF32ptr()[2]));
-		}
-		else
-		{ //degenerate, make up a value
-			tangent[a].set(0,0,1,1);
-		}
+		llassert(!llisnan(tangent[a].getF32ptr()[0]));
+		llassert(!llisnan(tangent[a].getF32ptr()[1]));
+		llassert(!llisnan(tangent[a].getF32ptr()[2]));
     }
     
-	ll_aligned_free_16(tan1);
+	 ll_aligned_free_16(tan1);
 }
 
 
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 3e503cb750..b34370fa87 100755
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -817,6 +817,12 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f,
 			size.mul(scale);
 		}
 
+		// Catch potential badness from normalization before it happens
+		//
+		llassert(mat_normal.mMatrix[0].isFinite3() && (mat_normal.mMatrix[0].dot3(mat_normal.mMatrix[0]).getF32() > F_APPROXIMATELY_ZERO));
+		llassert(mat_normal.mMatrix[1].isFinite3() && (mat_normal.mMatrix[1].dot3(mat_normal.mMatrix[1]).getF32() > F_APPROXIMATELY_ZERO));
+		llassert(mat_normal.mMatrix[2].isFinite3() && (mat_normal.mMatrix[2].dot3(mat_normal.mMatrix[2]).getF32() > F_APPROXIMATELY_ZERO));
+
 		mat_normal.mMatrix[0].normalize3fast();
 		mat_normal.mMatrix[1].normalize3fast();
 		mat_normal.mMatrix[2].normalize3fast();
@@ -936,7 +942,9 @@ LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, const LLVector4a& po
 		LLVector4a volume_normal;
 		LLVector3 v_normal(normal.getF32ptr());
 		volume_normal.load3(mDrawablep->getVOVolume()->agentDirectionToVolume(v_normal).mV);
-		volume_normal.normalize3fast();
+		LLVector4a default_norm;
+		default_norm.set(0,1,0,1);
+		volume_normal.normalize3fast_checked(&default_norm);
 		
 		if (texgen == LLTextureEntry::TEX_GEN_PLANAR)
 		{
@@ -1909,7 +1917,10 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 							binormal.load3(t.mV);
 						}
 
-						binormal.normalize3fast();
+						LLVector4a default_binorm;
+						default_binorm.set(1,0,0,1);
+						binormal.normalize3fast_checked(&default_binorm);
+
 						LLVector2 tc = bump_tc[i];
 						tc += LLVector2( bump_s_primary_light_ray.dot3(tangent).getF32(), bump_t_primary_light_ray.dot3(binormal).getF32() );
 					
@@ -1996,12 +2007,13 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 			LLFastTimer t(FTM_FACE_GEOM_NORMAL);
 			mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range);
 			F32* normals = (F32*) norm.get();
-	
+			LLVector4a default_norm;
+			default_norm.set(0,1,0,1);
 			for (S32 i = 0; i < num_vertices; i++)
 			{	
 				LLVector4a normal;
 				mat_normal.rotate(vf.mNormals[i], normal);
-				normal.normalize3fast();
+				normal.normalize3fast_checked(&default_norm);
 				normal.store4a(normals);
 				normals += 4;
 			}
@@ -2024,12 +2036,14 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 			mask.clear();
 			mask.setElement<3>();
 
+			LLVector4a default_tangent;
+			default_tangent.set(0,0,1,1);
+
 			for (S32 i = 0; i < num_vertices; i++)
 			{
 				LLVector4a tangent_out;
 				mat_normal.rotate(vf.mTangents[i], tangent_out);
-				tangent_out.normalize3fast();
-				
+				tangent_out.normalize3fast_checked(&default_tangent);
 				tangent_out.setSelectWithMask(mask, vf.mTangents[i], tangent_out);
 				tangent_out.store4a(tangents);
 				
@@ -2244,7 +2258,10 @@ BOOL LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius)
 		dist *= 16.f;
 	}
 
-	lookAt.normalize3fast() ;	
+	LLVector4a default_lookat;
+	default_lookat.set(0,0,1,1);
+
+	lookAt.normalize3fast_checked(&default_lookat);	
 
 	//get area of circle around node
 	F32 app_angle = atanf((F32) sqrt(size_squared) / dist);
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index 78401020a6..dc99fd469b 100755
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -1259,12 +1259,15 @@ F32 LLSpatialPartition::calcDistance(LLSpatialGroup* group, LLCamera& camera)
 
 	F32 dist = 0.f;
 
+	LLVector4a default_eyevec;
+	default_eyevec.set(0,0,1,1);
+
 	if (group->mDrawMap.find(LLRenderPass::PASS_ALPHA) != group->mDrawMap.end())
 	{
 		LLVector4a v = eye;
 
 		dist = eye.getLength3().getF32();
-		eye.normalize3fast();
+		eye.normalize3fast_checked(&default_eyevec);
 
 		if (!group->isState(LLSpatialGroup::ALPHA_DIRTY))
 		{
diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp
index 6a7f26bdb5..b25213d85f 100755
--- a/indra/newview/llvopartgroup.cpp
+++ b/indra/newview/llvopartgroup.cpp
@@ -411,14 +411,21 @@ void LLVOPartGroup::getGeometry(S32 idx,
 	LLVector4a right;
 
 	right.setCross3(at, up);
+	// guard against NaNs in normalize below
+	llassert(right.dot3(right).getF32() > F_APPROXIMATELY_ZERO);
 	right.normalize3fast();
+
 	up.setCross3(right, at);
+	// guard against NaNs in normalize below
+	llassert(up.dot3(up).getF32() > F_APPROXIMATELY_ZERO);
 	up.normalize3fast();
 
 	if (part.mFlags & LLPartData::LL_PART_FOLLOW_VELOCITY_MASK)
 	{
 		LLVector4a normvel;
 		normvel.load3(part.mVelocity.mV);
+		// guard against NaNs in normalize below
+		llassert(normvel.dot3(normvel).getF32() > F_APPROXIMATELY_ZERO);
 		normvel.normalize3fast();
 		LLVector2 up_fracs;
 		up_fracs.mV[0] = normvel.dot3(right).getF32();
@@ -443,6 +450,9 @@ void LLVOPartGroup::getGeometry(S32 idx,
 
 		up = new_up;
 		right = t;
+		// guard against NaNs in normalize below
+		llassert(up.dot3(up).getF32() > F_APPROXIMATELY_ZERO);
+		llassert(right.dot3(right).getF32() > F_APPROXIMATELY_ZERO);
 		up.normalize3fast();
 		right.normalize3fast();
 	}
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 0aa56fcc0f..8962d7cadf 100755
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -3751,7 +3751,8 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a&
 						{
 							*normal = n;
 						}
-
+						// guard against NaNs in normalize below
+						llassert(normal->dot3(*normal).getF32() > F_APPROXIMATELY_ZERO);
 						(*normal).normalize3fast();
 					}
 
@@ -3774,6 +3775,8 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a&
 						{
 							*tangent = tn;
 						}
+						// guard against NaNs in normalize below
+						llassert(tangent->dot3(*tangent).getF32() > F_APPROXIMATELY_ZERO);
 						(*tangent).normalize3fast();
 					}
 
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 05ef8060d4..72912db041 100755
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -10569,11 +10569,13 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar)
 		LLVector4a left;
 		left.load3(camera.getLeftAxis().mV);
 		left.mul(left);
+		llassert(left.dot3(left).getF32() > F_APPROXIMATELY_ZERO);
 		left.normalize3fast();
 
 		LLVector4a up;
 		up.load3(camera.getUpAxis().mV);
 		up.mul(up);
+		llassert(up.dot3(up).getF32() > F_APPROXIMATELY_ZERO);
 		up.normalize3fast();
 
 		tdim.mV[0] = fabsf(half_height.dot3(left).getF32());
-- 
cgit v1.3


From 1a37ea367159b1ba6c5284add8dd338f96b64e84 Mon Sep 17 00:00:00 2001
From: Graham Madarasz <graham@lindenlab.com>
Date: Wed, 12 Jun 2013 09:38:04 -0700
Subject: Pedantic adj overly concerned with minute details or formalisms

---
 indra/llmath/llvector4a.h   | 3 ++-
 indra/llmath/llvector4a.inl | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 94a61f2b1d..79d0a44551 100755
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -46,6 +46,7 @@ class LLRotation;
 // of this writing, July 08, 2010) about getting it implemented before you resort to
 // LLVector3/LLVector4. 
 /////////////////////////////////
+class LLVector4a;
 
 LL_ALIGN_PREFIX(16)
 class LLVector4a
@@ -239,7 +240,7 @@ public:
 	// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed
 	// Same as above except substitutes default vector contents if the vector is non-finite or degenerate due to zero length.
 	//
-	inline void normalize3fast_checked(LLVector4a* default = NULL);
+	inline void normalize3fast_checked(LLVector4a* d = 0);
 
 	// Return true if this vector is normalized with respect to x,y,z up to tolerance
 	inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const;
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 6860252a75..d3e8a1c933 100755
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -417,14 +417,14 @@ inline void LLVector4a::normalize3fast()
 
 // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed
 // Note that this does not consider zero length vectors!
-inline void LLVector4a::normalize3fast_checked(LLVector4a* default)
+inline void LLVector4a::normalize3fast_checked(LLVector4a* d)
 {
 	// handle bogus inputs before NaNs are generated below
 	//
 	if (!isFinite3() || (dot3(*this).getF32() < F_APPROXIMATELY_ZERO))
 	{
-		if (default)
-			*this = *default;
+		if (d)
+			*this = *d;
 		else
 			set(0,1,0,1);
 
-- 
cgit v1.3


From 9726f3774d58e5d9d78648bb5185f694a9f70954 Mon Sep 17 00:00:00 2001
From: Graham Madarasz <graham@lindenlab.com>
Date: Wed, 12 Jun 2013 10:26:06 -0700
Subject: Backout tangent assert experiment

---
 indra/llmath/llvector4a.inl |  27 -------
 indra/llmath/llvolume.cpp   | 178 ++++++++++++++++----------------------------
 2 files changed, 66 insertions(+), 139 deletions(-)

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 6860252a75..7c52ffef21 100755
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -331,9 +331,6 @@ inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const
 // Note that this does not consider zero length vectors!
 inline void LLVector4a::normalize3()
 {
-	// find out about bad math before it takes two man-days to track down
-	llassert(isFinite3() && !equals3(getZero()));
-
 	// lenSqrd = a dot a
 	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
 	// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 }
@@ -382,9 +379,6 @@ inline void LLVector4a::normalize4()
 // Note that this does not consider zero length vectors!
 inline LLSimdScalar LLVector4a::normalize3withLength()
 {
-	// find out about bad math before it takes two man-days to track down
-	llassert(isFinite3() && !equals3(getZero()));
-
 	// lenSqrd = a dot a
 	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
 	// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 }
@@ -415,27 +409,6 @@ inline void LLVector4a::normalize3fast()
 	mQ = _mm_mul_ps( mQ, approxRsqrt );
 }
 
-// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed
-// Note that this does not consider zero length vectors!
-inline void LLVector4a::normalize3fast_checked(LLVector4a* default)
-{
-	// handle bogus inputs before NaNs are generated below
-	//
-	if (!isFinite3() || (dot3(*this).getF32() < F_APPROXIMATELY_ZERO))
-	{
-		if (default)
-			*this = *default;
-		else
-			set(0,1,0,1);
-
-		return;
-	}
-
-	LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
-	const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
-	mQ = _mm_mul_ps( mQ, approxRsqrt );
-}
-
 // Return true if this vector is normalized with respect to x,y,z up to tolerance
 inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const
 {
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 15621c2625..1932272afb 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -4472,9 +4472,6 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 					continue; //skip degenerate face
 				}
 
-				LLVector4a default_norm;
-				default_norm.set(0,1,0,1);
-
 				//for each edge
 				for (S32 k = 0; k < 3; k++) {
 					S32 index = face.mEdge[j*3+k];
@@ -4496,14 +4493,14 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
 
 						norm_mat.rotate(n[v1], t);
 
-						t.normalize3fast_checked(&default_norm);
+						t.normalize3fast();
 						normals.push_back(LLVector3(t[0], t[1], t[2]));
 
 						mat.affineTransform(v[v2], t);
 						vertices.push_back(LLVector3(t[0], t[1], t[2]));
 						
 						norm_mat.rotate(n[v2], t);
-						t.normalize3fast_checked(&default_norm);
+						t.normalize3fast();
 						normals.push_back(LLVector3(t[0], t[1], t[2]));
 					}
 				}		
@@ -6099,9 +6096,6 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 	{
 		VertexData	corners[4];
 		VertexData baseVert;
-		LLVector4a default_norm;
-		default_norm.set(0,1,0,1);
-
 		for(S32 t = 0; t < 4; t++)
 		{
 			corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV);
@@ -6114,8 +6108,8 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 			lhs.setSub(corners[1].getPosition(), corners[0].getPosition());
 			LLVector4a rhs;
 			rhs.setSub(corners[2].getPosition(), corners[1].getPosition());
-			baseVert.getNormal().setCross3(lhs, rhs);
-			baseVert.getNormal().normalize3fast_checked(&default_norm);
+			baseVert.getNormal().setCross3(lhs, rhs); 
+			baseVert.getNormal().normalize3fast();
 		}
 
 		if(!(mTypeMask & TOP_MASK))
@@ -6565,12 +6559,17 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 	d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]);
 
 	LLVector4a normal;
-	LLVector4a default_norm;
-	default_norm.set(0,1,0,1);
-
 	normal.setCross3(d0,d1);
-	normal.normalize3fast_checked(&default_norm);
-	
+
+	if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO)
+	{
+		normal.normalize3fast();
+	}
+	else
+	{ //degenerate, make up a value
+		normal.set(0,0,1);
+	}
+
 	llassert(llfinite(normal.getF32ptr()[0]));
 	llassert(llfinite(normal.getF32ptr()[1]));
 	llassert(llfinite(normal.getF32ptr()[2]));
@@ -6612,13 +6611,11 @@ void LLVolumeFace::createTangents()
 		CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents);
 
 		//normalize tangents
-		LLVector4a default_norm;
-		default_norm.set(0,1,0,1);
 		for (U32 i = 0; i < mNumVertices; i++) 
 		{
 			//binorm[i].normalize3fast();
 			//bump map/planar projection code requires normals to be normalized
-			mNormals[i].normalize3fast_checked(&default_norm);
+			mNormals[i].normalize3fast();
 		}
 	}
 }
@@ -6796,9 +6793,6 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	mat.loadu(mat_in);
 	norm_mat.loadu(norm_mat_in);
 
-	LLVector4a default_norm;
-	default_norm.set(0,1,0,1);
-
 	for (U32 i = 0; i < face.mNumVertices; ++i)
 	{
 		//transform appended face position and store
@@ -6806,7 +6800,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 
 		//transform appended face normal and store
 		norm_mat.rotate(src_norm[i], dst_norm[i]);
-		dst_norm[i].normalize3fast_checked(&default_norm);
+		dst_norm[i].normalize3fast();
 
 		//copy appended face texture coordinate
 		dst_tc[i] = src_tc[i];
@@ -7215,53 +7209,46 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 	return TRUE;
 }
 
-#define TANGENTIAL_PARANOIA_ASSERTS 0
-
-#if TANGENTIAL_PARANOIA_ASSERTS
-	#define tangential_paranoia(a) llassert(a)
-#else
-	#define tangential_paranoia(a)
-#endif
-
 //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html
 void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal,
         const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent)
 {
+    //LLVector4a *tan1 = new LLVector4a[vertexCount * 2];
 	LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a));
 
-   LLVector4a* tan2 = tan1 + vertexCount;
+    LLVector4a* tan2 = tan1 + vertexCount;
 
 	memset(tan1, 0, vertexCount*2*sizeof(LLVector4a));
         
-   for (U32 a = 0; a < triangleCount; a++)
-   {
-      U32 i1 = *index_array++;
-      U32 i2 = *index_array++;
-      U32 i3 = *index_array++;
+    for (U32 a = 0; a < triangleCount; a++)
+    {
+        U32 i1 = *index_array++;
+        U32 i2 = *index_array++;
+        U32 i3 = *index_array++;
         
-      const LLVector4a& v1 = vertex[i1];
-      const LLVector4a& v2 = vertex[i2];
-      const LLVector4a& v3 = vertex[i3];
+        const LLVector4a& v1 = vertex[i1];
+        const LLVector4a& v2 = vertex[i2];
+        const LLVector4a& v3 = vertex[i3];
         
-      const LLVector2& w1 = texcoord[i1];
-      const LLVector2& w2 = texcoord[i2];
-      const LLVector2& w3 = texcoord[i3];
+        const LLVector2& w1 = texcoord[i1];
+        const LLVector2& w2 = texcoord[i2];
+        const LLVector2& w3 = texcoord[i3];
         
 		const F32* v1ptr = v1.getF32ptr();
 		const F32* v2ptr = v2.getF32ptr();
 		const F32* v3ptr = v3.getF32ptr();
 		
-      float x1 = v2ptr[0] - v1ptr[0];
-      float x2 = v3ptr[0] - v1ptr[0];
-      float y1 = v2ptr[1] - v1ptr[1];
-      float y2 = v3ptr[1] - v1ptr[1];
-      float z1 = v2ptr[2] - v1ptr[2];
-      float z2 = v3ptr[2] - v1ptr[2];
+        float x1 = v2ptr[0] - v1ptr[0];
+        float x2 = v3ptr[0] - v1ptr[0];
+        float y1 = v2ptr[1] - v1ptr[1];
+        float y2 = v3ptr[1] - v1ptr[1];
+        float z1 = v2ptr[2] - v1ptr[2];
+        float z2 = v3ptr[2] - v1ptr[2];
         
-      float s1 = w2.mV[0] - w1.mV[0];
-      float s2 = w3.mV[0] - w1.mV[0];
-      float t1 = w2.mV[1] - w1.mV[1];
-      float t2 = w3.mV[1] - w1.mV[1];
+        float s1 = w2.mV[0] - w1.mV[0];
+        float s2 = w3.mV[0] - w1.mV[0];
+        float t1 = w2.mV[1] - w1.mV[1];
+        float t2 = w3.mV[1] - w1.mV[1];
         
 		F32 rd = s1*t2-s2*t1;
 
@@ -7275,48 +7262,18 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 		LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r,
 				(s1 * z2 - s2 * z1) * r);
         
-		
 		tan1[i1].add(sdir);
 		tan1[i2].add(sdir);
 		tan1[i3].add(sdir);
-      
-		tangential_paranoia(tan1[i1].isFinite3());
-		tangential_paranoia(tan1[i2].isFinite3());
-		tangential_paranoia(tan1[i3].isFinite3());
-
+        
 		tan2[i1].add(tdir);
 		tan2[i2].add(tdir);
 		tan2[i3].add(tdir);
-
-		tangential_paranoia(tan2[i1].isFinite3());
-		tangential_paranoia(tan2[i2].isFinite3());
-		tangential_paranoia(tan2[i3].isFinite3());
-   }
-
-	// These appear to come out of the summing above distinctly non-unit-length
-	//
-	LLVector4a default_norm;
-	default_norm.set(0,1,0,1);
-
-	for (U32 a = 0; a < vertexCount; a++)
-	{
-		tan1[a].normalize3fast_checked(&default_norm);
-		tan2[a].normalize3fast_checked(&default_norm);
-
-		tangential_paranoia(tan1[a].isFinite3());
-		tangential_paranoia(tan2[a].isFinite3());		
-		tangential_paranoia(tan1[a].isNormalized3(0.03f));
-		tangential_paranoia(tan2[a].isNormalized3(0.03f));	
-	}
-
-	LLVector4a default_tangent;
-	default_tangent.set(0,0,1,1);
-
-   for (U32 a = 0; a < vertexCount; a++)
-	{
-		LLVector4a n = normal[a];
-		
-		n.normalize3fast_checked(&default_norm);
+    }
+    
+    for (U32 a = 0; a < vertexCount; a++)
+    {
+        LLVector4a n = normal[a];
 
 		const LLVector4a& t = tan1[a];
 
@@ -7326,41 +7283,38 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 		LLVector4a ncrosst;
 		ncrosst.setCross3(n,t);
 
-		F32 n_dot_t = n.dot3(t).getF32();
-
-		tangential_paranoia(llfinite(n_dot_t) && !llisnan(n_dot_t));
-
-		// Gram-Schmidt orthogonalize
-      n.mul(n_dot_t);
-
-		tangential_paranoia(n.isFinite3());
+        // Gram-Schmidt orthogonalize
+        n.mul(n.dot3(t).getF32());
 
 		LLVector4a tsubn;
 		tsubn.setSub(t,n);
 
-		tangential_paranoia(tsubn.isFinite3());
-
-		tsubn.normalize3fast_checked(&default_tangent);
+		if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO)
+		{
+			tsubn.normalize3fast();
 		
-		// Calculate handedness
-		F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f;
+			// Calculate handedness
+			F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f;
 		
-		tsubn.getF32ptr()[3] = handedness;
+			tsubn.getF32ptr()[3] = handedness;
 
-		tangent[a] = tsubn;
+			tangent[a] = tsubn;
 
-		tangential_paranoia(tangent[a].isNormalized3(0.1f));
+			llassert(llfinite(tangent[a].getF32ptr()[0]));
+			llassert(llfinite(tangent[a].getF32ptr()[1]));
+			llassert(llfinite(tangent[a].getF32ptr()[2]));
 
-		llassert(llfinite(tangent[a].getF32ptr()[0]));
-		llassert(llfinite(tangent[a].getF32ptr()[1]));
-		llassert(llfinite(tangent[a].getF32ptr()[2]));
-
-		llassert(!llisnan(tangent[a].getF32ptr()[0]));
-		llassert(!llisnan(tangent[a].getF32ptr()[1]));
-		llassert(!llisnan(tangent[a].getF32ptr()[2]));
+			llassert(!llisnan(tangent[a].getF32ptr()[0]));
+			llassert(!llisnan(tangent[a].getF32ptr()[1]));
+			llassert(!llisnan(tangent[a].getF32ptr()[2]));
+		}
+		else
+		{ //degenerate, make up a value
+			tangent[a].set(0,0,1,1);
+		}
     }
     
-	 ll_aligned_free_16(tan1);
+	ll_aligned_free_16(tan1);
 }
 
 
-- 
cgit v1.3


From 7b28a9a541464ece86e6d09fe7a85f307a710ddb Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Thu, 13 Jun 2013 14:38:51 -0500
Subject: Remove some now useless assertions.

---
 indra/llmath/llvolume.cpp | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 1932272afb..a8f4c52c5c 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -7299,14 +7299,6 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 			tsubn.getF32ptr()[3] = handedness;
 
 			tangent[a] = tsubn;
-
-			llassert(llfinite(tangent[a].getF32ptr()[0]));
-			llassert(llfinite(tangent[a].getF32ptr()[1]));
-			llassert(llfinite(tangent[a].getF32ptr()[2]));
-
-			llassert(!llisnan(tangent[a].getF32ptr()[0]));
-			llassert(!llisnan(tangent[a].getF32ptr()[1]));
-			llassert(!llisnan(tangent[a].getF32ptr()[2]));
 		}
 		else
 		{ //degenerate, make up a value
-- 
cgit v1.3


From fec6ab591ef644ee8058742f16849ca9ff53c6a6 Mon Sep 17 00:00:00 2001
From: Graham Linden <graham@lindenlab.com>
Date: Fri, 14 Jun 2013 09:32:23 -0700
Subject: Disable asserts in CalculateTangentArray (discussed with davep) to
 avoid debugging interruptions from bad assets

---
 indra/llmath/llvolume.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index 1932272afb..c4e1f0c84c 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -7300,13 +7300,15 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 
 			tangent[a] = tsubn;
 
+			/*
+			These are going off on invalid input and hindering other debugging.
 			llassert(llfinite(tangent[a].getF32ptr()[0]));
 			llassert(llfinite(tangent[a].getF32ptr()[1]));
 			llassert(llfinite(tangent[a].getF32ptr()[2]));
 
 			llassert(!llisnan(tangent[a].getF32ptr()[0]));
 			llassert(!llisnan(tangent[a].getF32ptr()[1]));
-			llassert(!llisnan(tangent[a].getF32ptr()[2]));
+			llassert(!llisnan(tangent[a].getF32ptr()[2]));*/
 		}
 		else
 		{ //degenerate, make up a value
-- 
cgit v1.3


From ccd04cd66c0a550694fefe41042ef47466780a92 Mon Sep 17 00:00:00 2001
From: Dave Parks <davep@lindenlab.com>
Date: Tue, 18 Jun 2013 17:24:21 -0500
Subject: Occlusion culling overhaul.

---
 indra/llmath/llvolume.cpp                          |   3 -
 indra/llrender/llglslshader.cpp                    |  65 ++++++++
 indra/llrender/llglslshader.h                      |   7 +
 indra/llrender/llrendertarget.cpp                  |  36 ++++-
 indra/llrender/llrendertarget.h                    |   6 +
 indra/newview/app_settings/settings.xml            |   3 +-
 .../shaders/class1/deferred/blurLightF.glsl        |  94 +++++------
 .../shaders/class1/interface/downsampleDepthF.glsl |  67 ++++++++
 .../class1/interface/downsampleDepthRectF.glsl     |  67 ++++++++
 .../shaders/class1/interface/downsampleDepthV.glsl |  59 +++++++
 indra/newview/llmeshrepository.cpp                 |   1 +
 indra/newview/llspatialpartition.cpp               | 180 +++++++++++++++++++--
 indra/newview/llviewerdisplay.cpp                  |   3 +-
 indra/newview/llviewershadermgr.cpp                |  24 +++
 indra/newview/llviewershadermgr.h                  |   4 +-
 indra/newview/llvovolume.cpp                       |   7 +-
 indra/newview/pipeline.cpp                         | 145 +++++++++++++++--
 indra/newview/pipeline.h                           |  10 +-
 18 files changed, 699 insertions(+), 82 deletions(-)
 create mode 100644 indra/newview/app_settings/shaders/class1/interface/downsampleDepthF.glsl
 create mode 100644 indra/newview/app_settings/shaders/class1/interface/downsampleDepthRectF.glsl
 create mode 100644 indra/newview/app_settings/shaders/class1/interface/downsampleDepthV.glsl

(limited to 'indra/llmath')

diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index a8f4c52c5c..14cebfe5aa 100755
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -7277,9 +7277,6 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 
 		const LLVector4a& t = tan1[a];
 
-		llassert(tan1[a].getLength3().getF32() >= 0.f);
-		llassert(tan2[a].getLength3().getF32() >= 0.f);
-
 		LLVector4a ncrosst;
 		ncrosst.setCross3(n,t);
 
diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp
index 62191b4c1a..ac16e30796 100755
--- a/indra/llrender/llglslshader.cpp
+++ b/indra/llrender/llglslshader.cpp
@@ -149,6 +149,9 @@ void LLGLSLShader::clearStats()
 	mTimeElapsed = 0;
 	mSamplesDrawn = 0;
 	mDrawCalls = 0;
+	mTextureStateFetched = false;
+	mTextureMagFilter.clear();
+	mTextureMinFilter.clear();
 }
 
 void LLGLSLShader::dumpStats()
@@ -161,6 +164,16 @@ void LLGLSLShader::dumpStats()
 		{
 			llinfos << mShaderFiles[i].first << llendl;
 		}
+		for (U32 i = 0; i < mTexture.size(); ++i)
+		{
+			GLint idx = mTexture[i];
+			
+			if (idx >= 0)
+			{
+				GLint uniform_idx = getUniformLocation(i);
+				llinfos << mUniformNameMap[uniform_idx] << " - " << std::hex << mTextureMagFilter[i] << "/" << mTextureMinFilter[i] << std::dec << llendl;
+			}
+		}
 		llinfos << "=============================================" << llendl;
 
 		F32 ms = mTimeElapsed/1000000.f;
@@ -211,6 +224,39 @@ void LLGLSLShader::placeProfileQuery()
 		glGenQueriesARB(1, &mTimerQuery);
 	}
 
+	if (!mTextureStateFetched)
+	{
+		mTextureStateFetched = true;
+		mTextureMagFilter.resize(mTexture.size());
+		mTextureMinFilter.resize(mTexture.size());
+
+		U32 cur_active = gGL.getCurrentTexUnitIndex();
+
+		for (U32 i = 0; i < mTexture.size(); ++i)
+		{
+			GLint idx = mTexture[i];
+
+			if (idx >= 0)
+			{
+				gGL.getTexUnit(idx)->activate();
+
+				U32 mag = 0xFFFFFFFF;
+				U32 min = 0xFFFFFFFF;
+
+				U32 type = LLTexUnit::getInternalType(gGL.getTexUnit(idx)->getCurrType());
+
+				glGetTexParameteriv(type, GL_TEXTURE_MAG_FILTER, (GLint*) &mag);
+				glGetTexParameteriv(type, GL_TEXTURE_MIN_FILTER, (GLint*) &min);
+
+				mTextureMagFilter[i] = mag;
+				mTextureMinFilter[i] = min;
+			}
+		}
+
+		gGL.getTexUnit(cur_active)->activate();
+	}
+
+
 	glBeginQueryARB(GL_SAMPLES_PASSED, 1);
 	glBeginQueryARB(GL_TIME_ELAPSED, mTimerQuery);
 #endif
@@ -573,6 +619,7 @@ void LLGLSLShader::mapUniform(GLint index, const vector<string> * uniforms)
 		}
 
 		mUniformMap[name] = location;
+		mUniformNameMap[location] = name;
 		LL_DEBUGS("ShaderLoading") << "Uniform " << name << " is at location " << location << LL_ENDL;
 	
 		//find the index of this uniform
@@ -635,6 +682,7 @@ BOOL LLGLSLShader::mapUniforms(const vector<string> * uniforms)
 	mActiveTextureChannels = 0;
 	mUniform.clear();
 	mUniformMap.clear();
+	mUniformNameMap.clear();
 	mTexture.clear();
 	mValue.clear();
 	//initialize arrays
@@ -1152,6 +1200,23 @@ void LLGLSLShader::uniform1i(const string& uniform, GLint v)
 	}
 }
 
+void LLGLSLShader::uniform2i(const string& uniform, GLint i, GLint j) // set a two-int uniform by name, going through the per-location value cache
+{
+	GLint location = getUniformLocation(uniform);
+				
+	if (location >= 0) // negative locations are ignored (presumably "uniform not present in this program" -- confirm against getUniformLocation)
+	{
+		std::map<GLint, LLVector4>::iterator iter = mValue.find(location); // mValue: uniform location -> last known value
+		LLVector4 vec(i,j,0.f,0.f); // i and j are packed into the LLVector4 cache entry; z and w are set to 0
+		if (iter == mValue.end() || shouldChange(iter->second,vec)) // upload only when nothing is cached yet or shouldChange() asks for it
+		{
+			glUniform2iARB(location, i, j);
+			mValue[location] = vec; // remember what was just sent
+		}
+	}
+}
+
+
 void LLGLSLShader::uniform1f(const string& uniform, GLfloat v)
 {
 	GLint location = getUniformLocation(uniform);
diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h
index 3c775cde27..eabdb9fc92 100755
--- a/indra/llrender/llglslshader.h
+++ b/indra/llrender/llglslshader.h
@@ -111,6 +111,7 @@ public:
 	void uniform3fv(U32 index, U32 count, const GLfloat* v);
 	void uniform4fv(U32 index, U32 count, const GLfloat* v);
 	void uniform1i(const std::string& uniform, GLint i);
+	void uniform2i(const std::string& uniform, GLint i, GLint j);
 	void uniform1f(const std::string& uniform, GLfloat v);
 	void uniform2f(const std::string& uniform, GLfloat x, GLfloat y);
 	void uniform3f(const std::string& uniform, GLfloat x, GLfloat y, GLfloat z);
@@ -170,6 +171,7 @@ public:
 	U32 mAttributeMask;  //mask of which reserved attributes are set (lines up with LLVertexBuffer::getTypeMask())
 	std::vector<GLint> mUniform;   //lookup table of uniform enum to uniform location
 	std::map<std::string, GLint> mUniformMap;  //lookup map of uniform name to uniform location
+	std::map<GLint, std::string> mUniformNameMap; //lookup map of uniform location to uniform name
 	std::map<GLint, LLVector4> mValue; //lookup map of uniform location to last known value
 	std::vector<GLint> mTexture;
 	S32 mTotalUniformSize;
@@ -192,6 +194,11 @@ public:
 	static U64 sTotalSamplesDrawn;
 	U32 mDrawCalls;
 	static U32 sTotalDrawCalls;
+
+	bool mTextureStateFetched;
+	std::vector<U32> mTextureMagFilter;
+	std::vector<U32> mTextureMinFilter;
+	
 };
 
 //UI shader (declared here so llui_libtest will link properly)
diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp
index 5fb4fc8e52..752486fdde 100755
--- a/indra/llrender/llrendertarget.cpp
+++ b/indra/llrender/llrendertarget.cpp
@@ -53,11 +53,23 @@ void check_framebuffer_status()
 bool LLRenderTarget::sUseFBO = false;
 U32 LLRenderTarget::sCurFBO = 0;
 
+
+extern S32 gGLViewport[4];
+
+// TODO(davep): hook these up -- size of the viewport last set by bindTarget()/flush()
+U32 LLRenderTarget::sCurResX = 0;
+U32 LLRenderTarget::sCurResY = 0;
+
+
+/// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
 LLRenderTarget::LLRenderTarget() :
 	mResX(0),
 	mResY(0),
 	mFBO(0),
 	mPreviousFBO(0),
+	mPreviousResX(0),
+	mPreviousResY(0),
 	mDepth(0),
 	mStencil(0),
 	mUseDepth(false),
@@ -390,13 +402,12 @@ void LLRenderTarget::bindTarget()
 {
 	if (mFBO)
 	{
-		mPreviousFBO = sCurFBO;
-
 		stop_glerror();
 		
+		mPreviousFBO = sCurFBO;
 		glBindFramebuffer(GL_FRAMEBUFFER, mFBO);
 		sCurFBO = mFBO;
-
+		
 		stop_glerror();
 		if (gGLManager.mHasDrawBuffers)
 		{ //setup multiple render targets
@@ -418,7 +429,12 @@ void LLRenderTarget::bindTarget()
 		stop_glerror();
 	}
 
+	mPreviousResX = sCurResX;
+	mPreviousResY = sCurResY;
 	glViewport(0, 0, mResX, mResY);
+	sCurResX = mResX;
+	sCurResY = mResY;
+
 	sBoundTarget = this;
 }
 
@@ -489,6 +505,20 @@ void LLRenderTarget::flush(bool fetch_depth)
 		stop_glerror();
 		glBindFramebuffer(GL_FRAMEBUFFER, mPreviousFBO);
 		sCurFBO = mPreviousFBO;
+
+		if (mPreviousFBO)
+		{
+			glViewport(0, 0, mPreviousResX, mPreviousResY);
+			sCurResX = mPreviousResX;
+			sCurResY = mPreviousResY;
+		}
+		else
+		{
+			glViewport(gGLViewport[0],gGLViewport[1],gGLViewport[2],gGLViewport[3]);
+			sCurResX = gGLViewport[2];
+			sCurResY = gGLViewport[3];
+		}
+						
 		stop_glerror();
 	}
 }
diff --git a/indra/llrender/llrendertarget.h b/indra/llrender/llrendertarget.h
index 765a727b5b..66a9874a6b 100755
--- a/indra/llrender/llrendertarget.h
+++ b/indra/llrender/llrendertarget.h
@@ -63,6 +63,9 @@ public:
 	static bool sUseFBO; 
 	static U32 sBytesAllocated;
 	static U32 sCurFBO;
+	static U32 sCurResX;
+	static U32 sCurResY;
+
 
 	LLRenderTarget();
 	~LLRenderTarget();
@@ -146,6 +149,9 @@ protected:
 	std::vector<U32> mInternalFormat;
 	U32 mFBO;
 	U32 mPreviousFBO;
+	U32 mPreviousResX;
+	U32 mPreviousResY;
+
 	U32 mDepth;
 	bool mStencil;
 	bool mUseDepth;
diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml
index 12ca902c59..344079b640 100755
--- a/indra/newview/app_settings/settings.xml
+++ b/indra/newview/app_settings/settings.xml
@@ -7732,7 +7732,7 @@
     <key>Type</key>
     <string>S32</string>
     <key>Value</key>
-    <integer>4</integer>
+    <integer>3</integer>
   </map>
 
   <key>OctreeAlphaDistanceFactor</key>
@@ -8504,7 +8504,6 @@
     <key>Value</key>
     <real>1.0</real>
   </map>
-
   <key>RenderDeferredTreeShadowBias</key>
   <map>
     <key>Comment</key>
diff --git a/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl b/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl
index 589ace086d..968a5f6b3d 100755
--- a/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl
+++ b/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl
@@ -46,11 +46,6 @@ VARYING vec2 vary_fragcoord;
 uniform mat4 inv_proj;
 uniform vec2 screen_res;
 
-vec3 getKern(int i)
-{
-	return kern[i];
-}
-
 vec4 getPosition(vec2 pos_screen)
 {
 	float depth = texture2DRect(depthMap, pos_screen.xy).r;
@@ -64,38 +59,38 @@ vec4 getPosition(vec2 pos_screen)
 	return pos;
 }
 
-#ifdef SINGLE_FP_ONLY
-vec2 encode_normal(vec3 n)
-{
-	vec2 sn;
-	sn.xy = (n.xy * vec2(0.5f,0.5f)) + vec2(0.5f,0.5f);
-	return sn;
-}
-
-vec3 decode_normal (vec2 enc)
-{
-	vec3 n;
-	n.xy = (enc.xy * vec2(2.0f,2.0f)) - vec2(1.0f,1.0f);
-	n.z = sqrt(1.0f - dot(n.xy,n.xy));
-	return n;
-}
-#else
-vec2 encode_normal(vec3 n)
-{
-	float f = sqrt(8 * n.z + 8);
-	return n.xy / f + 0.5;
-}
-
-vec3 decode_normal (vec2 enc)
-{
-    vec2 fenc = enc*4-2;
-    float f = dot(fenc,fenc);
-    float g = sqrt(1-f/4);
-    vec3 n;
-    n.xy = fenc*g;
-    n.z = 1-f/2;
-    return n;
-}
+#ifdef SINGLE_FP_ONLY
+vec2 encode_normal(vec3 n)
+{
+	vec2 sn;
+	sn.xy = (n.xy * vec2(0.5f,0.5f)) + vec2(0.5f,0.5f);
+	return sn;
+}
+
+vec3 decode_normal (vec2 enc)
+{
+	vec3 n;
+	n.xy = (enc.xy * vec2(2.0f,2.0f)) - vec2(1.0f,1.0f);
+	n.z = sqrt(1.0f - dot(n.xy,n.xy));
+	return n;
+}
+#else
+vec2 encode_normal(vec3 n)
+{
+	float f = sqrt(8 * n.z + 8);
+	return n.xy / f + 0.5;
+}
+
+vec3 decode_normal (vec2 enc)
+{
+    vec2 fenc = enc*4-2;
+    float f = dot(fenc,fenc);
+    float g = sqrt(1-f/4);
+    vec3 n;
+    n.xy = fenc*g;
+    n.z = 1-f/2;
+    return n;
+}
 #endif
 
 void main() 
@@ -110,7 +105,7 @@ void main()
 	vec2 dlt = kern_scale * delta / (1.0+norm.xy*norm.xy);
 	dlt /= max(-pos.z*dist_factor, 1.0);
 	
-	vec2 defined_weight = getKern(0).xy; // special case the first (centre) sample's weight in the blur; we have to sample it anyway so we get it for 'free'
+	vec2 defined_weight = kern[0].xy; // special case the first (centre) sample's weight in the blur; we have to sample it anyway so we get it for 'free'
 	vec4 col = defined_weight.xyxx * ccol;
 
 	// relax tolerance according to distance to avoid speckling artifacts, as angles and distances are a lot more abrupt within a small screen area at larger distances
@@ -120,28 +115,33 @@ void main()
 	float tc_mod = 0.5*(tc.x + tc.y); // mod(tc.x+tc.y,2)
 	tc_mod -= floor(tc_mod);
 	tc_mod *= 2.0;
-	tc += ( (tc_mod - 0.5) * getKern(1).z * dlt * 0.5 );
+	tc += ( (tc_mod - 0.5) * kern[1].z * dlt * 0.5 );
 
 	for (int i = 1; i < 4; i++)
 	{
-		vec2 samptc = tc + getKern(i).z*dlt;
-	        vec3 samppos = getPosition(samptc).xyz; 
+		vec2 samptc = tc + kern[i].z*dlt;
+	    vec3 samppos = getPosition(samptc).xyz; 
+
 		float d = dot(norm.xyz, samppos.xyz-pos.xyz);// dist from plane
+		
 		if (d*d <= pointplanedist_tolerance_pow2)
 		{
-			col += texture2DRect(lightMap, samptc)*getKern(i).xyxx;
-			defined_weight += getKern(i).xy;
+			col += texture2DRect(lightMap, samptc)*kern[i].xyxx;
+			defined_weight += kern[i].xy;
 		}
 	}
+
 	for (int i = 1; i < 4; i++)
 	{
-		vec2 samptc = tc - getKern(i).z*dlt;
-	        vec3 samppos = getPosition(samptc).xyz; 
+		vec2 samptc = tc - kern[i].z*dlt;
+	    vec3 samppos = getPosition(samptc).xyz; 
+
 		float d = dot(norm.xyz, samppos.xyz-pos.xyz);// dist from plane
+		
 		if (d*d <= pointplanedist_tolerance_pow2)
 		{
-			col += texture2DRect(lightMap, samptc)*getKern(i).xyxx;
-			defined_weight += getKern(i).xy;
+			col += texture2DRect(lightMap, samptc)*kern[i].xyxx;
+			defined_weight += kern[i].xy;
 		}
 	}
 
diff --git a/indra/newview/app_settings/shaders/class1/interface/downsampleDepthF.glsl b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthF.glsl
new file mode 100644
index 0000000000..6523a06d22
--- /dev/null
+++ b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthF.glsl
@@ -0,0 +1,67 @@
+/** 
+ * @file downsampleDepthF.glsl
+ *
+ * $LicenseInfo:firstyear=2007&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2011, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifdef DEFINE_GL_FRAGCOLOR
+out vec4 frag_color;
+#else
+#define frag_color gl_FragColor
+#endif
+// source depth texture; only its red channel is sampled below
+uniform sampler2D depthMap;
+// declared vec2 to agree with the vertex stage's "delta" (same-named uniforms must match in type across linked stages); not read here
+uniform vec2 delta;
+// nine sample coordinates produced by the vertex stage
+VARYING vec2 tc0;
+VARYING vec2 tc1;
+VARYING vec2 tc2;
+VARYING vec2 tc3;
+VARYING vec2 tc4;
+VARYING vec2 tc5;
+VARYING vec2 tc6;
+VARYING vec2 tc7;
+VARYING vec2 tc8;
+// Writes the smallest of the nine depthMap samples (tc0..tc8) to gl_FragDepth; frag_color is not written.
+void main() 
+{
+	vec4 depth1 = 
+		vec4(texture2D(depthMap, tc0).r,
+			texture2D(depthMap, tc1).r,
+			texture2D(depthMap, tc2).r,
+			texture2D(depthMap, tc3).r);
+
+	vec4 depth2 = 
+		vec4(texture2D(depthMap, tc4).r,
+			texture2D(depthMap, tc5).r,
+			texture2D(depthMap, tc6).r,
+			texture2D(depthMap, tc7).r);
+	// component-wise min of the two groups, then reduce the four components and the ninth sample
+	depth1 = min(depth1, depth2);
+	float depth = min(depth1.x, depth1.y);
+	depth = min(depth, depth1.z);
+	depth = min(depth, depth1.w);
+	depth = min(depth, texture2D(depthMap, tc8).r);
+
+	gl_FragDepth = depth;
+}
diff --git a/indra/newview/app_settings/shaders/class1/interface/downsampleDepthRectF.glsl b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthRectF.glsl
new file mode 100644
index 0000000000..2f89b8ed72
--- /dev/null
+++ b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthRectF.glsl
@@ -0,0 +1,67 @@
+/** 
+ * @file downsampleDepthRectF.glsl
+ *
+ * $LicenseInfo:firstyear=2007&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2011, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifdef DEFINE_GL_FRAGCOLOR
+out vec4 frag_color;
+#else
+#define frag_color gl_FragColor
+#endif
+// source depth rectangle texture; only its red channel is sampled below
+uniform sampler2DRect depthMap;
+// declared vec2 to agree with the vertex stage's "delta" (same-named uniforms must match in type across linked stages); not read here
+uniform vec2 delta;
+// nine sample coordinates produced by the vertex stage
+VARYING vec2 tc0;
+VARYING vec2 tc1;
+VARYING vec2 tc2;
+VARYING vec2 tc3;
+VARYING vec2 tc4;
+VARYING vec2 tc5;
+VARYING vec2 tc6;
+VARYING vec2 tc7;
+VARYING vec2 tc8;
+// Writes the smallest of the nine depthMap samples (tc0..tc8) to gl_FragDepth; frag_color is not written.
+void main() 
+{
+	vec4 depth1 = 
+		vec4(texture2DRect(depthMap, tc0).r,
+			texture2DRect(depthMap, tc1).r,
+			texture2DRect(depthMap, tc2).r,
+			texture2DRect(depthMap, tc3).r);
+
+	vec4 depth2 = 
+		vec4(texture2DRect(depthMap, tc4).r,
+			texture2DRect(depthMap, tc5).r,
+			texture2DRect(depthMap, tc6).r,
+			texture2DRect(depthMap, tc7).r);
+	// component-wise min of the two groups, then reduce the four components and the ninth sample
+	depth1 = min(depth1, depth2);
+	float depth = min(depth1.x, depth1.y);
+	depth = min(depth, depth1.z);
+	depth = min(depth, depth1.w);
+	depth = min(depth, texture2DRect(depthMap, tc8).r);
+
+	gl_FragDepth = depth;
+}
diff --git a/indra/newview/app_settings/shaders/class1/interface/downsampleDepthV.glsl b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthV.glsl
new file mode 100644
index 0000000000..71d80911d6
--- /dev/null
+++ b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthV.glsl
@@ -0,0 +1,59 @@
+/** 
+ * @file downsampleDepthV.glsl
+ *
+ * $LicenseInfo:firstyear=2007&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2011, Linden Research, Inc.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ * 
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+uniform mat4 modelview_projection_matrix;
+// NOTE(review): modelview_projection_matrix is declared but never referenced in this shader
+ATTRIBUTE vec3 position;
+// scale applied to the remapped position to form the base texture coordinate
+uniform vec2 screen_res;
+// per-axis offset between neighbouring sample coordinates
+uniform vec2 delta;
+// 3x3 grid of sample coordinates: tc4 is the centre, the others are offset by +/-delta on each axis
+VARYING vec2 tc0;
+VARYING vec2 tc1;
+VARYING vec2 tc2;
+VARYING vec2 tc3;
+VARYING vec2 tc4;
+VARYING vec2 tc5;
+VARYING vec2 tc6;
+VARYING vec2 tc7;
+VARYING vec2 tc8;
+// Passes position through untransformed and emits the nine sample coordinates.
+void main()
+{
+	gl_Position = vec4(position, 1.0); 
+	// position.xy*0.5+0.5 maps -1..1 onto 0..1; the result is then scaled by screen_res
+	vec2 tc = (position.xy*0.5+0.5)*screen_res;
+	tc0 = tc+vec2(-delta.x,-delta.y);
+	tc1 = tc+vec2(0,-delta.y);
+	tc2 = tc+vec2(delta.x,-delta.y);
+	tc3 = tc+vec2(-delta.x,0);
+	tc4 = tc+vec2(0,0);
+	tc5 = tc+vec2(delta.x,0);
+	tc6 = tc+vec2(-delta.x,delta.y);
+	tc7 = tc+vec2(0,delta.y);
+	tc8 = tc+vec2(delta.x,delta.y);
+}
+
diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp
index b47fe9d4b1..8d3539d297 100755
--- a/indra/newview/llmeshrepository.cpp
+++ b/indra/newview/llmeshrepository.cpp
@@ -1,3 +1,4 @@
+
 /** 
  * @file llmeshrepository.cpp
  * @brief Mesh repository implementation.
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index 78401020a6..941c578783 100755
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -1498,6 +1498,8 @@ BOOL LLSpatialGroup::rebound()
 	if (mOctreeNode->getChildCount() == 1 && mOctreeNode->getElementCount() == 0)
 	{
 		LLSpatialGroup* group = (LLSpatialGroup*) mOctreeNode->getChild(0)->getListener(0);
+
+		//rebound single child
 		group->rebound();
 		
 		//copy single child's bounding box
@@ -1506,10 +1508,11 @@ BOOL LLSpatialGroup::rebound()
 		mExtents[0] = group->mExtents[0];
 		mExtents[1] = group->mExtents[1];
 		
+		//treat this node as a "chute" to a deeper level of the tree
 		group->setState(SKIP_FRUSTUM_CHECK);
 	}
 	else if (mOctreeNode->isLeaf())
-	{ //copy object bounding box if this is a leaf
+	{ //copy object bounding box if this is a leaf 
 		boundObjects(TRUE, mExtents[0], mExtents[1]);
 		mBounds[0] = mObjectBounds[0];
 		mBounds[1] = mObjectBounds[1];
@@ -1518,14 +1521,17 @@ BOOL LLSpatialGroup::rebound()
 	{
 		LLVector4a& newMin = mExtents[0];
 		LLVector4a& newMax = mExtents[1];
+		
+		//get bounding box of first child
 		LLSpatialGroup* group = (LLSpatialGroup*) mOctreeNode->getChild(0)->getListener(0);
 		group->clearState(SKIP_FRUSTUM_CHECK);
 		group->rebound();
+
 		//initialize to first child
 		newMin = group->mExtents[0];
 		newMax = group->mExtents[1];
 
-		//first, rebound children
+		//rebound remaining children, expanding bounding box to encompass children
 		for (U32 i = 1; i < mOctreeNode->getChildCount(); i++)
 		{
 			group = (LLSpatialGroup*) mOctreeNode->getChild(i)->getListener(0);
@@ -2506,7 +2512,7 @@ void pushBufferVerts(LLVertexBuffer* buffer, U32 mask)
 	}
 }
 
-void pushBufferVerts(LLSpatialGroup* group, U32 mask)
+void pushBufferVerts(LLSpatialGroup* group, U32 mask, bool push_alpha = true)
 {
 	if (group->mSpatialPartition->mRenderByGroup)
 	{
@@ -2515,7 +2521,10 @@ void pushBufferVerts(LLSpatialGroup* group, U32 mask)
 			LLDrawInfo* params = *(group->mDrawMap.begin()->second.begin());
 			LLRenderPass::applyModelMatrix(*params);
 		
-			pushBufferVerts(group->mVertexBuffer, mask);
+			if (push_alpha)
+			{
+				pushBufferVerts(group->mVertexBuffer, mask);
+			}
 
 			for (LLSpatialGroup::buffer_map_t::iterator i = group->mBufferMap.begin(); i != group->mBufferMap.end(); ++i)
 			{
@@ -2529,10 +2538,10 @@ void pushBufferVerts(LLSpatialGroup* group, U32 mask)
 			}
 		}
 	}
-	else
+	/*else
 	{
-		drawBox(group->mBounds[0], group->mBounds[1]);
-	}
+		//drawBox(group->mBounds[0], group->mBounds[1]);
+	}*/
 }
 
 void pushVertsColorCoded(LLSpatialGroup* group, U32 mask)
@@ -2705,17 +2714,53 @@ void renderOctree(LLSpatialGroup* group)
 //	drawBoxOutline(LLVector3(node->getCenter()), LLVector3(node->getSize()));
 }
 
+std::set<LLSpatialGroup*> visible_selected_groups;
+
 void renderVisibility(LLSpatialGroup* group, LLCamera* camera)
 {
-	LLGLEnable blend(GL_BLEND);
+	/*LLGLEnable blend(GL_BLEND);
 	gGL.setSceneBlendType(LLRender::BT_ALPHA);
 	LLGLEnable cull(GL_CULL_FACE);
-	glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
+	glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);*/
 
-	BOOL render_objects = (!LLPipeline::sUseOcclusion || !group->isOcclusionState(LLSpatialGroup::OCCLUDED)) && group->isVisible() &&
+	/*BOOL render_objects = (!LLPipeline::sUseOcclusion || !group->isOcclusionState(LLSpatialGroup::OCCLUDED)) && group->isVisible() &&
 							!group->isEmpty();
 
+
 	if (render_objects)
+	{
+		LLGLDepthTest depth(GL_TRUE, GL_FALSE);
+
+		LLGLDisable blend(GL_BLEND);
+		gGL.diffuseColor4f(0.f, 0.75f, 0.f,0.5f);
+		pushBufferVerts(group, LLVertexBuffer::MAP_VERTEX, false);
+		
+		glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
+		glLineWidth(4.f);
+		gGL.diffuseColor4f(0.f, 0.5f, 0.f, 1.f);
+		pushBufferVerts(group, LLVertexBuffer::MAP_VERTEX, false);
+		glLineWidth(1.f);
+		glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+
+		bool selected = false;
+		
+		for (LLSpatialGroup::element_iter iter = group->getDataBegin(); iter != group->getDataEnd(); ++iter)
+		{
+			LLDrawable* drawable = *iter;
+			if (drawable->getVObj().notNull() && drawable->getVObj()->isSelected())
+			{
+				selected = true;
+				break;
+			}
+		}
+		
+		if (selected)
+		{ //store for rendering occlusion volume as overlay
+			visible_selected_groups.insert(group);
+		}
+	}*/		
+
+	/*if (render_objects)
 	{
 		LLGLDepthTest depth_under(GL_TRUE, GL_FALSE, GL_GREATER);
 		gGL.diffuseColor4f(0, 0.5f, 0, 0.5f);
@@ -2740,6 +2785,59 @@ void renderVisibility(LLSpatialGroup* group, LLCamera* camera)
 			gGL.diffuseColor4f(0.f, 0.75f, 0.f,0.5f);
 			gGL.diffuseColor4f(0.f, 0.75f, 0.f, 0.5f);
 			pushBufferVerts(group, LLVertexBuffer::MAP_VERTEX);
+		
+			bool selected = false;
+		
+			for (LLSpatialGroup::element_iter iter = group->getDataBegin(); iter != group->getDataEnd(); ++iter)
+			{
+				LLDrawable* drawable = *iter;
+				if (drawable->getVObj().notNull() && drawable->getVObj()->isSelected())
+				{
+					selected = true;
+					break;
+				}
+			}
+		
+			if (selected)
+			{ //store for rendering occlusion volume as overlay
+				visible_selected_groups.insert(group);
+			}
+		}		
+	}*/
+}
+
+void renderXRay(LLSpatialGroup* group, LLCamera* camera)
+{
+	BOOL render_objects = (!LLPipeline::sUseOcclusion || !group->isOcclusionState(LLSpatialGroup::OCCLUDED)) && group->isVisible() &&
+							!group->isEmpty();
+	
+	if (render_objects)
+	{
+		pushBufferVerts(group, LLVertexBuffer::MAP_VERTEX, false);
+
+		bool selected = false;
+
+		for (LLSpatialGroup::element_iter iter = group->getDataBegin(); iter != group->getDataEnd(); ++iter)
+		{
+			LLDrawable* drawable = *iter;
+			if (drawable->getVObj().notNull() && drawable->getVObj()->isSelected())
+			{
+				selected = true;
+				break;
+			}
+		}
+
+		if (selected)
+		{ //store for rendering occlusion volume as overlay
+
+			if (!group->mSpatialPartition->isBridge())
+			{
+				visible_selected_groups.insert(group);
+			}
+			else
+			{
+				visible_selected_groups.insert(group->mSpatialPartition->asBridge()->getSpatialGroup());
+			}
 		}
 	}
 }
@@ -4210,6 +4308,48 @@ public:
 	}
 };
 
+class LLOctreeRenderXRay : public LLOctreeTraveler<LLDrawable>
+{ //octree traveler that calls renderXRay() for each group whose bounds pass the camera test
+public:
+	LLCamera* mCamera;
+	LLOctreeRenderXRay(LLCamera* camera): mCamera(camera) {}
+	//accepts this node, recurses into its children, then draws the group via renderXRay() when the occlusion debug mask is set
+	virtual void traverse(const LLSpatialGroup::OctreeNode* node)
+	{
+		LLSpatialGroup* group = (LLSpatialGroup*) node->getListener(0);
+		//a NULL camera skips the test entirely; otherwise the group's bounds must pass AABBInFrustumNoFarClip
+		if (!mCamera || mCamera->AABBInFrustumNoFarClip(group->mBounds[0], group->mBounds[1]))
+		{
+			node->accept(this);
+			stop_glerror();
+			//recurse into every child node
+			for (U32 i = 0; i < node->getChildCount(); i++)
+			{
+				traverse(node->getChild(i));
+				stop_glerror();
+			}
+			
+			//render visibility wireframe
+			if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_OCCLUSION))
+			{
+				group->rebuildGeom();
+				group->rebuildMesh();
+				//push the matrix stack, load gGLModelView for the draw, and pop afterwards
+				gGL.flush();
+				gGL.pushMatrix();
+				gGLLastMatrix = NULL;
+				gGL.loadMatrix(gGLModelView);
+				renderXRay(group, mCamera);
+				stop_glerror();
+				gGLLastMatrix = NULL;
+				gGL.popMatrix();
+			}
+		}
+	}
+	//visit() does nothing; all work is done in traverse()
+	virtual void visit(const LLSpatialGroup::OctreeNode* node) {}
+
+};
 
 class LLOctreeRenderPhysicsShapes : public LLOctreeTraveler<LLDrawable>
 {
@@ -4437,6 +4577,26 @@ void LLSpatialPartition::renderDebug()
 	LLOctreeRenderNonOccluded render_debug(camera);
 	render_debug.traverse(mOctree);
 
+
+	if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_OCCLUSION))
+	{
+		{
+			LLGLEnable cull(GL_CULL_FACE);
+			
+			LLGLEnable blend(GL_BLEND);
+			LLGLDepthTest depth_under(GL_TRUE, GL_FALSE, GL_GREATER);
+			glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
+			gGL.diffuseColor4f(0.5f, 0.0f, 0, 0.25f);
+
+			LLGLEnable offset(GL_POLYGON_OFFSET_LINE);
+			glPolygonOffset(-1.f, -1.f);
+
+			LLOctreeRenderXRay xray(camera);
+			xray.traverse(mOctree);
+
+			glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+		}
+	}
 	if (LLGLSLShader::sNoFixedFunction)
 	{
 		gDebugProgram.unbind();
diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp
index 9117bf1c01..23038e529b 100755
--- a/indra/newview/llviewerdisplay.cpp
+++ b/indra/newview/llviewerdisplay.cpp
@@ -661,6 +661,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 
 		static LLCullResult result;
 		LLViewerCamera::sCurCameraID = LLViewerCamera::CAMERA_WORLD;
+		LLPipeline::sUnderWaterRender = LLViewerCamera::getInstance()->cameraUnderWater() ? TRUE : FALSE;
 		gPipeline.updateCull(*LLViewerCamera::getInstance(), result, water_clip);
 		stop_glerror();
 
@@ -867,7 +868,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
 		//}
 
 		LLPipeline::sUnderWaterRender = LLViewerCamera::getInstance()->cameraUnderWater() ? TRUE : FALSE;
-		
+
 		LLGLState::checkStates();
 		LLGLState::checkClientArrays();
 
diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp
index b9e0847935..e24237522a 100755
--- a/indra/newview/llviewershadermgr.cpp
+++ b/indra/newview/llviewershadermgr.cpp
@@ -93,6 +93,8 @@ LLGLSLShader	gTwoTextureAddProgram;
 LLGLSLShader	gOneTextureNoColorProgram;
 LLGLSLShader	gDebugProgram;
 LLGLSLShader	gClipProgram;
+LLGLSLShader	gDownsampleDepthProgram;
+LLGLSLShader	gDownsampleDepthRectProgram;
 LLGLSLShader	gAlphaMaskProgram;
 
 //object shaders
@@ -702,6 +704,8 @@ void LLViewerShaderMgr::unloadShaders()
 	gOcclusionCubeProgram.unload();
 	gDebugProgram.unload();
 	gClipProgram.unload();
+	gDownsampleDepthProgram.unload();
+	gDownsampleDepthRectProgram.unload();
 	gAlphaMaskProgram.unload();
 	gUIProgram.unload();
 	gPathfindingProgram.unload();
@@ -3001,6 +3005,26 @@ BOOL LLViewerShaderMgr::loadShadersInterface()
 		success = gClipProgram.createShader(NULL, NULL);
 	}
 
+	if (success)
+	{
+		gDownsampleDepthProgram.mName = "DownsampleDepth Shader";
+		gDownsampleDepthProgram.mShaderFiles.clear();
+		gDownsampleDepthProgram.mShaderFiles.push_back(make_pair("interface/downsampleDepthV.glsl", GL_VERTEX_SHADER_ARB));
+		gDownsampleDepthProgram.mShaderFiles.push_back(make_pair("interface/downsampleDepthF.glsl", GL_FRAGMENT_SHADER_ARB));
+		gDownsampleDepthProgram.mShaderLevel = mVertexShaderLevel[SHADER_INTERFACE];
+		success = gDownsampleDepthProgram.createShader(NULL, NULL);
+	}
+
+	if (success)
+	{
+		gDownsampleDepthRectProgram.mName = "DownsampleDepthRect Shader";
+		gDownsampleDepthRectProgram.mShaderFiles.clear();
+		gDownsampleDepthRectProgram.mShaderFiles.push_back(make_pair("interface/downsampleDepthV.glsl", GL_VERTEX_SHADER_ARB));
+		gDownsampleDepthRectProgram.mShaderFiles.push_back(make_pair("interface/downsampleDepthRectF.glsl", GL_FRAGMENT_SHADER_ARB));
+		gDownsampleDepthRectProgram.mShaderLevel = mVertexShaderLevel[SHADER_INTERFACE];
+		success = gDownsampleDepthRectProgram.createShader(NULL, NULL);
+	}
+
 	if (success)
 	{
 		gAlphaMaskProgram.mName = "Alpha Mask Shader";
diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h
index 8c7de05062..438853cd6f 100755
--- a/indra/newview/llviewershadermgr.h
+++ b/indra/newview/llviewershadermgr.h
@@ -230,6 +230,8 @@ extern LLGLSLShader			gSplatTextureRectProgram;
 extern LLGLSLShader			gGlowCombineFXAAProgram;
 extern LLGLSLShader			gDebugProgram;
 extern LLGLSLShader			gClipProgram;
+extern LLGLSLShader			gDownsampleDepthProgram;
+extern LLGLSLShader			gDownsampleDepthRectProgram;
 
 //output tex0[tc0] + tex1[tc1]
 extern LLGLSLShader			gTwoTextureAddProgram;
@@ -322,6 +324,7 @@ extern LLGLSLShader			gWLCloudProgram;
 extern LLGLSLShader			gPostColorFilterProgram;
 extern LLGLSLShader			gPostNightVisionProgram;
 
+
 // Deferred rendering shaders
 extern LLGLSLShader			gDeferredImpostorProgram;
 extern LLGLSLShader			gDeferredWaterProgram;
@@ -369,7 +372,6 @@ extern LLGLSLShader			gDeferredSkinnedFullbrightShinyProgram;
 extern LLGLSLShader			gDeferredSkinnedFullbrightProgram;
 extern LLGLSLShader			gNormalMapGenProgram;
 
-
 // Deferred materials shaders
 extern LLGLSLShader			gDeferredMaterialProgram[LLMaterial::SHADER_COUNT*2];
 
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index bd3be5b9cf..f41057fd1f 100755
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -3515,7 +3515,12 @@ F32 LLVOVolume::getBinRadius()
 	}
 	else if (mDrawable->isStatic())
 	{
-		radius = llmax((S32) mDrawable->getRadius(), 1)*size_factor;
+		F32 szf = size_factor;
+
+		radius = llmax(mDrawable->getRadius(), szf);
+		
+		radius = powf(radius, 1.f+szf/radius);
+
 		radius *= 1.f + mDrawable->mDistanceWRTCamera * distance_factor[1];
 		radius += mDrawable->mDistanceWRTCamera * distance_factor[0];
 	}
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index 9a4a233b54..6efdf47ec5 100755
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -279,7 +279,7 @@ std::string gPoolNames[] =
 	"POOL_ALPHA"
 };
 
-void drawBox(const LLVector3& c, const LLVector3& r);
+void drawBox(const LLVector4a& c, const LLVector4a& r);
 void drawBoxOutline(const LLVector3& pos, const LLVector3& size);
 U32 nhpo2(U32 v);
 LLVertexBuffer* ll_create_cube_vb(U32 type_mask, U32 usage);
@@ -929,9 +929,12 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples)
 		S32 shadow_detail = RenderShadowDetail;
 		BOOL ssao = RenderDeferredSSAO;
 		
+		const U32 occlusion_divisor = 3;
+
 		//allocate deferred rendering color buffers
 		if (!mDeferredScreen.allocate(resX, resY, GL_SRGB8_ALPHA8, TRUE, TRUE, LLTexUnit::TT_RECT_TEXTURE, FALSE, samples)) return false;
 		if (!mDeferredDepth.allocate(resX, resY, 0, TRUE, FALSE, LLTexUnit::TT_RECT_TEXTURE, FALSE, samples)) return false;
+		if (!mOcclusionDepth.allocate(resX/occlusion_divisor, resY/occlusion_divisor, 0, TRUE, FALSE, LLTexUnit::TT_RECT_TEXTURE, FALSE, samples)) return false;
 		if (!addDeferredAttachments(mDeferredScreen)) return false;
 		
 		GLuint screenFormat = GL_RGBA16;
@@ -972,6 +975,7 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples)
 			for (U32 i = 0; i < 4; i++)
 			{
 				if (!mShadow[i].allocate(sun_shadow_map_width,U32(resY*scale), 0, TRUE, FALSE, LLTexUnit::TT_TEXTURE)) return false;
+				if (!mShadowOcclusion[i].allocate(mShadow[i].getWidth()/occlusion_divisor, mShadow[i].getHeight()/occlusion_divisor, 0, TRUE, FALSE, LLTexUnit::TT_TEXTURE)) return false;
 			}
 		}
 		else
@@ -979,6 +983,7 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples)
 			for (U32 i = 0; i < 4; i++)
 			{
 				mShadow[i].release();
+				mShadowOcclusion[i].release();
 			}
 		}
 
@@ -991,6 +996,7 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples)
 			for (U32 i = 4; i < 6; i++)
 			{
 				if (!mShadow[i].allocate(spot_shadow_map_width, height, 0, TRUE, FALSE)) return false;
+				if (!mShadowOcclusion[i].allocate(mShadow[i].getWidth()/occlusion_divisor, mShadow[i].getHeight()/occlusion_divisor, 0, TRUE, FALSE)) return false;
 			}
 		}
 		else
@@ -998,6 +1004,7 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples)
 			for (U32 i = 4; i < 6; i++)
 			{
 				mShadow[i].release();
+				mShadowOcclusion[i].release();
 			}
 		}
 
@@ -1014,11 +1021,13 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples)
 		for (U32 i = 0; i < 6; i++)
 		{
 			mShadow[i].release();
+			mShadowOcclusion[i].release();
 		}
 		mFXAABuffer.release();
 		mScreen.release();
 		mDeferredScreen.release(); //make sure to release any render targets that share a depth buffer with mDeferredScreen first
 		mDeferredDepth.release();
+		mOcclusionDepth.release();
 						
 		if (!mScreen.allocate(resX, resY, GL_RGBA, TRUE, TRUE, LLTexUnit::TT_RECT_TEXTURE, FALSE)) return false;		
 	}
@@ -2433,7 +2442,14 @@ void LLPipeline::updateCull(LLCamera& camera, LLCullResult& result, S32 water_cl
 
 	if (to_texture)
 	{
-		mScreen.bindTarget();
+		if (LLPipeline::sRenderDeferred && !LLPipeline::sUnderWaterRender)
+		{
+			mOcclusionDepth.bindTarget();
+		}
+		else
+		{
+			mScreen.bindTarget();
+		}
 	}
 
 	if (sUseOcclusion > 1)
@@ -2571,7 +2587,14 @@ void LLPipeline::updateCull(LLCamera& camera, LLCullResult& result, S32 water_cl
 
 	if (to_texture)
 	{
-		mScreen.flush();
+		if (LLPipeline::sRenderDeferred && !LLPipeline::sUnderWaterRender)
+		{
+			mOcclusionDepth.flush();
+		}
+		else
+		{
+			mScreen.flush();
+		}
 	}
 }
 
@@ -2639,6 +2662,75 @@ void LLPipeline::markOccluder(LLSpatialGroup* group)
 	}
 }
 
+void LLPipeline::downsampleDepthBuffer(LLRenderTarget& source, LLRenderTarget& dest, LLRenderTarget* scratch_space)
+{ //draws one triangle into dest using a downsample-depth shader that samples source (or scratch_space when provided)
+	LLGLSLShader* last_shader = LLGLSLShader::sCurBoundShaderPtr; //re-bound on exit
+
+	LLGLSLShader* shader = NULL;
+
+	if (scratch_space)
+	{ //copy source's depth into scratch_space; scratch_space is then the texture that gets sampled
+		scratch_space->copyContents(source, 
+									0, 0, source.getWidth(), source.getHeight(), 
+									0, 0, scratch_space->getWidth(), scratch_space->getHeight(), GL_DEPTH_BUFFER_BIT, GL_NEAREST);
+	}
+
+	dest.bindTarget();
+
+	//the shader is bound once, below, after choosing the variant that matches source's texture type
+
+	//a single triangle whose corners extend past the [-1,1] square so that it covers all of it
+	LLStrider<LLVector3> vert;
+	mDeferredVB->getVertexStrider(vert);
+
+	vert[0].set(-1,1,0);
+	vert[1].set(-1,-3,0);
+	vert[2].set(3,1,0);
+
+	//rect variant: delta of one unit and screen_res = source size; 2D variant: delta of 1/size and screen_res = 1
+	if (source.getUsage() == LLTexUnit::TT_RECT_TEXTURE)
+	{
+		shader = &gDownsampleDepthRectProgram;
+		shader->bind();
+		shader->uniform2f("delta", 1.f, 1.f);
+		shader->uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, source.getWidth(), source.getHeight());
+	}
+	else
+	{
+		shader = &gDownsampleDepthProgram;
+		shader->bind();
+		shader->uniform2f("delta", 1.f/source.getWidth(), 1.f/source.getHeight());
+		shader->uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, 1.f, 1.f);
+	}
+
+	gGL.getTexUnit(0)->bind(scratch_space ? scratch_space : &source, TRUE);
+
+	{ //the LLGLDepthTest object lives only for this draw
+		LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS);
+		mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX);
+		mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3);
+	}
+
+	dest.flush();
+
+	if (last_shader)
+	{
+		last_shader->bind();
+	}
+	else
+	{ //nothing was bound on entry: unbind the variant that was actually used
+		shader->unbind();
+	}
+}
+
+void LLPipeline::doOcclusion(LLCamera& camera, LLRenderTarget& source, LLRenderTarget& dest, LLRenderTarget* scratch_space)
+{ //downsamples source's depth into dest, then runs doOcclusion(camera) with dest bound as the render target
+	downsampleDepthBuffer(source, dest, scratch_space);
+	dest.bindTarget();
+	doOcclusion(camera);
+	dest.flush();
+}
+
 void LLPipeline::doOcclusion(LLCamera& camera)
 {
 	if (LLPipeline::sUseOcclusion > 1 && sCull->hasOcclusionGroups())
@@ -4551,7 +4643,7 @@ void LLPipeline::renderGeomDeferred(LLCamera& camera)
 	gGL.setColorMask(true, false);
 }
 
-void LLPipeline::renderGeomPostDeferred(LLCamera& camera)
+void LLPipeline::renderGeomPostDeferred(LLCamera& camera, bool do_occlusion)
 {
 	LLFastTimer t(FTM_POST_DEFERRED_POOLS);
 	U32 cur_type = 0;
@@ -4566,7 +4658,7 @@ void LLPipeline::renderGeomPostDeferred(LLCamera& camera)
 	gGL.setColorMask(true, false);
 
 	pool_set_t::iterator iter1 = mPools.begin();
-	BOOL occlude = LLPipeline::sUseOcclusion > 1;
+	BOOL occlude = LLPipeline::sUseOcclusion > 1 && do_occlusion;
 
 	while ( iter1 != mPools.end() )
 	{
@@ -4580,7 +4672,7 @@ void LLPipeline::renderGeomPostDeferred(LLCamera& camera)
 			gGLLastMatrix = NULL;
 			gGL.loadMatrix(gGLModelView);
 			LLGLSLShader::bindNoShader();
-			doOcclusion(camera);
+			doOcclusion(camera, mScreen, mOcclusionDepth, &mDeferredDepth);
 			gGL.setColorMask(true, false);
 		}
 
@@ -4798,6 +4890,7 @@ void LLPipeline::renderPhysicsDisplay()
 	mPhysicsDisplay.flush();
 }
 
+extern std::set<LLSpatialGroup*> visible_selected_groups;
 
 void LLPipeline::renderDebug()
 {
@@ -5208,6 +5301,27 @@ void LLPipeline::renderDebug()
 		}
 	}
 
+	if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_OCCLUSION) && LLGLSLShader::sNoFixedFunction)
+	{ //render visible selected group occlusion geometry
+		gDebugProgram.bind();
+		LLGLDepthTest depth(GL_TRUE, GL_FALSE);
+		gGL.diffuseColor3f(1,0,1);
+		for (std::set<LLSpatialGroup*>::iterator iter = visible_selected_groups.begin(); iter != visible_selected_groups.end(); ++iter)
+		{
+			LLSpatialGroup* group = *iter;
+
+			LLVector4a fudge;
+			fudge.splat(0.25f); //SG_OCCLUSION_FUDGE
+
+			LLVector4a size;
+			size.setAdd(fudge, group->mBounds[1]);
+
+			drawBox(group->mBounds[0], size);
+		}
+	}
+
+	visible_selected_groups.clear();
+
 	if (LLGLSLShader::sNoFixedFunction)
 	{
 		gUIProgram.bind();
@@ -8205,11 +8319,7 @@ void LLPipeline::renderDeferredLighting()
 
 		LLStrider<LLVector3> vert; 
 		mDeferredVB->getVertexStrider(vert);
-		LLStrider<LLVector2> tc0;
-		LLStrider<LLVector2> tc1;
-		mDeferredVB->getTexCoord0Strider(tc0);
-		mDeferredVB->getTexCoord1Strider(tc1);
-
+		
 		vert[0].set(-1,1,0);
 		vert[1].set(-1,-3,0);
 		vert[2].set(3,1,0);
@@ -8390,7 +8500,7 @@ void LLPipeline::renderDeferredLighting()
 										LLPipeline::END_RENDER_TYPES);
 								
 			
-			renderGeomPostDeferred(*LLViewerCamera::getInstance());
+			renderGeomPostDeferred(*LLViewerCamera::getInstance(), false);
 			gPipeline.popRenderTypeMask();
 		}
 
@@ -9324,9 +9434,15 @@ void LLPipeline::renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera
 		gDeferredShadowCubeProgram.bind();
 	}
 
+	LLRenderTarget& occlusion_target = mShadowOcclusion[LLViewerCamera::sCurCameraID-1];
+
+	occlusion_target.bindTarget();
 	updateCull(shadow_cam, result);
+	occlusion_target.flush();
+
 	stateSort(shadow_cam, result);
 	
+	
 	//generate shadow map
 	gGL.matrixMode(LLRender::MM_PROJECTION);
 	gGL.pushMatrix();
@@ -9414,7 +9530,10 @@ void LLPipeline::renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera
 	gDeferredShadowCubeProgram.bind();
 	gGLLastMatrix = NULL;
 	gGL.loadMatrix(gGLModelView);
-	doOcclusion(shadow_cam);
+
+	LLRenderTarget& occlusion_source = mShadow[LLViewerCamera::sCurCameraID-1];
+
+	doOcclusion(shadow_cam, occlusion_source, occlusion_target);
 
 	if (use_shader)
 	{
diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h
index 2c023a6f70..70dcf80407 100755
--- a/indra/newview/pipeline.h
+++ b/indra/newview/pipeline.h
@@ -176,6 +176,12 @@ public:
 	// Object related methods
 	void        markVisible(LLDrawable *drawablep, LLCamera& camera);
 	void		markOccluder(LLSpatialGroup* group);
+
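+	//downsample source to dest; each dest pixel receives the minimum of a 3x3 group of source depth samples (this comment previously said "maximum", but the downsampleDepth fragment shaders use min() -- TODO confirm which is intended)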
+	// if source's depth buffer cannot be bound for reading, a scratch space depth buffer must be provided
+	void		downsampleDepthBuffer(LLRenderTarget& source, LLRenderTarget& dest, LLRenderTarget* scratch_space = NULL);
+
+	void		doOcclusion(LLCamera& camera, LLRenderTarget& source, LLRenderTarget& dest, LLRenderTarget* scratch_space = NULL);
 	void		doOcclusion(LLCamera& camera);
 	void		markNotCulled(LLSpatialGroup* group, LLCamera &camera);
 	void        markMoved(LLDrawable *drawablep, BOOL damped_motion = FALSE);
@@ -275,7 +281,7 @@ public:
 
 	void renderGeom(LLCamera& camera, BOOL forceVBOUpdate = FALSE);
 	void renderGeomDeferred(LLCamera& camera);
-	void renderGeomPostDeferred(LLCamera& camera);
+	void renderGeomPostDeferred(LLCamera& camera, bool do_occlusion=true);
 	void renderGeomShadow(LLCamera& camera);
 	void bindDeferredShader(LLGLSLShader& shader, U32 light_index = 0, U32 noise_map = 0xFFFFFFFF);
 	void setupSpotLight(LLGLSLShader& shader, LLDrawable* drawablep);
@@ -603,6 +609,7 @@ public:
 	LLRenderTarget			mFXAABuffer;
 	LLRenderTarget			mEdgeMap;
 	LLRenderTarget			mDeferredDepth;
+	LLRenderTarget			mOcclusionDepth;
 	LLRenderTarget			mDeferredLight;
 	LLRenderTarget			mHighlight;
 	LLRenderTarget			mPhysicsDisplay;
@@ -615,6 +622,7 @@ public:
 
 	//sun shadow map
 	LLRenderTarget			mShadow[6];
+	LLRenderTarget			mShadowOcclusion[6];
 	std::vector<LLVector3>	mShadowFrustPoints[4];
 	LLVector4				mShadowError;
 	LLVector4				mShadowFOV;
-- 
cgit v1.3