diff options
-rwxr-xr-x | indra/llmath/llvector4a.inl | 27 | ||||
-rwxr-xr-x | indra/llmath/llvolume.cpp | 178 |
2 files changed, 66 insertions, 139 deletions
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 6860252a75..7c52ffef21 100755 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -331,9 +331,6 @@ inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const // Note that this does not consider zero length vectors! inline void LLVector4a::normalize3() { - // find out about bad math before it takes two man-days to track down - llassert(isFinite3() && !equals3(getZero())); - // lenSqrd = a dot a LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } @@ -382,9 +379,6 @@ inline void LLVector4a::normalize4() // Note that this does not consider zero length vectors! inline LLSimdScalar LLVector4a::normalize3withLength() { - // find out about bad math before it takes two man-days to track down - llassert(isFinite3() && !equals3(getZero())); - // lenSqrd = a dot a LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } @@ -415,27 +409,6 @@ inline void LLVector4a::normalize3fast() mQ = _mm_mul_ps( mQ, approxRsqrt ); } -// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed -// Note that this does not consider zero length vectors! -inline void LLVector4a::normalize3fast_checked(LLVector4a* default) -{ - // handle bogus inputs before NaNs are generated below - // - if (!isFinite3() || (dot3(*this).getF32() < F_APPROXIMATELY_ZERO)) - { - if (default) - *this = *default; - else - set(0,1,0,1); - - return; - } - - LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); - const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); - mQ = _mm_mul_ps( mQ, approxRsqrt ); -} - // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const { diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 15621c2625..1932272afb 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4472,9 +4472,6 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, continue; //skip degenerate face } - LLVector4a default_norm; - default_norm.set(0,1,0,1); - //for each edge for (S32 k = 0; k < 3; k++) { S32 index = face.mEdge[j*3+k]; @@ -4496,14 +4493,14 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, norm_mat.rotate(n[v1], t); - t.normalize3fast_checked(&default_norm); + t.normalize3fast(); normals.push_back(LLVector3(t[0], t[1], t[2])); mat.affineTransform(v[v2], t); vertices.push_back(LLVector3(t[0], t[1], t[2])); norm_mat.rotate(n[v2], t); - t.normalize3fast_checked(&default_norm); + t.normalize3fast(); normals.push_back(LLVector3(t[0], t[1], t[2])); } } @@ -6099,9 +6096,6 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { VertexData corners[4]; VertexData baseVert; - LLVector4a default_norm; - default_norm.set(0,1,0,1); - for(S32 t = 0; t < 4; t++) { corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); @@ -6114,8 +6108,8 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) lhs.setSub(corners[1].getPosition(), corners[0].getPosition()); LLVector4a rhs; rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); - baseVert.getNormal().setCross3(lhs, rhs); - baseVert.getNormal().normalize3fast_checked(&default_norm); + baseVert.getNormal().setCross3(lhs, rhs); + baseVert.getNormal().normalize3fast(); } if(!(mTypeMask & TOP_MASK)) @@ -6565,12 +6559,17 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); LLVector4a normal; - LLVector4a default_norm; - default_norm.set(0,1,0,1); - normal.setCross3(d0,d1); - normal.normalize3fast_checked(&default_norm); - + + if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO) + { + normal.normalize3fast(); + } + else + { //degenerate, make up a value + normal.set(0,0,1); + } + llassert(llfinite(normal.getF32ptr()[0])); llassert(llfinite(normal.getF32ptr()[1])); llassert(llfinite(normal.getF32ptr()[2])); @@ -6612,13 +6611,11 @@ void LLVolumeFace::createTangents() CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); //normalize tangents - LLVector4a default_norm; - default_norm.set(0,1,0,1); for (U32 i = 0; i < mNumVertices; i++) { //binorm[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized - mNormals[i].normalize3fast_checked(&default_norm); + mNormals[i].normalize3fast(); } } } @@ -6796,9 +6793,6 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat mat.loadu(mat_in); norm_mat.loadu(norm_mat_in); - LLVector4a default_norm; - default_norm.set(0,1,0,1); - for (U32 i = 0; i < face.mNumVertices; ++i) { //transform appended face position and store @@ -6806,7 +6800,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat //transform appended face normal and store norm_mat.rotate(src_norm[i], dst_norm[i]); - dst_norm[i].normalize3fast_checked(&default_norm); + dst_norm[i].normalize3fast(); //copy appended face texture coordinate dst_tc[i] = src_tc[i]; @@ -7215,53 +7209,46 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) return TRUE; } -#define TANGENTIAL_PARANOIA_ASSERTS 0 - -#if TANGENTIAL_PARANOIA_ASSERTS - #define tangential_paranoia(a) llassert(a) -#else - #define tangential_paranoia(a) -#endif - //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) { + //LLVector4a *tan1 = new LLVector4a[vertexCount * 2]; LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a)); - LLVector4a* tan2 = tan1 + vertexCount; + LLVector4a* tan2 = tan1 + vertexCount; memset(tan1, 0, vertexCount*2*sizeof(LLVector4a)); - for (U32 a = 0; a < triangleCount; a++) - { - U32 i1 = *index_array++; - U32 i2 = *index_array++; - U32 i3 = *index_array++; + for (U32 a = 0; a < triangleCount; a++) + { + U32 i1 = *index_array++; + U32 i2 = *index_array++; + U32 i3 = *index_array++; - const LLVector4a& v1 = vertex[i1]; - const LLVector4a& v2 = vertex[i2]; - const LLVector4a& v3 = vertex[i3]; + const LLVector4a& v1 = vertex[i1]; + const LLVector4a& v2 = vertex[i2]; + const LLVector4a& v3 = vertex[i3]; - const LLVector2& w1 = texcoord[i1]; - const LLVector2& w2 = texcoord[i2]; - const LLVector2& w3 = texcoord[i3]; + const LLVector2& w1 = texcoord[i1]; + const LLVector2& w2 = texcoord[i2]; + const LLVector2& w3 = texcoord[i3]; const F32* v1ptr = v1.getF32ptr(); const F32* v2ptr = v2.getF32ptr(); const F32* v3ptr = v3.getF32ptr(); - float x1 = v2ptr[0] - v1ptr[0]; - float x2 = v3ptr[0] - v1ptr[0]; - float y1 = v2ptr[1] - v1ptr[1]; - float y2 = v3ptr[1] - v1ptr[1]; - float z1 = v2ptr[2] - v1ptr[2]; - float z2 = v3ptr[2] - v1ptr[2]; + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; - float s1 = w2.mV[0] - w1.mV[0]; - float s2 = w3.mV[0] - w1.mV[0]; - float t1 = w2.mV[1] - w1.mV[1]; - float t2 = w3.mV[1] - w1.mV[1]; + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; F32 rd = s1*t2-s2*t1; @@ -7275,48 +7262,18 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, (s1 * z2 - s2 * z1) * r); - tan1[i1].add(sdir); tan1[i2].add(sdir); tan1[i3].add(sdir); - - tangential_paranoia(tan1[i1].isFinite3()); - tangential_paranoia(tan1[i2].isFinite3()); - tangential_paranoia(tan1[i3].isFinite3()); - + tan2[i1].add(tdir); tan2[i2].add(tdir); tan2[i3].add(tdir); - - tangential_paranoia(tan2[i1].isFinite3()); - tangential_paranoia(tan2[i2].isFinite3()); - tangential_paranoia(tan2[i3].isFinite3()); - } - - // These appear to come out of the summing above distinctly non-unit-length - // - LLVector4a default_norm; - default_norm.set(0,1,0,1); - - for (U32 a = 0; a < vertexCount; a++) - { - tan1[a].normalize3fast_checked(&default_norm); - tan2[a].normalize3fast_checked(&default_norm); - - tangential_paranoia(tan1[a].isFinite3()); - tangential_paranoia(tan2[a].isFinite3()); - tangential_paranoia(tan1[a].isNormalized3(0.03f)); - tangential_paranoia(tan2[a].isNormalized3(0.03f)); - } - - LLVector4a default_tangent; - default_tangent.set(0,0,1,1); - - for (U32 a = 0; a < vertexCount; a++) - { - LLVector4a n = normal[a]; - - n.normalize3fast_checked(&default_norm); + } + + for (U32 a = 0; a < vertexCount; a++) + { + LLVector4a n = normal[a]; const LLVector4a& t = tan1[a]; @@ -7326,41 +7283,38 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe LLVector4a ncrosst; ncrosst.setCross3(n,t); - F32 n_dot_t = n.dot3(t).getF32(); - - tangential_paranoia(llfinite(n_dot_t) && !llisnan(n_dot_t)); - - // Gram-Schmidt orthogonalize - n.mul(n_dot_t); - - tangential_paranoia(n.isFinite3()); + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); LLVector4a tsubn; tsubn.setSub(t,n); - tangential_paranoia(tsubn.isFinite3()); - - tsubn.normalize3fast_checked(&default_tangent); + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast(); - // Calculate handedness - F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; + // Calculate handedness + F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; - tsubn.getF32ptr()[3] = handedness; + tsubn.getF32ptr()[3] = handedness; - tangent[a] = tsubn; + tangent[a] = tsubn; - tangential_paranoia(tangent[a].isNormalized3(0.1f)); + llassert(llfinite(tangent[a].getF32ptr()[0])); + llassert(llfinite(tangent[a].getF32ptr()[1])); + llassert(llfinite(tangent[a].getF32ptr()[2])); - llassert(llfinite(tangent[a].getF32ptr()[0])); - llassert(llfinite(tangent[a].getF32ptr()[1])); - llassert(llfinite(tangent[a].getF32ptr()[2])); - - llassert(!llisnan(tangent[a].getF32ptr()[0])); - llassert(!llisnan(tangent[a].getF32ptr()[1])); - llassert(!llisnan(tangent[a].getF32ptr()[2])); + llassert(!llisnan(tangent[a].getF32ptr()[0])); + llassert(!llisnan(tangent[a].getF32ptr()[1])); + llassert(!llisnan(tangent[a].getF32ptr()[2])); + } + else + { //degenerate, make up a value + tangent[a].set(0,0,1,1); + } } - ll_aligned_free_16(tan1); + ll_aligned_free_16(tan1); } |