From c3f14b915c38a4978745f12f1f816572cce4b5a0 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Mon, 3 Jun 2013 12:50:48 -0500 Subject: NORSPEC-229 Fix for bad binormals on mirrored surfaces (use tangent calculator instead of binormal calculator, convert binormal centric code to tangent centric) --- indra/llmath/llvolume.cpp | 327 ++++++++++++++++++++++------------------ indra/llmath/llvolume.h | 36 ++--- indra/llmath/llvolumeoctree.cpp | 66 +++++--- indra/llmath/llvolumeoctree.h | 8 +- 4 files changed, 234 insertions(+), 203 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 3f06e6b99e..bf03c971cd 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -2079,9 +2079,9 @@ void LLVolume::regen() createVolumeFaces(); } -void LLVolume::genBinormals(S32 face) +void LLVolume::genTangents(S32 face) { - mVolumeFaces[face].createBinormals(); + mVolumeFaces[face].createTangents(); } LLVolume::~LLVolume() @@ -4392,7 +4392,7 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, segments.push_back(vertices.size()); #if DEBUG_SILHOUETTE_BINORMALS vertices.push_back(face.mVertices[j].getPosition()); - vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f); + vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mTangent*0.1f); normals.push_back(LLVector3(0,0,1)); normals.push_back(LLVector3(0,0,1)); segments.push_back(vertices.size()); @@ -4508,22 +4508,9 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, } } -S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, - S32 face, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) -{ - LLVector4a starta, enda; - starta.load3(start.mV); - enda.load3(end.mV); - - return lineSegmentIntersect(starta, enda, face, intersection, tex_coord, normal, bi_normal); - -} - - S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent_out) { S32 hit_face = -1; @@ -4561,9 +4548,9 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en if (LLLineSegmentBoxIntersect(start, end, box_center, box_size)) { - if (bi_normal != NULL) // if the caller wants binormals, we may need to generate them + if (tangent_out != NULL) // if the caller wants tangents, we may need to generate them { - genBinormals(i); + genTangents(i); } if (isUnique()) @@ -4597,7 +4584,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en LLVector4a intersect = dir; intersect.mul(closest_t); intersect.add(start); - intersection->set(intersect.getF32ptr()); + *intersection = intersect; } @@ -4612,19 +4599,42 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en if (normal!= NULL) { - LLVector4* norm = (LLVector4*) face.mNormals; - - *normal = ((1.f - a - b) * LLVector3(norm[idx0]) + - a * LLVector3(norm[idx1]) + - b * LLVector3(norm[idx2])); + LLVector4a* norm = face.mNormals; + + LLVector4a n1,n2,n3; + n1 = norm[idx0]; + n1.mul(1.f-a-b); + + n2 = norm[idx1]; + n2.mul(a); + + n3 = norm[idx2]; + n3.mul(b); + + n1.add(n2); + n1.add(n3); + + *normal = n1; } - if (bi_normal != NULL) + if (tangent_out != NULL) { - LLVector4* binormal = (LLVector4*) face.mBinormals; - *bi_normal = ((1.f - a - b) * LLVector3(binormal[idx0]) + - a * LLVector3(binormal[idx1]) + - b * LLVector3(binormal[idx2])); + LLVector4a* tangents = face.mTangents; + + LLVector4a t1,t2,t3; + t1 = tangents[idx0]; + t1.mul(1.f-a-b); + + t2 = tangents[idx1]; + t2.mul(a); + + t3 = tangents[idx2]; + t3.mul(b); + + t1.add(t2); + t1.add(t3); + + *tangent_out = t1; } } } @@ -4637,7 +4647,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& en face.createOctree(); } - LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal); + LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, tangent_out); intersect.traverse(face.mOctree); if (intersect.mHitFace) { @@ -5183,7 +5193,7 @@ LLVolumeFace::LLVolumeFace() : mNumIndices(0), mPositions(NULL), mNormals(NULL), - mBinormals(NULL), + mTangents(NULL), mTexCoords(NULL), mIndices(NULL), mWeights(NULL), @@ -5206,7 +5216,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) mNumIndices(0), mPositions(NULL), mNormals(NULL), - mBinormals(NULL), + mTangents(NULL), mTexCoords(NULL), mIndices(NULL), mWeights(NULL), @@ -5264,15 +5274,15 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) } - if (src.mBinormals) + if (src.mTangents) { - allocateBinormals(src.mNumVertices); - LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) src.mBinormals, vert_size); + allocateTangents(src.mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mTangents, (F32*) src.mTangents, vert_size); } else { - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free_16(mTangents); + mTangents = NULL; } if (src.mWeights) @@ -5316,8 +5326,8 @@ void LLVolumeFace::freeData() mTexCoords = NULL; ll_aligned_free_16(mIndices); mIndices = NULL; - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free_16(mTangents); + mTangents = NULL; ll_aligned_free_16(mWeights); mWeights = NULL; @@ -5897,7 +5907,7 @@ void LLVolumeFace::cacheOptimize() } LLVector4a* binorm = NULL; - if (mBinormals) + if (mTangents) { binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } @@ -5922,9 +5932,9 @@ void LLVolumeFace::cacheOptimize() { wght[cur_idx] = mWeights[idx]; } - if (mBinormals) + if (mTangents) { - binorm[cur_idx] = mBinormals[idx]; + binorm[cur_idx] = mTangents[idx]; } cur_idx++; @@ -5940,13 +5950,13 @@ void LLVolumeFace::cacheOptimize() ll_aligned_free_16(mNormals); ll_aligned_free_16(mTexCoords); ll_aligned_free_16(mWeights); - ll_aligned_free_16(mBinormals); + ll_aligned_free_16(mTangents); mPositions = pos; mNormals = norm; mTexCoords = tc; mWeights = wght; - mBinormals = binorm; + mTangents = binorm; //std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks); //llinfos << result << llendl; @@ -6027,7 +6037,7 @@ void LLVolumeFace::swapData(LLVolumeFace& rhs) { llswap(rhs.mPositions, mPositions); llswap(rhs.mNormals, mNormals); - llswap(rhs.mBinormals, mBinormals); + llswap(rhs.mTangents, mTangents); llswap(rhs.mTexCoords, mTexCoords); llswap(rhs.mIndices,mIndices); llswap(rhs.mNumVertices, mNumVertices); @@ -6116,22 +6126,11 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) corners[2].mTexCoord=swap; } - LLVector4a binormal; - - calc_binormal_from_triangle( binormal, - corners[0].getPosition(), corners[0].mTexCoord, - corners[1].getPosition(), corners[1].mTexCoord, - corners[2].getPosition(), corners[2].mTexCoord); - - binormal.normalize3fast(); - S32 size = (grid_size+1)*(grid_size+1); resizeVertices(size); - allocateBinormals(size); - + LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; - LLVector4a* binorm = (LLVector4a*) mBinormals; LLVector2* tc = (LLVector2*) mTexCoords; for(int gx = 0;gx& v, std::vector& idx); @@ -916,7 +916,7 @@ public: LLVector4a* mPositions; LLVector4a* mNormals; - LLVector4a* mBinormals; + LLVector4a* mTangents; LLVector2* mTexCoords; U16* mIndices; @@ -980,7 +980,7 @@ public: void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); } void regen(); - void genBinormals(S32 face); + void genTangents(S32 face); BOOL isConvex() const; BOOL isCap(S32 face); @@ -1008,21 +1008,14 @@ public: //get the face index of the face that intersects with the given line segment at the point //closest to start. Moves end to the point of intersection. Returns -1 if no intersection. //Line segment must be in volume space. - S32 lineSegmentIntersect(const LLVector3& start, const LLVector3& end, + S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, S32 face = -1, // which face to check, -1 = ALL_SIDES - LLVector3* intersection = NULL, // return the intersection point + LLVector4a* intersection = NULL, // return the intersection point LLVector2* tex_coord = NULL, // return the texture coordinates of the intersection point - LLVector3* normal = NULL, // return the surface normal at the intersection point - LLVector3* bi_normal = NULL // return the surface bi-normal at the intersection point + LLVector4a* normal = NULL, // return the surface normal at the intersection point + LLVector4a* tangent = NULL // return the surface tangent at the intersection point ); - S32 lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, - S32 face = 1, - LLVector3* intersection = NULL, - LLVector2* tex_coord = NULL, - LLVector3* normal = NULL, - LLVector3* bi_normal = NULL); - LLFaceID generateFaceMask(); BOOL isFaceMaskValid(LLFaceID face_mask); @@ -1081,21 +1074,12 @@ public: std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params); -void calc_binormal_from_triangle( - LLVector4a& binormal, - const LLVector4a& pos0, - const LLVector2& tex0, - const LLVector4a& pos1, - const LLVector2& tex1, - const LLVector4a& pos2, - const LLVector2& tex2); - BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size); BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size); BOOL LLLineSegmentBoxIntersect(const LLVector4a& start, const LLVector4a& end, const LLVector4a& center, const LLVector4a& size); -BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, - F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); +//BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, +// F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided); BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, F32& intersection_a, F32& intersection_b, F32& intersection_t); diff --git a/indra/llmath/llvolumeoctree.cpp b/indra/llmath/llvolumeoctree.cpp index cc83cb7235..0728b49c1f 100755 --- a/indra/llmath/llvolumeoctree.cpp +++ b/indra/llmath/llvolumeoctree.cpp @@ -94,14 +94,14 @@ void LLVolumeOctreeListener::handleChildAddition(const LLOctreeNode { LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) node->getListener(0); - /*const F32* start = mStart.getF32(); - const F32* end = mEnd.getF32(); - const F32* center = vl->mBounds[0].getF32(); - const F32* size = vl->mBounds[1].getF32();*/ - - //if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) - if (LLLineSegmentBoxIntersect(mStart.getF32ptr(), mEnd.getF32ptr(), vl->mBounds[0].getF32ptr(), vl->mBounds[1].getF32ptr())) + if (LLLineSegmentBoxIntersect(mStart, mEnd, vl->mBounds[0], vl->mBounds[1])) { node->accept(this); for (S32 i = 0; i < node->getChildCount(); ++i) @@ -152,34 +146,60 @@ void LLOctreeTriangleRayIntersect::visit(const LLOctreeNode* n LLVector4a intersect = mDir; intersect.mul(*mClosestT); intersect.add(mStart); - mIntersection->set(intersect.getF32ptr()); + *mIntersection = intersect; } + U32 idx0 = tri->mIndex[0]; + U32 idx1 = tri->mIndex[1]; + U32 idx2 = tri->mIndex[2]; if (mTexCoord != NULL) { LLVector2* tc = (LLVector2*) mFace->mTexCoords; - *mTexCoord = ((1.f - a - b) * tc[tri->mIndex[0]] + - a * tc[tri->mIndex[1]] + - b * tc[tri->mIndex[2]]); + *mTexCoord = ((1.f - a - b) * tc[idx0] + + a * tc[idx1] + + b * tc[idx2]); } if (mNormal != NULL) { - LLVector4* norm = (LLVector4*) mFace->mNormals; - - *mNormal = ((1.f - a - b) * LLVector3(norm[tri->mIndex[0]]) + - a * LLVector3(norm[tri->mIndex[1]]) + - b * LLVector3(norm[tri->mIndex[2]])); + LLVector4a* norm = mFace->mNormals; + + LLVector4a n1,n2,n3; + n1 = norm[idx0]; + n1.mul(1.f-a-b); + + n2 = norm[idx1]; + n2.mul(a); + + n3 = norm[idx2]; + n3.mul(b); + + n1.add(n2); + n1.add(n3); + + *mNormal = n1; } - if (mBinormal != NULL) + if (mTangent != NULL) { - LLVector4* binormal = (LLVector4*) mFace->mBinormals; - *mBinormal = ((1.f - a - b) * LLVector3(binormal[tri->mIndex[0]]) + - a * LLVector3(binormal[tri->mIndex[1]]) + - b * LLVector3(binormal[tri->mIndex[2]])); + LLVector4a* tangents = mFace->mTangents; + + LLVector4a t1,t2,t3; + t1 = tangents[idx0]; + t1.mul(1.f-a-b); + + t2 = tangents[idx1]; + t2.mul(a); + + t3 = tangents[idx2]; + t3.mul(b); + + t1.add(t2); + t1.add(t3); + + *mTangent = t1; } } } diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h index 9ae34a0c4e..80d6ced36d 100755 --- a/indra/llmath/llvolumeoctree.h +++ b/indra/llmath/llvolumeoctree.h @@ -137,16 +137,16 @@ public: LLVector4a mStart; LLVector4a mDir; LLVector4a mEnd; - LLVector3* mIntersection; + LLVector4a* mIntersection; LLVector2* mTexCoord; - LLVector3* mNormal; - LLVector3* mBinormal; + LLVector4a* mNormal; + LLVector4a* mTangent; F32* mClosestT; bool mHitFace; LLOctreeTriangleRayIntersect(const LLVector4a& start, const LLVector4a& dir, const LLVolumeFace* face, F32* closest_t, - LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal); + LLVector4a* intersection,LLVector2* tex_coord, LLVector4a* normal, LLVector4a* tangent); void traverse(const LLOctreeNode* node); -- cgit v1.3 From ddf15867e4dd1e3506f0cfd975d9e7aa8f7aab66 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 7 Jun 2013 16:18:37 -0500 Subject: NORSPEC-242 Fix for infinity and NaN showing up in tangents and texture coordinates sometimes. --- indra/llmath/llvolume.cpp | 135 ++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 77 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index bf03c971cd..c3528349eb 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -1392,7 +1392,7 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); pt->mTexT = t; - + // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. @@ -1446,7 +1446,7 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); pt->mTexT = t; - + // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. @@ -1594,7 +1594,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, S32 sides = (S32)llfloor(llfloor((MIN_DETAIL_FACES * detail + twist_mag * 3.5f * (detail-0.5f))) * params.getRevolutions()); if (is_sculpted) - sides = sculpt_size; + sides = llmax(sculpt_size, 1); genNGon(params, sides); } @@ -1644,6 +1644,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, mPath[i].mScale.mV[0] = lerp(1,params.getScale().mV[0],t); mPath[i].mScale.mV[1] = lerp(1,params.getScale().mV[1],t); mPath[i].mTexT = t; + mPath[i].mRot.setQuat(F_PI * params.getTwist() * t,1,0,0); } @@ -2442,6 +2443,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) LLVector4a pos_range; pos_range.setSub(max_pos, min_pos); LLVector2 tc_range2 = max_tc - min_tc; + LLVector4a tc_range; tc_range.set(tc_range2[0], tc_range2[1], tc_range2[0], tc_range2[1]); LLVector4a min_tc4(min_tc[0], min_tc[1], min_tc[0], min_tc[1]); @@ -6304,24 +6306,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) cuv = (min_uv + max_uv)*0.5f; - LLVector4a normal; - LLVector4a d0, d1; - - - d0.setSub(*mCenter, pos[0]); - d1.setSub(*mCenter, pos[1]); - - if (mTypeMask & TOP_MASK) - { - normal.setCross3(d0, d1); - } - else - { - normal.setCross3(d1, d0); - } - - normal.normalize3fast(); - VertexData vd; vd.setPosition(*mCenter); vd.mTexCoord = cuv; @@ -6330,14 +6314,10 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { pos[num_vertices] = *mCenter; tc[num_vertices] = cuv; + num_vertices++; } - for (S32 i = 0; i < num_vertices; i++) - { - norm[i].load4a(normal.getF32ptr()); - } - if (partial_build) { return TRUE; @@ -6572,7 +6552,22 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } - + + LLVector4a d0,d1; + + d0.setSub(mPositions[mIndices[1]], mPositions[mIndices[0]]); + d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); + + LLVector4a normal; + normal.setCross3(d0,d1); + + normal.normalize3fast(); + + for (S32 i = 0; i < num_vertices; i++) + { + norm[i].load4a(normal.getF32ptr()); + } + return TRUE; } @@ -6600,37 +6595,6 @@ void LLVolumeFace::createTangents() CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); - /*for (U32 i = 0; i < mNumIndices/3; i++) - { //for each triangle - const U16& i0 = mIndices[i*3+0]; - const U16& i1 = mIndices[i*3+1]; - const U16& i2 = mIndices[i*3+2]; - - //calculate tangent - LLVector4a tangent; - calc_tangent_from_triangle(tangent, - pos[i0], tc[i0], - pos[i1], tc[i1], - pos[i2], tc[i2]); - - - //add triangle normal to vertices - binorm[i0].add(tangent); - binorm[i1].add(tangent); - binorm[i2].add(tangent); - - //even out quad contributions - if (i % 2 == 0) - { - binorm[i2].add(tangent); - } - else - { - binorm[i1].add(tangent); - } - }*/ - - //normalize tangents for (U32 i = 0; i < mNumVertices; i++) { @@ -6949,7 +6913,6 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); tc[cur_vertex] = LLVector2(ss,tt); @@ -6980,7 +6943,6 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } - //get bounding box for this side LLVector4a& face_min = mExtents[0]; LLVector4a& face_max = mExtents[1]; @@ -7265,26 +7227,36 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe float t1 = w2.mV[1] - w1.mV[1]; float t2 = w3.mV[1] - w1.mV[1]; - float r = 1.0F / (s1 * t2 - s2 * t1); - LLVector4a sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r, - (t2 * z1 - t1 * z2) * r); - LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, - (s1 * z2 - s2 * z1) * r); + F32 rd = s1*t2-s2*t1; + + float r = rd*rd > 0.f ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero + + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); - tan1[i1].add(sdir); - tan1[i2].add(sdir); - tan1[i3].add(sdir); + tan1[i1].add(sdir); + tan1[i2].add(sdir); + tan1[i3].add(sdir); - tan2[i1].add(tdir); - tan2[i2].add(tdir); - tan2[i3].add(tdir); + tan2[i1].add(tdir); + tan2[i2].add(tdir); + tan2[i3].add(tdir); } for (U32 a = 0; a < vertexCount; a++) { LLVector4a n = normal[a]; - const LLVector4a& t = tan1[a]; + + const LLVector4a& t = tan1[a]; + llassert(tan1[a].getLength3().getF32() >= 0.f); + llassert(tan2[a].getLength3().getF32() >= 0.f); + LLVector4a ncrosst; ncrosst.setCross3(n,t); @@ -7294,14 +7266,23 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe LLVector4a tsubn; tsubn.setSub(t,n); - tsubn.normalize3fast(); + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast(); - // Calculate handedness - F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; + // Calculate handedness + F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; - tsubn.getF32ptr()[3] = handedness; + tsubn.getF32ptr()[3] = handedness; + + tangent[a] = tsubn; - tangent[a] = tsubn; + llassert(tangent[a].getLength3().getF32() > 0.f); + } + else + { //degenerate, make up a value + tangent[a].set(0,0,1,1); + } } ll_aligned_free_16(tan1); -- cgit v1.3 From d09f1e71176f32a23be73b53559979727c242b4a Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Mon, 10 Jun 2013 16:01:53 -0500 Subject: NORSPEC-242 Followup, fix a couple more sources of NaN and infinity. --- indra/llmath/llvolume.cpp | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index c3528349eb..f9dd843b92 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -6561,8 +6561,23 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector4a normal; normal.setCross3(d0,d1); - normal.normalize3fast(); + if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO) + { + normal.normalize3fast(); + } + else + { //degenerate, make up a value + normal.set(0,0,1); + } + + llassert(llfinite(normal.getF32ptr()[0])); + llassert(llfinite(normal.getF32ptr()[1])); + llassert(llfinite(normal.getF32ptr()[2])); + llassert(!llisnan(normal.getF32ptr()[0])); + llassert(!llisnan(normal.getF32ptr()[1])); + llassert(!llisnan(normal.getF32ptr()[2])); + for (S32 i = 0; i < num_vertices; i++) { norm[i].load4a(normal.getF32ptr()); @@ -7048,6 +7063,14 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) n[1]->add(c); n[2]->add(c); + llassert(llfinite(c.getF32ptr()[0])); + llassert(llfinite(c.getF32ptr()[1])); + llassert(llfinite(c.getF32ptr()[2])); + + llassert(!llisnan(c.getF32ptr()[0])); + llassert(!llisnan(c.getF32ptr()[1])); + llassert(!llisnan(c.getF32ptr()[2])); + //even out quad contributions n[i%2+1]->add(c); } @@ -7277,7 +7300,13 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe tangent[a] = tsubn; - llassert(tangent[a].getLength3().getF32() > 0.f); + llassert(llfinite(tangent[a].getF32ptr()[0])); + llassert(llfinite(tangent[a].getF32ptr()[1])); + llassert(llfinite(tangent[a].getF32ptr()[2])); + + llassert(!llisnan(tangent[a].getF32ptr()[0])); + llassert(!llisnan(tangent[a].getF32ptr()[1])); + llassert(!llisnan(tangent[a].getF32ptr()[2])); } else { //degenerate, make up a value -- cgit v1.3 From 01f595d8716f7b80ab0088d93434fa77cc812c16 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Tue, 11 Jun 2013 12:02:54 -0700 Subject: Speculative fix for tangent calc asserts on Mac --- indra/llmath/llvolume.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f9dd843b92..1932272afb 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -7252,7 +7252,7 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe F32 rd = s1*t2-s2*t1; - float r = rd*rd > 0.f ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero + float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero llassert(llfinite(r)); llassert(!llisnan(r)); @@ -7276,7 +7276,7 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe LLVector4a n = normal[a]; const LLVector4a& t = tan1[a]; - + llassert(tan1[a].getLength3().getF32() >= 0.f); llassert(tan2[a].getLength3().getF32() >= 0.f); -- cgit v1.3 From 48324a93833cee8aca7559588ee5f2b4afa250fa Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Wed, 12 Jun 2013 08:09:29 -0700 Subject: Fix issues with NaNs in tangent data from using normalize3fast on zero-length vectors and other data conditioning; also added assert to normalize3fast to make finding these problems easier in the future --- indra/llmath/llvector4a.inl | 9 ++++ indra/llmath/llvolume.cpp | 128 +++++++++++++++++++++++++++++++++----------- 2 files changed, 106 insertions(+), 31 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7c52ffef21..4589bac9fb 100755 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -331,6 +331,9 @@ inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const // Note that this does not consider zero length vectors! inline void LLVector4a::normalize3() { + // find out about bad math before it takes two man-days to track down + llassert(isFinite3() && !equals3(getZero())); + // lenSqrd = a dot a LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } @@ -379,6 +382,9 @@ inline void LLVector4a::normalize4() // Note that this does not consider zero length vectors! inline LLSimdScalar LLVector4a::normalize3withLength() { + // find out about bad math before it takes two man-days to track down + llassert(isFinite3() && !equals3(getZero())); + // lenSqrd = a dot a LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } @@ -404,6 +410,9 @@ inline LLSimdScalar LLVector4a::normalize3withLength() // Note that this does not consider zero length vectors! inline void LLVector4a::normalize3fast() { + // find out about bad math before it takes two man-days to track down + llassert(isFinite3() && !equals3(getZero())); + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); mQ = _mm_mul_ps( mQ, approxRsqrt ); diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 1932272afb..bc2572375a 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -7209,46 +7209,53 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) return TRUE; } +#define TANGENTIAL_PARANOIA_ASSERTS 1 + +#if TANGENTIAL_PARANOIA_ASSERTS + #define tangential_paranoia(a) llassert(a) +#else + #define tangential_paranoia(a) +#endif + //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) { - //LLVector4a *tan1 = new LLVector4a[vertexCount * 2]; LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a)); - LLVector4a* tan2 = tan1 + vertexCount; + LLVector4a* tan2 = tan1 + vertexCount; memset(tan1, 0, vertexCount*2*sizeof(LLVector4a)); - for (U32 a = 0; a < triangleCount; a++) - { - U32 i1 = *index_array++; - U32 i2 = *index_array++; - U32 i3 = *index_array++; + for (U32 a = 0; a < triangleCount; a++) + { + U32 i1 = *index_array++; + U32 i2 = *index_array++; + U32 i3 = *index_array++; - const LLVector4a& v1 = vertex[i1]; - const LLVector4a& v2 = vertex[i2]; - const LLVector4a& v3 = vertex[i3]; + const LLVector4a& v1 = vertex[i1]; + const LLVector4a& v2 = vertex[i2]; + const LLVector4a& v3 = vertex[i3]; - const LLVector2& w1 = texcoord[i1]; - const LLVector2& w2 = texcoord[i2]; - const LLVector2& w3 = texcoord[i3]; + const LLVector2& w1 = texcoord[i1]; + const LLVector2& w2 = texcoord[i2]; + const LLVector2& w3 = texcoord[i3]; const F32* v1ptr = v1.getF32ptr(); const F32* v2ptr = v2.getF32ptr(); const F32* v3ptr = v3.getF32ptr(); - float x1 = v2ptr[0] - v1ptr[0]; - float x2 = v3ptr[0] - v1ptr[0]; - float y1 = v2ptr[1] - v1ptr[1]; - float y2 = v3ptr[1] - v1ptr[1]; - float z1 = v2ptr[2] - v1ptr[2]; - float z2 = v3ptr[2] - v1ptr[2]; + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; - float s1 = w2.mV[0] - w1.mV[0]; - float s2 = w3.mV[0] - w1.mV[0]; - float t1 = w2.mV[1] - w1.mV[1]; - float t2 = w3.mV[1] - w1.mV[1]; + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; F32 rd = s1*t2-s2*t1; @@ -7262,18 +7269,67 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, (s1 * z2 - s2 * z1) * r); + tan1[i1].add(sdir); tan1[i2].add(sdir); tan1[i3].add(sdir); - + + tangential_paranoia(tan1[i1].isFinite3()); + tangential_paranoia(tan1[i2].isFinite3()); + tangential_paranoia(tan1[i3].isFinite3()); + tan2[i1].add(tdir); tan2[i2].add(tdir); tan2[i3].add(tdir); - } - - for (U32 a = 0; a < vertexCount; a++) - { - LLVector4a n = normal[a]; + + tangential_paranoia(tan2[i1].isFinite3()); + tangential_paranoia(tan2[i2].isFinite3()); + tangential_paranoia(tan2[i3].isFinite3()); + } + + // These appear to come out of the summing above distinctly non-unit-length + // + for (U32 a = 0; a < vertexCount; a++) + { + // Conditioning required by assets which don't necessarily reference every vert index + // (i.e. some of the tangents can end up uninitialized and therefore indeterminate/INF) + // and protection against zero length vectors which are not handled by normalize3fast. + // + if (!tan1[a].isFinite3() || tan1[a].equals3(LLVector4a::getZero())) + { + tan1[a].set(0,0,1,1); + } + else + { + tan1[a].normalize3fast(); + } + + if (!tan2[a].isFinite3() || tan2[a].equals3(LLVector4a::getZero())) + { + tan2[a].set(0,0,1,1); + } + else + { + tan2[a].normalize3fast(); + } + + const F32 cefgw = 0.03f; + tangential_paranoia(tan1[a].isFinite3()); + tangential_paranoia(tan2[a].isFinite3()); + tangential_paranoia(tan1[a].isNormalized3(cefgw)); + tangential_paranoia(tan2[a].isNormalized3(cefgw)); + } + + for (U32 a = 0; a < vertexCount; a++) + { + LLVector4a n = normal[a]; + + if (!n.isFinite3() || n.equals3(LLVector4a::getZero())) + { + n.set(0,1,0,1); + } + + n.normalize3fast(); const LLVector4a& t = tan1[a]; @@ -7283,12 +7339,20 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe LLVector4a ncrosst; ncrosst.setCross3(n,t); - // Gram-Schmidt orthogonalize - n.mul(n.dot3(t).getF32()); + F32 n_dot_t = n.dot3(t).getF32(); + + tangential_paranoia(llfinite(n_dot_t) && !llisnan(n_dot_t)); + + // Gram-Schmidt orthogonalize + n.mul(n_dot_t); + + tangential_paranoia(n.isFinite3()); LLVector4a tsubn; tsubn.setSub(t,n); + tangential_paranoia(tsubn.isFinite3()); + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) { tsubn.normalize3fast(); @@ -7300,6 +7364,8 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe tangent[a] = tsubn; + tangential_paranoia(tangent[a].isNormalized3(0.1f)); + llassert(llfinite(tangent[a].getF32ptr()[0])); llassert(llfinite(tangent[a].getF32ptr()[1])); llassert(llfinite(tangent[a].getF32ptr()[2])); -- cgit v1.3 From d2b253f1f6072beead770519849ad3b18a1a4359 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Wed, 12 Jun 2013 09:16:19 -0700 Subject: Changes to protect against use of normalize3fast on degenerate vectors --- indra/llappearance/llpolymorph.cpp | 26 ++++++-- indra/llmath/llvector4a.h | 5 ++ indra/llmath/llvector4a.inl | 22 ++++++- indra/llmath/llvolume.cpp | 116 +++++++++++++++-------------------- indra/newview/llface.cpp | 31 +++++++--- indra/newview/llspatialpartition.cpp | 5 +- indra/newview/llvopartgroup.cpp | 10 +++ indra/newview/llvovolume.cpp | 5 +- indra/newview/pipeline.cpp | 2 + 9 files changed, 139 insertions(+), 83 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llappearance/llpolymorph.cpp b/indra/llappearance/llpolymorph.cpp index 8a17819083..5e5813b9ac 100644 --- a/indra/llappearance/llpolymorph.cpp +++ b/indra/llappearance/llpolymorph.cpp @@ -568,6 +568,12 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) F32 *maskWeightArray = (mVertMask) ? mVertMask->getMorphMaskWeights() : NULL; + LLVector4a default_norm; + LLVector4a default_binorm; + + default_norm.set(0,1,0,1); + default_binorm.set(1,0,0,1); + for(U32 vert_index_morph = 0; vert_index_morph < mMorphData->mNumIndices; vert_index_morph++) { S32 vert_index_mesh = mMorphData->mVertexIndices[vert_index_morph]; @@ -597,19 +603,31 @@ void LLPolyMorphTarget::apply( ESex avatar_sex ) norm.mul(delta_weight*maskWeight*NORMAL_SOFTEN_FACTOR); scaled_normals[vert_index_mesh].add(norm); norm = scaled_normals[vert_index_mesh]; - norm.normalize3fast(); + + // guard against degenerate input data before we create NaNs below! + // + norm.normalize3fast_checked(&default_norm); normals[vert_index_mesh] = norm; // calculate new binormals LLVector4a binorm = mMorphData->mBinormals[vert_index_morph]; + + // guard against degenerate input data before we create NaNs below! + // + if (!binorm.isFinite3() || (binorm.dot3(binorm).getF32() <= F_APPROXIMATELY_ZERO)) + { + binorm.set(1,0,0,1); + } + binorm.mul(delta_weight*maskWeight*NORMAL_SOFTEN_FACTOR); scaled_binormals[vert_index_mesh].add(binorm); LLVector4a tangent; tangent.setCross3(scaled_binormals[vert_index_mesh], norm); LLVector4a& normalized_binormal = binormals[vert_index_mesh]; - normalized_binormal.setCross3(norm, tangent); - normalized_binormal.normalize3fast(); - + + normalized_binormal.setCross3(norm, tangent); + normalized_binormal.normalize3fast_checked(&default_binorm); + tex_coords[vert_index_mesh] += mMorphData->mTexCoords[vert_index_morph] * delta_weight * maskWeight; } diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 0526793d3a..94a61f2b1d 100755 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -236,6 +236,11 @@ public: // Note that this does not consider zero length vectors! inline void normalize3fast(); + // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed + // Same as above except substitutes default vector contents if the vector is non-finite or degenerate due to zero length. + // + inline void normalize3fast_checked(LLVector4a* default = NULL); + // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const; diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 4589bac9fb..6860252a75 100755 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -410,8 +410,26 @@ inline LLSimdScalar LLVector4a::normalize3withLength() // Note that this does not consider zero length vectors! inline void LLVector4a::normalize3fast() { - // find out about bad math before it takes two man-days to track down - llassert(isFinite3() && !equals3(getZero())); + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); + const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + mQ = _mm_mul_ps( mQ, approxRsqrt ); +} + +// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed +// Note that this does not consider zero length vectors! +inline void LLVector4a::normalize3fast_checked(LLVector4a* default) +{ + // handle bogus inputs before NaNs are generated below + // + if (!isFinite3() || (dot3(*this).getF32() < F_APPROXIMATELY_ZERO)) + { + if (default) + *this = *default; + else + set(0,1,0,1); + + return; + } LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index bc2572375a..15621c2625 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4472,6 +4472,9 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, continue; //skip degenerate face } + LLVector4a default_norm; + default_norm.set(0,1,0,1); + //for each edge for (S32 k = 0; k < 3; k++) { S32 index = face.mEdge[j*3+k]; @@ -4493,14 +4496,14 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, norm_mat.rotate(n[v1], t); - t.normalize3fast(); + t.normalize3fast_checked(&default_norm); normals.push_back(LLVector3(t[0], t[1], t[2])); mat.affineTransform(v[v2], t); vertices.push_back(LLVector3(t[0], t[1], t[2])); norm_mat.rotate(n[v2], t); - t.normalize3fast(); + t.normalize3fast_checked(&default_norm); normals.push_back(LLVector3(t[0], t[1], t[2])); } } @@ -6096,6 +6099,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { VertexData corners[4]; VertexData baseVert; + LLVector4a default_norm; + default_norm.set(0,1,0,1); + for(S32 t = 0; t < 4; t++) { corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); @@ -6108,8 +6114,8 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) lhs.setSub(corners[1].getPosition(), corners[0].getPosition()); LLVector4a rhs; rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); - baseVert.getNormal().setCross3(lhs, rhs); - baseVert.getNormal().normalize3fast(); + baseVert.getNormal().setCross3(lhs, rhs); + baseVert.getNormal().normalize3fast_checked(&default_norm); } if(!(mTypeMask & TOP_MASK)) @@ -6559,17 +6565,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); LLVector4a normal; - normal.setCross3(d0,d1); - - if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO) - { - normal.normalize3fast(); - } - else - { //degenerate, make up a value - normal.set(0,0,1); - } + LLVector4a default_norm; + default_norm.set(0,1,0,1); + normal.setCross3(d0,d1); + normal.normalize3fast_checked(&default_norm); + llassert(llfinite(normal.getF32ptr()[0])); llassert(llfinite(normal.getF32ptr()[1])); llassert(llfinite(normal.getF32ptr()[2])); @@ -6611,11 +6612,13 @@ void LLVolumeFace::createTangents() CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); //normalize tangents + LLVector4a default_norm; + default_norm.set(0,1,0,1); for (U32 i = 0; i < mNumVertices; i++) { //binorm[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized - mNormals[i].normalize3fast(); + mNormals[i].normalize3fast_checked(&default_norm); } } } @@ -6793,6 +6796,9 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat mat.loadu(mat_in); norm_mat.loadu(norm_mat_in); + LLVector4a default_norm; + default_norm.set(0,1,0,1); + for (U32 i = 0; i < face.mNumVertices; ++i) { //transform appended face position and store @@ -6800,7 +6806,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat //transform appended face normal and store norm_mat.rotate(src_norm[i], dst_norm[i]); - dst_norm[i].normalize3fast(); + dst_norm[i].normalize3fast_checked(&default_norm); //copy appended face texture coordinate dst_tc[i] = src_tc[i]; @@ -7209,7 +7215,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) return TRUE; } -#define TANGENTIAL_PARANOIA_ASSERTS 1 +#define TANGENTIAL_PARANOIA_ASSERTS 0 #if TANGENTIAL_PARANOIA_ASSERTS #define tangential_paranoia(a) llassert(a) @@ -7289,47 +7295,28 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe // These appear to come out of the summing above distinctly non-unit-length // + LLVector4a default_norm; + default_norm.set(0,1,0,1); + for (U32 a = 0; a < vertexCount; a++) { - // Conditioning required by assets which don't necessarily reference every vert index - // (i.e. some of the tangents can end up uninitialized and therefore indeterminate/INF) - // and protection against zero length vectors which are not handled by normalize3fast. - // - if (!tan1[a].isFinite3() || tan1[a].equals3(LLVector4a::getZero())) - { - tan1[a].set(0,0,1,1); - } - else - { - tan1[a].normalize3fast(); - } + tan1[a].normalize3fast_checked(&default_norm); + tan2[a].normalize3fast_checked(&default_norm); - if (!tan2[a].isFinite3() || tan2[a].equals3(LLVector4a::getZero())) - { - tan2[a].set(0,0,1,1); - } - else - { - tan2[a].normalize3fast(); - } - - const F32 cefgw = 0.03f; tangential_paranoia(tan1[a].isFinite3()); tangential_paranoia(tan2[a].isFinite3()); - tangential_paranoia(tan1[a].isNormalized3(cefgw)); - tangential_paranoia(tan2[a].isNormalized3(cefgw)); + tangential_paranoia(tan1[a].isNormalized3(0.03f)); + tangential_paranoia(tan2[a].isNormalized3(0.03f)); } + LLVector4a default_tangent; + default_tangent.set(0,0,1,1); + for (U32 a = 0; a < vertexCount; a++) { LLVector4a n = normal[a]; - - if (!n.isFinite3() || n.equals3(LLVector4a::getZero())) - { - n.set(0,1,0,1); - } - - n.normalize3fast(); + + n.normalize3fast_checked(&default_norm); const LLVector4a& t = tan1[a]; @@ -7353,34 +7340,27 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe tangential_paranoia(tsubn.isFinite3()); - if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) - { - tsubn.normalize3fast(); + tsubn.normalize3fast_checked(&default_tangent); - // Calculate handedness - F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; + // Calculate handedness + F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; - tsubn.getF32ptr()[3] = handedness; + tsubn.getF32ptr()[3] = handedness; - tangent[a] = tsubn; + tangent[a] = tsubn; - tangential_paranoia(tangent[a].isNormalized3(0.1f)); + tangential_paranoia(tangent[a].isNormalized3(0.1f)); - llassert(llfinite(tangent[a].getF32ptr()[0])); - llassert(llfinite(tangent[a].getF32ptr()[1])); - llassert(llfinite(tangent[a].getF32ptr()[2])); + llassert(llfinite(tangent[a].getF32ptr()[0])); + llassert(llfinite(tangent[a].getF32ptr()[1])); + llassert(llfinite(tangent[a].getF32ptr()[2])); - llassert(!llisnan(tangent[a].getF32ptr()[0])); - llassert(!llisnan(tangent[a].getF32ptr()[1])); - llassert(!llisnan(tangent[a].getF32ptr()[2])); - } - else - { //degenerate, make up a value - tangent[a].set(0,0,1,1); - } + llassert(!llisnan(tangent[a].getF32ptr()[0])); + llassert(!llisnan(tangent[a].getF32ptr()[1])); + llassert(!llisnan(tangent[a].getF32ptr()[2])); } - ll_aligned_free_16(tan1); + ll_aligned_free_16(tan1); } diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 3e503cb750..b34370fa87 100755 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -817,6 +817,12 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, size.mul(scale); } + // Catch potential badness from normalization before it happens + // + llassert(mat_normal.mMatrix[0].isFinite3() && (mat_normal.mMatrix[0].dot3(mat_normal.mMatrix[0]).getF32() > F_APPROXIMATELY_ZERO)); + llassert(mat_normal.mMatrix[1].isFinite3() && (mat_normal.mMatrix[1].dot3(mat_normal.mMatrix[1]).getF32() > F_APPROXIMATELY_ZERO)); + llassert(mat_normal.mMatrix[2].isFinite3() && (mat_normal.mMatrix[2].dot3(mat_normal.mMatrix[2]).getF32() > F_APPROXIMATELY_ZERO)); + mat_normal.mMatrix[0].normalize3fast(); mat_normal.mMatrix[1].normalize3fast(); mat_normal.mMatrix[2].normalize3fast(); @@ -936,7 +942,9 @@ LLVector2 LLFace::surfaceToTexture(LLVector2 surface_coord, const LLVector4a& po LLVector4a volume_normal; LLVector3 v_normal(normal.getF32ptr()); volume_normal.load3(mDrawablep->getVOVolume()->agentDirectionToVolume(v_normal).mV); - volume_normal.normalize3fast(); + LLVector4a default_norm; + default_norm.set(0,1,0,1); + volume_normal.normalize3fast_checked(&default_norm); if (texgen == LLTextureEntry::TEX_GEN_PLANAR) { @@ -1909,7 +1917,10 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, binormal.load3(t.mV); } - binormal.normalize3fast(); + LLVector4a default_binorm; + default_binorm.set(1,0,0,1); + binormal.normalize3fast_checked(&default_binorm); + LLVector2 tc = bump_tc[i]; tc += LLVector2( bump_s_primary_light_ray.dot3(tangent).getF32(), bump_t_primary_light_ray.dot3(binormal).getF32() ); @@ -1996,12 +2007,13 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, LLFastTimer t(FTM_FACE_GEOM_NORMAL); mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range); F32* normals = (F32*) norm.get(); - + LLVector4a default_norm; + default_norm.set(0,1,0,1); for (S32 i = 0; i < num_vertices; i++) { LLVector4a normal; mat_normal.rotate(vf.mNormals[i], normal); - normal.normalize3fast(); + normal.normalize3fast_checked(&default_norm); normal.store4a(normals); normals += 4; } @@ -2024,12 +2036,14 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, mask.clear(); mask.setElement<3>(); + LLVector4a default_tangent; + default_tangent.set(0,0,1,1); + for (S32 i = 0; i < num_vertices; i++) { LLVector4a tangent_out; mat_normal.rotate(vf.mTangents[i], tangent_out); - tangent_out.normalize3fast(); - + tangent_out.normalize3fast_checked(&default_tangent); tangent_out.setSelectWithMask(mask, vf.mTangents[i], tangent_out); tangent_out.store4a(tangents); @@ -2244,7 +2258,10 @@ BOOL LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius) dist *= 16.f; } - lookAt.normalize3fast() ; + LLVector4a default_lookat; + default_lookat.set(0,0,1,1); + + lookAt.normalize3fast_checked(&default_lookat); //get area of circle around node F32 app_angle = atanf((F32) sqrt(size_squared) / dist); diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 78401020a6..dc99fd469b 100755 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -1259,12 +1259,15 @@ F32 LLSpatialPartition::calcDistance(LLSpatialGroup* group, LLCamera& camera) F32 dist = 0.f; + LLVector4a default_eyevec; + default_eyevec.set(0,0,1,1); + if (group->mDrawMap.find(LLRenderPass::PASS_ALPHA) != group->mDrawMap.end()) { LLVector4a v = eye; dist = eye.getLength3().getF32(); - eye.normalize3fast(); + eye.normalize3fast_checked(&default_eyevec); if (!group->isState(LLSpatialGroup::ALPHA_DIRTY)) { diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp index 6a7f26bdb5..b25213d85f 100755 --- a/indra/newview/llvopartgroup.cpp +++ b/indra/newview/llvopartgroup.cpp @@ -411,14 +411,21 @@ void LLVOPartGroup::getGeometry(S32 idx, LLVector4a right; right.setCross3(at, up); + // guard against NaNs in normalize below + llassert(right.dot3(right).getF32() > F_APPROXIMATELY_ZERO); right.normalize3fast(); + up.setCross3(right, at); + // guard against NaNs in normalize below + llassert(up.dot3(up).getF32() > F_APPROXIMATELY_ZERO); up.normalize3fast(); if (part.mFlags & LLPartData::LL_PART_FOLLOW_VELOCITY_MASK) { LLVector4a normvel; normvel.load3(part.mVelocity.mV); + // guard against NaNs in normalize below + llassert(normvel.dot3(normvel).getF32() > F_APPROXIMATELY_ZERO); normvel.normalize3fast(); LLVector2 up_fracs; up_fracs.mV[0] = normvel.dot3(right).getF32(); @@ -443,6 +450,9 @@ void LLVOPartGroup::getGeometry(S32 idx, up = new_up; right = t; + // guard against NaNs in normalize below + llassert(up.dot3(up).getF32() > F_APPROXIMATELY_ZERO); + llassert(right.dot3(right).getF32() > F_APPROXIMATELY_ZERO); up.normalize3fast(); right.normalize3fast(); } diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 0aa56fcc0f..8962d7cadf 100755 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -3751,7 +3751,8 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& { *normal = n; } - + // guard against NaNs in normalize below + llassert(normal->dot3(*normal).getF32() > F_APPROXIMATELY_ZERO); (*normal).normalize3fast(); } @@ -3774,6 +3775,8 @@ BOOL LLVOVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& { *tangent = tn; } + // guard against NaNs in normalize below + llassert(tangent->dot3(*tangent).getF32() > F_APPROXIMATELY_ZERO); (*tangent).normalize3fast(); } diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 05ef8060d4..72912db041 100755 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -10569,11 +10569,13 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar) LLVector4a left; left.load3(camera.getLeftAxis().mV); left.mul(left); + llassert(left.dot3(left).getF32() > F_APPROXIMATELY_ZERO); left.normalize3fast(); LLVector4a up; up.load3(camera.getUpAxis().mV); up.mul(up); + llassert(up.dot3(up).getF32() > F_APPROXIMATELY_ZERO); up.normalize3fast(); tdim.mV[0] = fabsf(half_height.dot3(left).getF32()); -- cgit v1.3 From 1a37ea367159b1ba6c5284add8dd338f96b64e84 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Wed, 12 Jun 2013 09:38:04 -0700 Subject: Pedantic adj overly concerned with minute details or formalisms --- indra/llmath/llvector4a.h | 3 ++- indra/llmath/llvector4a.inl | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 94a61f2b1d..79d0a44551 100755 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -46,6 +46,7 @@ class LLRotation; // of this writing, July 08, 2010) about getting it implemented before you resort to // LLVector3/LLVector4. ///////////////////////////////// +class LLVector4a; LL_ALIGN_PREFIX(16) class LLVector4a @@ -239,7 +240,7 @@ public: // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed // Same as above except substitutes default vector contents if the vector is non-finite or degenerate due to zero length. // - inline void normalize3fast_checked(LLVector4a* default = NULL); + inline void normalize3fast_checked(LLVector4a* d = 0); // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const; diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 6860252a75..d3e8a1c933 100755 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -417,14 +417,14 @@ inline void LLVector4a::normalize3fast() // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed // Note that this does not consider zero length vectors! -inline void LLVector4a::normalize3fast_checked(LLVector4a* default) +inline void LLVector4a::normalize3fast_checked(LLVector4a* d) { // handle bogus inputs before NaNs are generated below // if (!isFinite3() || (dot3(*this).getF32() < F_APPROXIMATELY_ZERO)) { - if (default) - *this = *default; + if (d) + *this = *d; else set(0,1,0,1); -- cgit v1.3 From 9726f3774d58e5d9d78648bb5185f694a9f70954 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Wed, 12 Jun 2013 10:26:06 -0700 Subject: Backout tangent assert experiment --- indra/llmath/llvector4a.inl | 27 ------- indra/llmath/llvolume.cpp | 178 ++++++++++++++++---------------------------- 2 files changed, 66 insertions(+), 139 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 6860252a75..7c52ffef21 100755 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -331,9 +331,6 @@ inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const // Note that this does not consider zero length vectors! inline void LLVector4a::normalize3() { - // find out about bad math before it takes two man-days to track down - llassert(isFinite3() && !equals3(getZero())); - // lenSqrd = a dot a LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } @@ -382,9 +379,6 @@ inline void LLVector4a::normalize4() // Note that this does not consider zero length vectors! inline LLSimdScalar LLVector4a::normalize3withLength() { - // find out about bad math before it takes two man-days to track down - llassert(isFinite3() && !equals3(getZero())); - // lenSqrd = a dot a LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } @@ -415,27 +409,6 @@ inline void LLVector4a::normalize3fast() mQ = _mm_mul_ps( mQ, approxRsqrt ); } -// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed -// Note that this does not consider zero length vectors! -inline void LLVector4a::normalize3fast_checked(LLVector4a* default) -{ - // handle bogus inputs before NaNs are generated below - // - if (!isFinite3() || (dot3(*this).getF32() < F_APPROXIMATELY_ZERO)) - { - if (default) - *this = *default; - else - set(0,1,0,1); - - return; - } - - LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); - const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); - mQ = _mm_mul_ps( mQ, approxRsqrt ); -} - // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const { diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 15621c2625..1932272afb 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4472,9 +4472,6 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, continue; //skip degenerate face } - LLVector4a default_norm; - default_norm.set(0,1,0,1); - //for each edge for (S32 k = 0; k < 3; k++) { S32 index = face.mEdge[j*3+k]; @@ -4496,14 +4493,14 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, norm_mat.rotate(n[v1], t); - t.normalize3fast_checked(&default_norm); + t.normalize3fast(); normals.push_back(LLVector3(t[0], t[1], t[2])); mat.affineTransform(v[v2], t); vertices.push_back(LLVector3(t[0], t[1], t[2])); norm_mat.rotate(n[v2], t); - t.normalize3fast_checked(&default_norm); + t.normalize3fast(); normals.push_back(LLVector3(t[0], t[1], t[2])); } } @@ -6099,9 +6096,6 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { VertexData corners[4]; VertexData baseVert; - LLVector4a default_norm; - default_norm.set(0,1,0,1); - for(S32 t = 0; t < 4; t++) { corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); @@ -6114,8 +6108,8 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) lhs.setSub(corners[1].getPosition(), corners[0].getPosition()); LLVector4a rhs; rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); - baseVert.getNormal().setCross3(lhs, rhs); - baseVert.getNormal().normalize3fast_checked(&default_norm); + baseVert.getNormal().setCross3(lhs, rhs); + baseVert.getNormal().normalize3fast(); } if(!(mTypeMask & TOP_MASK)) @@ -6565,12 +6559,17 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); LLVector4a normal; - LLVector4a default_norm; - default_norm.set(0,1,0,1); - normal.setCross3(d0,d1); - normal.normalize3fast_checked(&default_norm); - + + if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO) + { + normal.normalize3fast(); + } + else + { //degenerate, make up a value + normal.set(0,0,1); + } + llassert(llfinite(normal.getF32ptr()[0])); llassert(llfinite(normal.getF32ptr()[1])); llassert(llfinite(normal.getF32ptr()[2])); @@ -6612,13 +6611,11 @@ void LLVolumeFace::createTangents() CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); //normalize tangents - LLVector4a default_norm; - default_norm.set(0,1,0,1); for (U32 i = 0; i < mNumVertices; i++) { //binorm[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized - mNormals[i].normalize3fast_checked(&default_norm); + mNormals[i].normalize3fast(); } } } @@ -6796,9 +6793,6 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat mat.loadu(mat_in); norm_mat.loadu(norm_mat_in); - LLVector4a default_norm; - default_norm.set(0,1,0,1); - for (U32 i = 0; i < face.mNumVertices; ++i) { //transform appended face position and store @@ -6806,7 +6800,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat //transform appended face normal and store norm_mat.rotate(src_norm[i], dst_norm[i]); - dst_norm[i].normalize3fast_checked(&default_norm); + dst_norm[i].normalize3fast(); //copy appended face texture coordinate dst_tc[i] = src_tc[i]; @@ -7215,53 +7209,46 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) return TRUE; } -#define TANGENTIAL_PARANOIA_ASSERTS 0 - -#if TANGENTIAL_PARANOIA_ASSERTS - #define tangential_paranoia(a) llassert(a) -#else - #define tangential_paranoia(a) -#endif - //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. http://www.terathon.com/code/tangent.html void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) { + //LLVector4a *tan1 = new LLVector4a[vertexCount * 2]; LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a)); - LLVector4a* tan2 = tan1 + vertexCount; + LLVector4a* tan2 = tan1 + vertexCount; memset(tan1, 0, vertexCount*2*sizeof(LLVector4a)); - for (U32 a = 0; a < triangleCount; a++) - { - U32 i1 = *index_array++; - U32 i2 = *index_array++; - U32 i3 = *index_array++; + for (U32 a = 0; a < triangleCount; a++) + { + U32 i1 = *index_array++; + U32 i2 = *index_array++; + U32 i3 = *index_array++; - const LLVector4a& v1 = vertex[i1]; - const LLVector4a& v2 = vertex[i2]; - const LLVector4a& v3 = vertex[i3]; + const LLVector4a& v1 = vertex[i1]; + const LLVector4a& v2 = vertex[i2]; + const LLVector4a& v3 = vertex[i3]; - const LLVector2& w1 = texcoord[i1]; - const LLVector2& w2 = texcoord[i2]; - const LLVector2& w3 = texcoord[i3]; + const LLVector2& w1 = texcoord[i1]; + const LLVector2& w2 = texcoord[i2]; + const LLVector2& w3 = texcoord[i3]; const F32* v1ptr = v1.getF32ptr(); const F32* v2ptr = v2.getF32ptr(); const F32* v3ptr = v3.getF32ptr(); - float x1 = v2ptr[0] - v1ptr[0]; - float x2 = v3ptr[0] - v1ptr[0]; - float y1 = v2ptr[1] - v1ptr[1]; - float y2 = v3ptr[1] - v1ptr[1]; - float z1 = v2ptr[2] - v1ptr[2]; - float z2 = v3ptr[2] - v1ptr[2]; + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; - float s1 = w2.mV[0] - w1.mV[0]; - float s2 = w3.mV[0] - w1.mV[0]; - float t1 = w2.mV[1] - w1.mV[1]; - float t2 = w3.mV[1] - w1.mV[1]; + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; F32 rd = s1*t2-s2*t1; @@ -7275,48 +7262,18 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe LLVector4a tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, (s1 * z2 - s2 * z1) * r); - tan1[i1].add(sdir); tan1[i2].add(sdir); tan1[i3].add(sdir); - - tangential_paranoia(tan1[i1].isFinite3()); - tangential_paranoia(tan1[i2].isFinite3()); - tangential_paranoia(tan1[i3].isFinite3()); - + tan2[i1].add(tdir); tan2[i2].add(tdir); tan2[i3].add(tdir); - - tangential_paranoia(tan2[i1].isFinite3()); - tangential_paranoia(tan2[i2].isFinite3()); - tangential_paranoia(tan2[i3].isFinite3()); - } - - // These appear to come out of the summing above distinctly non-unit-length - // - LLVector4a default_norm; - default_norm.set(0,1,0,1); - - for (U32 a = 0; a < vertexCount; a++) - { - tan1[a].normalize3fast_checked(&default_norm); - tan2[a].normalize3fast_checked(&default_norm); - - tangential_paranoia(tan1[a].isFinite3()); - tangential_paranoia(tan2[a].isFinite3()); - tangential_paranoia(tan1[a].isNormalized3(0.03f)); - tangential_paranoia(tan2[a].isNormalized3(0.03f)); - } - - LLVector4a default_tangent; - default_tangent.set(0,0,1,1); - - for (U32 a = 0; a < vertexCount; a++) - { - LLVector4a n = normal[a]; - - n.normalize3fast_checked(&default_norm); + } + + for (U32 a = 0; a < vertexCount; a++) + { + LLVector4a n = normal[a]; const LLVector4a& t = tan1[a]; @@ -7326,41 +7283,38 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe LLVector4a ncrosst; ncrosst.setCross3(n,t); - F32 n_dot_t = n.dot3(t).getF32(); - - tangential_paranoia(llfinite(n_dot_t) && !llisnan(n_dot_t)); - - // Gram-Schmidt orthogonalize - n.mul(n_dot_t); - - tangential_paranoia(n.isFinite3()); + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); LLVector4a tsubn; tsubn.setSub(t,n); - tangential_paranoia(tsubn.isFinite3()); - - tsubn.normalize3fast_checked(&default_tangent); + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast(); - // Calculate handedness - F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; + // Calculate handedness + F32 handedness = ncrosst.dot3(tan2[a]).getF32() < 0.f ? -1.f : 1.f; - tsubn.getF32ptr()[3] = handedness; + tsubn.getF32ptr()[3] = handedness; - tangent[a] = tsubn; + tangent[a] = tsubn; - tangential_paranoia(tangent[a].isNormalized3(0.1f)); + llassert(llfinite(tangent[a].getF32ptr()[0])); + llassert(llfinite(tangent[a].getF32ptr()[1])); + llassert(llfinite(tangent[a].getF32ptr()[2])); - llassert(llfinite(tangent[a].getF32ptr()[0])); - llassert(llfinite(tangent[a].getF32ptr()[1])); - llassert(llfinite(tangent[a].getF32ptr()[2])); - - llassert(!llisnan(tangent[a].getF32ptr()[0])); - llassert(!llisnan(tangent[a].getF32ptr()[1])); - llassert(!llisnan(tangent[a].getF32ptr()[2])); + llassert(!llisnan(tangent[a].getF32ptr()[0])); + llassert(!llisnan(tangent[a].getF32ptr()[1])); + llassert(!llisnan(tangent[a].getF32ptr()[2])); + } + else + { //degenerate, make up a value + tangent[a].set(0,0,1,1); + } } - ll_aligned_free_16(tan1); + ll_aligned_free_16(tan1); } -- cgit v1.3 From 7b28a9a541464ece86e6d09fe7a85f307a710ddb Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 13 Jun 2013 14:38:51 -0500 Subject: Remove some now useless assertions. --- indra/llmath/llvolume.cpp | 8 -------- 1 file changed, 8 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 1932272afb..a8f4c52c5c 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -7299,14 +7299,6 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe tsubn.getF32ptr()[3] = handedness; tangent[a] = tsubn; - - llassert(llfinite(tangent[a].getF32ptr()[0])); - llassert(llfinite(tangent[a].getF32ptr()[1])); - llassert(llfinite(tangent[a].getF32ptr()[2])); - - llassert(!llisnan(tangent[a].getF32ptr()[0])); - llassert(!llisnan(tangent[a].getF32ptr()[1])); - llassert(!llisnan(tangent[a].getF32ptr()[2])); } else { //degenerate, make up a value -- cgit v1.3 From fec6ab591ef644ee8058742f16849ca9ff53c6a6 Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Fri, 14 Jun 2013 09:32:23 -0700 Subject: Disable asserts in CalculateTangentArray (discussed with davep) to avoid debugging interruptions from bad assets --- indra/llmath/llvolume.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 1932272afb..c4e1f0c84c 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -7300,13 +7300,15 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe tangent[a] = tsubn; + /* + These are going off on invalid input and hindering other debugging. llassert(llfinite(tangent[a].getF32ptr()[0])); llassert(llfinite(tangent[a].getF32ptr()[1])); llassert(llfinite(tangent[a].getF32ptr()[2])); llassert(!llisnan(tangent[a].getF32ptr()[0])); llassert(!llisnan(tangent[a].getF32ptr()[1])); - llassert(!llisnan(tangent[a].getF32ptr()[2])); + llassert(!llisnan(tangent[a].getF32ptr()[2]));*/ } else { //degenerate, make up a value -- cgit v1.3 From ccd04cd66c0a550694fefe41042ef47466780a92 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 18 Jun 2013 17:24:21 -0500 Subject: Occlusion culling overhaul. --- indra/llmath/llvolume.cpp | 3 - indra/llrender/llglslshader.cpp | 65 ++++++++ indra/llrender/llglslshader.h | 7 + indra/llrender/llrendertarget.cpp | 36 ++++- indra/llrender/llrendertarget.h | 6 + indra/newview/app_settings/settings.xml | 3 +- .../shaders/class1/deferred/blurLightF.glsl | 94 +++++------ .../shaders/class1/interface/downsampleDepthF.glsl | 67 ++++++++ .../class1/interface/downsampleDepthRectF.glsl | 67 ++++++++ .../shaders/class1/interface/downsampleDepthV.glsl | 59 +++++++ indra/newview/llmeshrepository.cpp | 1 + indra/newview/llspatialpartition.cpp | 180 +++++++++++++++++++-- indra/newview/llviewerdisplay.cpp | 3 +- indra/newview/llviewershadermgr.cpp | 24 +++ indra/newview/llviewershadermgr.h | 4 +- indra/newview/llvovolume.cpp | 7 +- indra/newview/pipeline.cpp | 145 +++++++++++++++-- indra/newview/pipeline.h | 10 +- 18 files changed, 699 insertions(+), 82 deletions(-) create mode 100644 indra/newview/app_settings/shaders/class1/interface/downsampleDepthF.glsl create mode 100644 indra/newview/app_settings/shaders/class1/interface/downsampleDepthRectF.glsl create mode 100644 indra/newview/app_settings/shaders/class1/interface/downsampleDepthV.glsl (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index a8f4c52c5c..14cebfe5aa 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -7277,9 +7277,6 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe const LLVector4a& t = tan1[a]; - llassert(tan1[a].getLength3().getF32() >= 0.f); - llassert(tan2[a].getLength3().getF32() >= 0.f); - LLVector4a ncrosst; ncrosst.setCross3(n,t); diff --git a/indra/llrender/llglslshader.cpp b/indra/llrender/llglslshader.cpp index 62191b4c1a..ac16e30796 100755 --- a/indra/llrender/llglslshader.cpp +++ b/indra/llrender/llglslshader.cpp @@ -149,6 +149,9 @@ void LLGLSLShader::clearStats() mTimeElapsed = 0; mSamplesDrawn = 0; mDrawCalls = 0; + mTextureStateFetched = false; + mTextureMagFilter.clear(); + mTextureMinFilter.clear(); } void LLGLSLShader::dumpStats() @@ -161,6 +164,16 @@ void LLGLSLShader::dumpStats() { llinfos << mShaderFiles[i].first << llendl; } + for (U32 i = 0; i < mTexture.size(); ++i) + { + GLint idx = mTexture[i]; + + if (idx >= 0) + { + GLint uniform_idx = getUniformLocation(i); + llinfos << mUniformNameMap[uniform_idx] << " - " << std::hex << mTextureMagFilter[i] << "/" << mTextureMinFilter[i] << std::dec << llendl; + } + } llinfos << "=============================================" << llendl; F32 ms = mTimeElapsed/1000000.f; @@ -211,6 +224,39 @@ void LLGLSLShader::placeProfileQuery() glGenQueriesARB(1, &mTimerQuery); } + if (!mTextureStateFetched) + { + mTextureStateFetched = true; + mTextureMagFilter.resize(mTexture.size()); + mTextureMinFilter.resize(mTexture.size()); + + U32 cur_active = gGL.getCurrentTexUnitIndex(); + + for (U32 i = 0; i < mTexture.size(); ++i) + { + GLint idx = mTexture[i]; + + if (idx >= 0) + { + gGL.getTexUnit(idx)->activate(); + + U32 mag = 0xFFFFFFFF; + U32 min = 0xFFFFFFFF; + + U32 type = LLTexUnit::getInternalType(gGL.getTexUnit(idx)->getCurrType()); + + glGetTexParameteriv(type, GL_TEXTURE_MAG_FILTER, (GLint*) &mag); + glGetTexParameteriv(type, GL_TEXTURE_MIN_FILTER, (GLint*) &min); + + mTextureMagFilter[i] = mag; + mTextureMinFilter[i] = min; + } + } + + gGL.getTexUnit(cur_active)->activate(); + } + + glBeginQueryARB(GL_SAMPLES_PASSED, 1); glBeginQueryARB(GL_TIME_ELAPSED, mTimerQuery); #endif @@ -573,6 +619,7 @@ void LLGLSLShader::mapUniform(GLint index, const vector * uniforms) } mUniformMap[name] = location; + mUniformNameMap[location] = name; LL_DEBUGS("ShaderLoading") << "Uniform " << name << " is at location " << location << LL_ENDL; //find the index of this uniform @@ -635,6 +682,7 @@ BOOL LLGLSLShader::mapUniforms(const vector * uniforms) mActiveTextureChannels = 0; mUniform.clear(); mUniformMap.clear(); + mUniformNameMap.clear(); mTexture.clear(); mValue.clear(); //initialize arrays @@ -1152,6 +1200,23 @@ void LLGLSLShader::uniform1i(const string& uniform, GLint v) } } +void LLGLSLShader::uniform2i(const string& uniform, GLint i, GLint j) +{ + GLint location = getUniformLocation(uniform); + + if (location >= 0) + { + std::map::iterator iter = mValue.find(location); + LLVector4 vec(i,j,0.f,0.f); + if (iter == mValue.end() || shouldChange(iter->second,vec)) + { + glUniform2iARB(location, i, j); + mValue[location] = vec; + } + } +} + + void LLGLSLShader::uniform1f(const string& uniform, GLfloat v) { GLint location = getUniformLocation(uniform); diff --git a/indra/llrender/llglslshader.h b/indra/llrender/llglslshader.h index 3c775cde27..eabdb9fc92 100755 --- a/indra/llrender/llglslshader.h +++ b/indra/llrender/llglslshader.h @@ -111,6 +111,7 @@ public: void uniform3fv(U32 index, U32 count, const GLfloat* v); void uniform4fv(U32 index, U32 count, const GLfloat* v); void uniform1i(const std::string& uniform, GLint i); + void uniform2i(const std::string& uniform, GLint i, GLint j); void uniform1f(const std::string& uniform, GLfloat v); void uniform2f(const std::string& uniform, GLfloat x, GLfloat y); void uniform3f(const std::string& uniform, GLfloat x, GLfloat y, GLfloat z); @@ -170,6 +171,7 @@ public: U32 mAttributeMask; //mask of which reserved attributes are set (lines up with LLVertexBuffer::getTypeMask()) std::vector mUniform; //lookup table of uniform enum to uniform location std::map mUniformMap; //lookup map of uniform name to uniform location + std::map mUniformNameMap; //lookup map of uniform location to uniform name std::map mValue; //lookup map of uniform location to last known value std::vector mTexture; S32 mTotalUniformSize; @@ -192,6 +194,11 @@ public: static U64 sTotalSamplesDrawn; U32 mDrawCalls; static U32 sTotalDrawCalls; + + bool mTextureStateFetched; + std::vector mTextureMagFilter; + std::vector mTextureMinFilter; + }; //UI shader (declared here so llui_libtest will link properly) diff --git a/indra/llrender/llrendertarget.cpp b/indra/llrender/llrendertarget.cpp index 5fb4fc8e52..752486fdde 100755 --- a/indra/llrender/llrendertarget.cpp +++ b/indra/llrender/llrendertarget.cpp @@ -53,11 +53,23 @@ void check_framebuffer_status() bool LLRenderTarget::sUseFBO = false; U32 LLRenderTarget::sCurFBO = 0; + +extern S32 gGLViewport[4]; + +//HEY DAVE HOOK THESE UP +U32 LLRenderTarget::sCurResX = 0; +U32 LLRenderTarget::sCurResY = 0; + + +/// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + LLRenderTarget::LLRenderTarget() : mResX(0), mResY(0), mFBO(0), mPreviousFBO(0), + mPreviousResX(0), + mPreviousResY(0), mDepth(0), mStencil(0), mUseDepth(false), @@ -390,13 +402,12 @@ void LLRenderTarget::bindTarget() { if (mFBO) { - mPreviousFBO = sCurFBO; - stop_glerror(); + mPreviousFBO = sCurFBO; glBindFramebuffer(GL_FRAMEBUFFER, mFBO); sCurFBO = mFBO; - + stop_glerror(); if (gGLManager.mHasDrawBuffers) { //setup multiple render targets @@ -418,7 +429,12 @@ void LLRenderTarget::bindTarget() stop_glerror(); } + mPreviousResX = sCurResX; + mPreviousResY = sCurResY; glViewport(0, 0, mResX, mResY); + sCurResX = mResX; + sCurResY = mResY; + sBoundTarget = this; } @@ -489,6 +505,20 @@ void LLRenderTarget::flush(bool fetch_depth) stop_glerror(); glBindFramebuffer(GL_FRAMEBUFFER, mPreviousFBO); sCurFBO = mPreviousFBO; + + if (mPreviousFBO) + { + glViewport(0, 0, mPreviousResX, mPreviousResY); + sCurResX = mPreviousResX; + sCurResY = mPreviousResY; + } + else + { + glViewport(gGLViewport[0],gGLViewport[1],gGLViewport[2],gGLViewport[3]); + sCurResX = gGLViewport[2]; + sCurResY = gGLViewport[3]; + } + stop_glerror(); } } diff --git a/indra/llrender/llrendertarget.h b/indra/llrender/llrendertarget.h index 765a727b5b..66a9874a6b 100755 --- a/indra/llrender/llrendertarget.h +++ b/indra/llrender/llrendertarget.h @@ -63,6 +63,9 @@ public: static bool sUseFBO; static U32 sBytesAllocated; static U32 sCurFBO; + static U32 sCurResX; + static U32 sCurResY; + LLRenderTarget(); ~LLRenderTarget(); @@ -146,6 +149,9 @@ protected: std::vector mInternalFormat; U32 mFBO; U32 mPreviousFBO; + U32 mPreviousResX; + U32 mPreviousResY; + U32 mDepth; bool mStencil; bool mUseDepth; diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml index 12ca902c59..344079b640 100755 --- a/indra/newview/app_settings/settings.xml +++ b/indra/newview/app_settings/settings.xml @@ -7732,7 +7732,7 @@ Type S32 Value - 4 + 3 OctreeAlphaDistanceFactor @@ -8504,7 +8504,6 @@ Value 1.0 - RenderDeferredTreeShadowBias Comment diff --git a/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl b/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl index 589ace086d..968a5f6b3d 100755 --- a/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl @@ -46,11 +46,6 @@ VARYING vec2 vary_fragcoord; uniform mat4 inv_proj; uniform vec2 screen_res; -vec3 getKern(int i) -{ - return kern[i]; -} - vec4 getPosition(vec2 pos_screen) { float depth = texture2DRect(depthMap, pos_screen.xy).r; @@ -64,38 +59,38 @@ vec4 getPosition(vec2 pos_screen) return pos; } -#ifdef SINGLE_FP_ONLY -vec2 encode_normal(vec3 n) -{ - vec2 sn; - sn.xy = (n.xy * vec2(0.5f,0.5f)) + vec2(0.5f,0.5f); - return sn; -} - -vec3 decode_normal (vec2 enc) -{ - vec3 n; - n.xy = (enc.xy * vec2(2.0f,2.0f)) - vec2(1.0f,1.0f); - n.z = sqrt(1.0f - dot(n.xy,n.xy)); - return n; -} -#else -vec2 encode_normal(vec3 n) -{ - float f = sqrt(8 * n.z + 8); - return n.xy / f + 0.5; -} - -vec3 decode_normal (vec2 enc) -{ - vec2 fenc = enc*4-2; - float f = dot(fenc,fenc); - float g = sqrt(1-f/4); - vec3 n; - n.xy = fenc*g; - n.z = 1-f/2; - return n; -} +#ifdef SINGLE_FP_ONLY +vec2 encode_normal(vec3 n) +{ + vec2 sn; + sn.xy = (n.xy * vec2(0.5f,0.5f)) + vec2(0.5f,0.5f); + return sn; +} + +vec3 decode_normal (vec2 enc) +{ + vec3 n; + n.xy = (enc.xy * vec2(2.0f,2.0f)) - vec2(1.0f,1.0f); + n.z = sqrt(1.0f - dot(n.xy,n.xy)); + return n; +} +#else +vec2 encode_normal(vec3 n) +{ + float f = sqrt(8 * n.z + 8); + return n.xy / f + 0.5; +} + +vec3 decode_normal (vec2 enc) +{ + vec2 fenc = enc*4-2; + float f = dot(fenc,fenc); + float g = sqrt(1-f/4); + vec3 n; + n.xy = fenc*g; + n.z = 1-f/2; + return n; +} #endif void main() @@ -110,7 +105,7 @@ void main() vec2 dlt = kern_scale * delta / (1.0+norm.xy*norm.xy); dlt /= max(-pos.z*dist_factor, 1.0); - vec2 defined_weight = getKern(0).xy; // special case the first (centre) sample's weight in the blur; we have to sample it anyway so we get it for 'free' + vec2 defined_weight = kern[0].xy; // special case the first (centre) sample's weight in the blur; we have to sample it anyway so we get it for 'free' vec4 col = defined_weight.xyxx * ccol; // relax tolerance according to distance to avoid speckling artifacts, as angles and distances are a lot more abrupt within a small screen area at larger distances @@ -120,28 +115,33 @@ void main() float tc_mod = 0.5*(tc.x + tc.y); // mod(tc.x+tc.y,2) tc_mod -= floor(tc_mod); tc_mod *= 2.0; - tc += ( (tc_mod - 0.5) * getKern(1).z * dlt * 0.5 ); + tc += ( (tc_mod - 0.5) * kern[1].z * dlt * 0.5 ); for (int i = 1; i < 4; i++) { - vec2 samptc = tc + getKern(i).z*dlt; - vec3 samppos = getPosition(samptc).xyz; + vec2 samptc = tc + kern[i].z*dlt; + vec3 samppos = getPosition(samptc).xyz; + float d = dot(norm.xyz, samppos.xyz-pos.xyz);// dist from plane + if (d*d <= pointplanedist_tolerance_pow2) { - col += texture2DRect(lightMap, samptc)*getKern(i).xyxx; - defined_weight += getKern(i).xy; + col += texture2DRect(lightMap, samptc)*kern[i].xyxx; + defined_weight += kern[i].xy; } } + for (int i = 1; i < 4; i++) { - vec2 samptc = tc - getKern(i).z*dlt; - vec3 samppos = getPosition(samptc).xyz; + vec2 samptc = tc - kern[i].z*dlt; + vec3 samppos = getPosition(samptc).xyz; + float d = dot(norm.xyz, samppos.xyz-pos.xyz);// dist from plane + if (d*d <= pointplanedist_tolerance_pow2) { - col += texture2DRect(lightMap, samptc)*getKern(i).xyxx; - defined_weight += getKern(i).xy; + col += texture2DRect(lightMap, samptc)*kern[i].xyxx; + defined_weight += kern[i].xy; } } diff --git a/indra/newview/app_settings/shaders/class1/interface/downsampleDepthF.glsl b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthF.glsl new file mode 100644 index 0000000000..6523a06d22 --- /dev/null +++ b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthF.glsl @@ -0,0 +1,67 @@ +/** + * @file debugF.glsl + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifdef DEFINE_GL_FRAGCOLOR +out vec4 frag_color; +#else +#define frag_color gl_FragColor +#endif + +uniform sampler2D depthMap; + +uniform float delta; + +VARYING vec2 tc0; +VARYING vec2 tc1; +VARYING vec2 tc2; +VARYING vec2 tc3; +VARYING vec2 tc4; +VARYING vec2 tc5; +VARYING vec2 tc6; +VARYING vec2 tc7; +VARYING vec2 tc8; + +void main() +{ + vec4 depth1 = + vec4(texture2D(depthMap, tc0).r, + texture2D(depthMap, tc1).r, + texture2D(depthMap, tc2).r, + texture2D(depthMap, tc3).r); + + vec4 depth2 = + vec4(texture2D(depthMap, tc4).r, + texture2D(depthMap, tc5).r, + texture2D(depthMap, tc6).r, + texture2D(depthMap, tc7).r); + + depth1 = min(depth1, depth2); + float depth = min(depth1.x, depth1.y); + depth = min(depth, depth1.z); + depth = min(depth, depth1.w); + depth = min(depth, texture2D(depthMap, tc8).r); + + gl_FragDepth = depth; +} diff --git a/indra/newview/app_settings/shaders/class1/interface/downsampleDepthRectF.glsl b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthRectF.glsl new file mode 100644 index 0000000000..2f89b8ed72 --- /dev/null +++ b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthRectF.glsl @@ -0,0 +1,67 @@ +/** + * @file debugF.glsl + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifdef DEFINE_GL_FRAGCOLOR +out vec4 frag_color; +#else +#define frag_color gl_FragColor +#endif + +uniform sampler2DRect depthMap; + +uniform float delta; + +VARYING vec2 tc0; +VARYING vec2 tc1; +VARYING vec2 tc2; +VARYING vec2 tc3; +VARYING vec2 tc4; +VARYING vec2 tc5; +VARYING vec2 tc6; +VARYING vec2 tc7; +VARYING vec2 tc8; + +void main() +{ + vec4 depth1 = + vec4(texture2DRect(depthMap, tc0).r, + texture2DRect(depthMap, tc1).r, + texture2DRect(depthMap, tc2).r, + texture2DRect(depthMap, tc3).r); + + vec4 depth2 = + vec4(texture2DRect(depthMap, tc4).r, + texture2DRect(depthMap, tc5).r, + texture2DRect(depthMap, tc6).r, + texture2DRect(depthMap, tc7).r); + + depth1 = min(depth1, depth2); + float depth = min(depth1.x, depth1.y); + depth = min(depth, depth1.z); + depth = min(depth, depth1.w); + depth = min(depth, texture2DRect(depthMap, tc8).r); + + gl_FragDepth = depth; +} diff --git a/indra/newview/app_settings/shaders/class1/interface/downsampleDepthV.glsl b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthV.glsl new file mode 100644 index 0000000000..71d80911d6 --- /dev/null +++ b/indra/newview/app_settings/shaders/class1/interface/downsampleDepthV.glsl @@ -0,0 +1,59 @@ +/** + * @file debugV.glsl + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2011, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +uniform mat4 modelview_projection_matrix; + +ATTRIBUTE vec3 position; + +uniform vec2 screen_res; + +uniform vec2 delta; + +VARYING vec2 tc0; +VARYING vec2 tc1; +VARYING vec2 tc2; +VARYING vec2 tc3; +VARYING vec2 tc4; +VARYING vec2 tc5; +VARYING vec2 tc6; +VARYING vec2 tc7; +VARYING vec2 tc8; + +void main() +{ + gl_Position = vec4(position, 1.0); + + vec2 tc = (position.xy*0.5+0.5)*screen_res; + tc0 = tc+vec2(-delta.x,-delta.y); + tc1 = tc+vec2(0,-delta.y); + tc2 = tc+vec2(delta.x,-delta.y); + tc3 = tc+vec2(-delta.x,0); + tc4 = tc+vec2(0,0); + tc5 = tc+vec2(delta.x,0); + tc6 = tc+vec2(-delta.x,delta.y); + tc7 = tc+vec2(0,delta.y); + tc8 = tc+vec2(delta.x,delta.y); +} + diff --git a/indra/newview/llmeshrepository.cpp b/indra/newview/llmeshrepository.cpp index b47fe9d4b1..8d3539d297 100755 --- a/indra/newview/llmeshrepository.cpp +++ b/indra/newview/llmeshrepository.cpp @@ -1,3 +1,4 @@ + /** * @file llmeshrepository.cpp * @brief Mesh repository implementation. diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 78401020a6..941c578783 100755 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -1498,6 +1498,8 @@ BOOL LLSpatialGroup::rebound() if (mOctreeNode->getChildCount() == 1 && mOctreeNode->getElementCount() == 0) { LLSpatialGroup* group = (LLSpatialGroup*) mOctreeNode->getChild(0)->getListener(0); + + //rebound single child group->rebound(); //copy single child's bounding box @@ -1506,10 +1508,11 @@ BOOL LLSpatialGroup::rebound() mExtents[0] = group->mExtents[0]; mExtents[1] = group->mExtents[1]; + //treat this node as a "chute" to a deeper level of the tree group->setState(SKIP_FRUSTUM_CHECK); } else if (mOctreeNode->isLeaf()) - { //copy object bounding box if this is a leaf + { //copy object bounding box if this is a leaf boundObjects(TRUE, mExtents[0], mExtents[1]); mBounds[0] = mObjectBounds[0]; mBounds[1] = mObjectBounds[1]; @@ -1518,14 +1521,17 @@ BOOL LLSpatialGroup::rebound() { LLVector4a& newMin = mExtents[0]; LLVector4a& newMax = mExtents[1]; + + //get bounding box of first child LLSpatialGroup* group = (LLSpatialGroup*) mOctreeNode->getChild(0)->getListener(0); group->clearState(SKIP_FRUSTUM_CHECK); group->rebound(); + //initialize to first child newMin = group->mExtents[0]; newMax = group->mExtents[1]; - //first, rebound children + //rebound remaining children, expanding bounding box to encompass children for (U32 i = 1; i < mOctreeNode->getChildCount(); i++) { group = (LLSpatialGroup*) mOctreeNode->getChild(i)->getListener(0); @@ -2506,7 +2512,7 @@ void pushBufferVerts(LLVertexBuffer* buffer, U32 mask) } } -void pushBufferVerts(LLSpatialGroup* group, U32 mask) +void pushBufferVerts(LLSpatialGroup* group, U32 mask, bool push_alpha = true) { if (group->mSpatialPartition->mRenderByGroup) { @@ -2515,7 +2521,10 @@ void pushBufferVerts(LLSpatialGroup* group, U32 mask) LLDrawInfo* params = *(group->mDrawMap.begin()->second.begin()); LLRenderPass::applyModelMatrix(*params); - pushBufferVerts(group->mVertexBuffer, mask); + if (push_alpha) + { + pushBufferVerts(group->mVertexBuffer, mask); + } for (LLSpatialGroup::buffer_map_t::iterator i = group->mBufferMap.begin(); i != group->mBufferMap.end(); ++i) { @@ -2529,10 +2538,10 @@ void pushBufferVerts(LLSpatialGroup* group, U32 mask) } } } - else + /*else { - drawBox(group->mBounds[0], group->mBounds[1]); - } + //drawBox(group->mBounds[0], group->mBounds[1]); + }*/ } void pushVertsColorCoded(LLSpatialGroup* group, U32 mask) @@ -2705,17 +2714,53 @@ void renderOctree(LLSpatialGroup* group) // drawBoxOutline(LLVector3(node->getCenter()), LLVector3(node->getSize())); } +std::set visible_selected_groups; + void renderVisibility(LLSpatialGroup* group, LLCamera* camera) { - LLGLEnable blend(GL_BLEND); + /*LLGLEnable blend(GL_BLEND); gGL.setSceneBlendType(LLRender::BT_ALPHA); LLGLEnable cull(GL_CULL_FACE); - glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);*/ - BOOL render_objects = (!LLPipeline::sUseOcclusion || !group->isOcclusionState(LLSpatialGroup::OCCLUDED)) && group->isVisible() && + /*BOOL render_objects = (!LLPipeline::sUseOcclusion || !group->isOcclusionState(LLSpatialGroup::OCCLUDED)) && group->isVisible() && !group->isEmpty(); + if (render_objects) + { + LLGLDepthTest depth(GL_TRUE, GL_FALSE); + + LLGLDisable blend(GL_BLEND); + gGL.diffuseColor4f(0.f, 0.75f, 0.f,0.5f); + pushBufferVerts(group, LLVertexBuffer::MAP_VERTEX, false); + + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + glLineWidth(4.f); + gGL.diffuseColor4f(0.f, 0.5f, 0.f, 1.f); + pushBufferVerts(group, LLVertexBuffer::MAP_VERTEX, false); + glLineWidth(1.f); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + + bool selected = false; + + for (LLSpatialGroup::element_iter iter = group->getDataBegin(); iter != group->getDataEnd(); ++iter) + { + LLDrawable* drawable = *iter; + if (drawable->getVObj().notNull() && drawable->getVObj()->isSelected()) + { + selected = true; + break; + } + } + + if (selected) + { //store for rendering occlusion volume as overlay + visible_selected_groups.insert(group); + } + }*/ + + /*if (render_objects) { LLGLDepthTest depth_under(GL_TRUE, GL_FALSE, GL_GREATER); gGL.diffuseColor4f(0, 0.5f, 0, 0.5f); @@ -2740,6 +2785,59 @@ void renderVisibility(LLSpatialGroup* group, LLCamera* camera) gGL.diffuseColor4f(0.f, 0.75f, 0.f,0.5f); gGL.diffuseColor4f(0.f, 0.75f, 0.f, 0.5f); pushBufferVerts(group, LLVertexBuffer::MAP_VERTEX); + + bool selected = false; + + for (LLSpatialGroup::element_iter iter = group->getDataBegin(); iter != group->getDataEnd(); ++iter) + { + LLDrawable* drawable = *iter; + if (drawable->getVObj().notNull() && drawable->getVObj()->isSelected()) + { + selected = true; + break; + } + } + + if (selected) + { //store for rendering occlusion volume as overlay + visible_selected_groups.insert(group); + } + } + }*/ +} + +void renderXRay(LLSpatialGroup* group, LLCamera* camera) +{ + BOOL render_objects = (!LLPipeline::sUseOcclusion || !group->isOcclusionState(LLSpatialGroup::OCCLUDED)) && group->isVisible() && + !group->isEmpty(); + + if (render_objects) + { + pushBufferVerts(group, LLVertexBuffer::MAP_VERTEX, false); + + bool selected = false; + + for (LLSpatialGroup::element_iter iter = group->getDataBegin(); iter != group->getDataEnd(); ++iter) + { + LLDrawable* drawable = *iter; + if (drawable->getVObj().notNull() && drawable->getVObj()->isSelected()) + { + selected = true; + break; + } + } + + if (selected) + { //store for rendering occlusion volume as overlay + + if (!group->mSpatialPartition->isBridge()) + { + visible_selected_groups.insert(group); + } + else + { + visible_selected_groups.insert(group->mSpatialPartition->asBridge()->getSpatialGroup()); + } } } } @@ -4210,6 +4308,48 @@ public: } }; +class LLOctreeRenderXRay : public LLOctreeTraveler +{ +public: + LLCamera* mCamera; + LLOctreeRenderXRay(LLCamera* camera): mCamera(camera) {} + + virtual void traverse(const LLSpatialGroup::OctreeNode* node) + { + LLSpatialGroup* group = (LLSpatialGroup*) node->getListener(0); + + if (!mCamera || mCamera->AABBInFrustumNoFarClip(group->mBounds[0], group->mBounds[1])) + { + node->accept(this); + stop_glerror(); + + for (U32 i = 0; i < node->getChildCount(); i++) + { + traverse(node->getChild(i)); + stop_glerror(); + } + + //render visibility wireframe + if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_OCCLUSION)) + { + group->rebuildGeom(); + group->rebuildMesh(); + + gGL.flush(); + gGL.pushMatrix(); + gGLLastMatrix = NULL; + gGL.loadMatrix(gGLModelView); + renderXRay(group, mCamera); + stop_glerror(); + gGLLastMatrix = NULL; + gGL.popMatrix(); + } + } + } + + virtual void visit(const LLSpatialGroup::OctreeNode* node) {} + +}; class LLOctreeRenderPhysicsShapes : public LLOctreeTraveler { @@ -4437,6 +4577,26 @@ void LLSpatialPartition::renderDebug() LLOctreeRenderNonOccluded render_debug(camera); render_debug.traverse(mOctree); + + if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_OCCLUSION)) + { + { + LLGLEnable cull(GL_CULL_FACE); + + LLGLEnable blend(GL_BLEND); + LLGLDepthTest depth_under(GL_TRUE, GL_FALSE, GL_GREATER); + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + gGL.diffuseColor4f(0.5f, 0.0f, 0, 0.25f); + + LLGLEnable offset(GL_POLYGON_OFFSET_LINE); + glPolygonOffset(-1.f, -1.f); + + LLOctreeRenderXRay xray(camera); + xray.traverse(mOctree); + + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + } + } if (LLGLSLShader::sNoFixedFunction) { gDebugProgram.unbind(); diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index 9117bf1c01..23038e529b 100755 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -661,6 +661,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot) static LLCullResult result; LLViewerCamera::sCurCameraID = LLViewerCamera::CAMERA_WORLD; + LLPipeline::sUnderWaterRender = LLViewerCamera::getInstance()->cameraUnderWater() ? TRUE : FALSE; gPipeline.updateCull(*LLViewerCamera::getInstance(), result, water_clip); stop_glerror(); @@ -867,7 +868,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot) //} LLPipeline::sUnderWaterRender = LLViewerCamera::getInstance()->cameraUnderWater() ? TRUE : FALSE; - + LLGLState::checkStates(); LLGLState::checkClientArrays(); diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp index b9e0847935..e24237522a 100755 --- a/indra/newview/llviewershadermgr.cpp +++ b/indra/newview/llviewershadermgr.cpp @@ -93,6 +93,8 @@ LLGLSLShader gTwoTextureAddProgram; LLGLSLShader gOneTextureNoColorProgram; LLGLSLShader gDebugProgram; LLGLSLShader gClipProgram; +LLGLSLShader gDownsampleDepthProgram; +LLGLSLShader gDownsampleDepthRectProgram; LLGLSLShader gAlphaMaskProgram; //object shaders @@ -702,6 +704,8 @@ void LLViewerShaderMgr::unloadShaders() gOcclusionCubeProgram.unload(); gDebugProgram.unload(); gClipProgram.unload(); + gDownsampleDepthProgram.unload(); + gDownsampleDepthRectProgram.unload(); gAlphaMaskProgram.unload(); gUIProgram.unload(); gPathfindingProgram.unload(); @@ -3001,6 +3005,26 @@ BOOL LLViewerShaderMgr::loadShadersInterface() success = gClipProgram.createShader(NULL, NULL); } + if (success) + { + gDownsampleDepthProgram.mName = "DownsampleDepth Shader"; + gDownsampleDepthProgram.mShaderFiles.clear(); + gDownsampleDepthProgram.mShaderFiles.push_back(make_pair("interface/downsampleDepthV.glsl", GL_VERTEX_SHADER_ARB)); + gDownsampleDepthProgram.mShaderFiles.push_back(make_pair("interface/downsampleDepthF.glsl", GL_FRAGMENT_SHADER_ARB)); + gDownsampleDepthProgram.mShaderLevel = mVertexShaderLevel[SHADER_INTERFACE]; + success = gDownsampleDepthProgram.createShader(NULL, NULL); + } + + if (success) + { + gDownsampleDepthRectProgram.mName = "DownsampleDepthRect Shader"; + gDownsampleDepthRectProgram.mShaderFiles.clear(); + gDownsampleDepthRectProgram.mShaderFiles.push_back(make_pair("interface/downsampleDepthV.glsl", GL_VERTEX_SHADER_ARB)); + gDownsampleDepthRectProgram.mShaderFiles.push_back(make_pair("interface/downsampleDepthRectF.glsl", GL_FRAGMENT_SHADER_ARB)); + gDownsampleDepthRectProgram.mShaderLevel = mVertexShaderLevel[SHADER_INTERFACE]; + success = gDownsampleDepthRectProgram.createShader(NULL, NULL); + } + if (success) { gAlphaMaskProgram.mName = "Alpha Mask Shader"; diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h index 8c7de05062..438853cd6f 100755 --- a/indra/newview/llviewershadermgr.h +++ b/indra/newview/llviewershadermgr.h @@ -230,6 +230,8 @@ extern LLGLSLShader gSplatTextureRectProgram; extern LLGLSLShader gGlowCombineFXAAProgram; extern LLGLSLShader gDebugProgram; extern LLGLSLShader gClipProgram; +extern LLGLSLShader gDownsampleDepthProgram; +extern LLGLSLShader gDownsampleDepthRectProgram; //output tex0[tc0] + tex1[tc1] extern LLGLSLShader gTwoTextureAddProgram; @@ -322,6 +324,7 @@ extern LLGLSLShader gWLCloudProgram; extern LLGLSLShader gPostColorFilterProgram; extern LLGLSLShader gPostNightVisionProgram; + // Deferred rendering shaders extern LLGLSLShader gDeferredImpostorProgram; extern LLGLSLShader gDeferredWaterProgram; @@ -369,7 +372,6 @@ extern LLGLSLShader gDeferredSkinnedFullbrightShinyProgram; extern LLGLSLShader gDeferredSkinnedFullbrightProgram; extern LLGLSLShader gNormalMapGenProgram; - // Deferred materials shaders extern LLGLSLShader gDeferredMaterialProgram[LLMaterial::SHADER_COUNT*2]; diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index bd3be5b9cf..f41057fd1f 100755 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -3515,7 +3515,12 @@ F32 LLVOVolume::getBinRadius() } else if (mDrawable->isStatic()) { - radius = llmax((S32) mDrawable->getRadius(), 1)*size_factor; + F32 szf = size_factor; + + radius = llmax(mDrawable->getRadius(), szf); + + radius = powf(radius, 1.f+szf/radius); + radius *= 1.f + mDrawable->mDistanceWRTCamera * distance_factor[1]; radius += mDrawable->mDistanceWRTCamera * distance_factor[0]; } diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 9a4a233b54..6efdf47ec5 100755 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -279,7 +279,7 @@ std::string gPoolNames[] = "POOL_ALPHA" }; -void drawBox(const LLVector3& c, const LLVector3& r); +void drawBox(const LLVector4a& c, const LLVector4a& r); void drawBoxOutline(const LLVector3& pos, const LLVector3& size); U32 nhpo2(U32 v); LLVertexBuffer* ll_create_cube_vb(U32 type_mask, U32 usage); @@ -929,9 +929,12 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples) S32 shadow_detail = RenderShadowDetail; BOOL ssao = RenderDeferredSSAO; + const U32 occlusion_divisor = 3; + //allocate deferred rendering color buffers if (!mDeferredScreen.allocate(resX, resY, GL_SRGB8_ALPHA8, TRUE, TRUE, LLTexUnit::TT_RECT_TEXTURE, FALSE, samples)) return false; if (!mDeferredDepth.allocate(resX, resY, 0, TRUE, FALSE, LLTexUnit::TT_RECT_TEXTURE, FALSE, samples)) return false; + if (!mOcclusionDepth.allocate(resX/occlusion_divisor, resY/occlusion_divisor, 0, TRUE, FALSE, LLTexUnit::TT_RECT_TEXTURE, FALSE, samples)) return false; if (!addDeferredAttachments(mDeferredScreen)) return false; GLuint screenFormat = GL_RGBA16; @@ -972,6 +975,7 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples) for (U32 i = 0; i < 4; i++) { if (!mShadow[i].allocate(sun_shadow_map_width,U32(resY*scale), 0, TRUE, FALSE, LLTexUnit::TT_TEXTURE)) return false; + if (!mShadowOcclusion[i].allocate(mShadow[i].getWidth()/occlusion_divisor, mShadow[i].getHeight()/occlusion_divisor, 0, TRUE, FALSE, LLTexUnit::TT_TEXTURE)) return false; } } else @@ -979,6 +983,7 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples) for (U32 i = 0; i < 4; i++) { mShadow[i].release(); + mShadowOcclusion[i].release(); } } @@ -991,6 +996,7 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples) for (U32 i = 4; i < 6; i++) { if (!mShadow[i].allocate(spot_shadow_map_width, height, 0, TRUE, FALSE)) return false; + if (!mShadowOcclusion[i].allocate(mShadow[i].getWidth()/occlusion_divisor, mShadow[i].getHeight()/occlusion_divisor, 0, TRUE, FALSE)) return false; } } else @@ -998,6 +1004,7 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples) for (U32 i = 4; i < 6; i++) { mShadow[i].release(); + mShadowOcclusion[i].release(); } } @@ -1014,11 +1021,13 @@ bool LLPipeline::allocateScreenBuffer(U32 resX, U32 resY, U32 samples) for (U32 i = 0; i < 6; i++) { mShadow[i].release(); + mShadowOcclusion[i].release(); } mFXAABuffer.release(); mScreen.release(); mDeferredScreen.release(); //make sure to release any render targets that share a depth buffer with mDeferredScreen first mDeferredDepth.release(); + mOcclusionDepth.release(); if (!mScreen.allocate(resX, resY, GL_RGBA, TRUE, TRUE, LLTexUnit::TT_RECT_TEXTURE, FALSE)) return false; } @@ -2433,7 +2442,14 @@ void LLPipeline::updateCull(LLCamera& camera, LLCullResult& result, S32 water_cl if (to_texture) { - mScreen.bindTarget(); + if (LLPipeline::sRenderDeferred && !LLPipeline::sUnderWaterRender) + { + mOcclusionDepth.bindTarget(); + } + else + { + mScreen.bindTarget(); + } } if (sUseOcclusion > 1) @@ -2571,7 +2587,14 @@ void LLPipeline::updateCull(LLCamera& camera, LLCullResult& result, S32 water_cl if (to_texture) { - mScreen.flush(); + if (LLPipeline::sRenderDeferred && !LLPipeline::sUnderWaterRender) + { + mOcclusionDepth.flush(); + } + else + { + mScreen.flush(); + } } } @@ -2639,6 +2662,75 @@ void LLPipeline::markOccluder(LLSpatialGroup* group) } } +void LLPipeline::downsampleDepthBuffer(LLRenderTarget& source, LLRenderTarget& dest, LLRenderTarget* scratch_space) +{ + LLGLSLShader* last_shader = LLGLSLShader::sCurBoundShaderPtr; + + LLGLSLShader* shader = NULL; + + if (scratch_space) + { + scratch_space->copyContents(source, + 0, 0, source.getWidth(), source.getHeight(), + 0, 0, scratch_space->getWidth(), scratch_space->getHeight(), GL_DEPTH_BUFFER_BIT, GL_NEAREST); + } + + dest.bindTarget(); + + + gDownsampleDepthProgram.bind(); + + LLStrider vert; + mDeferredVB->getVertexStrider(vert); + LLStrider tc0; + + vert[0].set(-1,1,0); + vert[1].set(-1,-3,0); + vert[2].set(3,1,0); + + if (source.getUsage() == LLTexUnit::TT_RECT_TEXTURE) + { + shader = &gDownsampleDepthRectProgram; + shader->bind(); + shader->uniform2f("delta", 1.f, 1.f); + shader->uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, source.getWidth(), source.getHeight()); + } + else + { + shader = &gDownsampleDepthProgram; + shader->bind(); + shader->uniform2f("delta", 1.f/source.getWidth(), 1.f/source.getHeight()); + shader->uniform2f(LLShaderMgr::DEFERRED_SCREEN_RES, 1.f, 1.f); + } + + gGL.getTexUnit(0)->bind(scratch_space ? scratch_space : &source, TRUE); + + { + LLGLDepthTest depth(GL_TRUE, GL_TRUE, GL_ALWAYS); + mDeferredVB->setBuffer(LLVertexBuffer::MAP_VERTEX); + mDeferredVB->drawArrays(LLRender::TRIANGLES, 0, 3); + } + + dest.flush(); + + if (last_shader) + { + last_shader->bind(); + } + else + { + gDownsampleDepthProgram.unbind(); + } +} + +void LLPipeline::doOcclusion(LLCamera& camera, LLRenderTarget& source, LLRenderTarget& dest, LLRenderTarget* scratch_space) +{ + downsampleDepthBuffer(source, dest, scratch_space); + dest.bindTarget(); + doOcclusion(camera); + dest.flush(); +} + void LLPipeline::doOcclusion(LLCamera& camera) { if (LLPipeline::sUseOcclusion > 1 && sCull->hasOcclusionGroups()) @@ -4551,7 +4643,7 @@ void LLPipeline::renderGeomDeferred(LLCamera& camera) gGL.setColorMask(true, false); } -void LLPipeline::renderGeomPostDeferred(LLCamera& camera) +void LLPipeline::renderGeomPostDeferred(LLCamera& camera, bool do_occlusion) { LLFastTimer t(FTM_POST_DEFERRED_POOLS); U32 cur_type = 0; @@ -4566,7 +4658,7 @@ void LLPipeline::renderGeomPostDeferred(LLCamera& camera) gGL.setColorMask(true, false); pool_set_t::iterator iter1 = mPools.begin(); - BOOL occlude = LLPipeline::sUseOcclusion > 1; + BOOL occlude = LLPipeline::sUseOcclusion > 1 && do_occlusion; while ( iter1 != mPools.end() ) { @@ -4580,7 +4672,7 @@ void LLPipeline::renderGeomPostDeferred(LLCamera& camera) gGLLastMatrix = NULL; gGL.loadMatrix(gGLModelView); LLGLSLShader::bindNoShader(); - doOcclusion(camera); + doOcclusion(camera, mScreen, mOcclusionDepth, &mDeferredDepth); gGL.setColorMask(true, false); } @@ -4798,6 +4890,7 @@ void LLPipeline::renderPhysicsDisplay() mPhysicsDisplay.flush(); } +extern std::set visible_selected_groups; void LLPipeline::renderDebug() { @@ -5208,6 +5301,27 @@ void LLPipeline::renderDebug() } } + if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_OCCLUSION) && LLGLSLShader::sNoFixedFunction) + { //render visible selected group occlusion geometry + gDebugProgram.bind(); + LLGLDepthTest depth(GL_TRUE, GL_FALSE); + gGL.diffuseColor3f(1,0,1); + for (std::set::iterator iter = visible_selected_groups.begin(); iter != visible_selected_groups.end(); ++iter) + { + LLSpatialGroup* group = *iter; + + LLVector4a fudge; + fudge.splat(0.25f); //SG_OCCLUSION_FUDGE + + LLVector4a size; + size.setAdd(fudge, group->mBounds[1]); + + drawBox(group->mBounds[0], size); + } + } + + visible_selected_groups.clear(); + if (LLGLSLShader::sNoFixedFunction) { gUIProgram.bind(); @@ -8205,11 +8319,7 @@ void LLPipeline::renderDeferredLighting() LLStrider vert; mDeferredVB->getVertexStrider(vert); - LLStrider tc0; - LLStrider tc1; - mDeferredVB->getTexCoord0Strider(tc0); - mDeferredVB->getTexCoord1Strider(tc1); - + vert[0].set(-1,1,0); vert[1].set(-1,-3,0); vert[2].set(3,1,0); @@ -8390,7 +8500,7 @@ void LLPipeline::renderDeferredLighting() LLPipeline::END_RENDER_TYPES); - renderGeomPostDeferred(*LLViewerCamera::getInstance()); + renderGeomPostDeferred(*LLViewerCamera::getInstance(), false); gPipeline.popRenderTypeMask(); } @@ -9324,9 +9434,15 @@ void LLPipeline::renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera gDeferredShadowCubeProgram.bind(); } + LLRenderTarget& occlusion_target = mShadowOcclusion[LLViewerCamera::sCurCameraID-1]; + + occlusion_target.bindTarget(); updateCull(shadow_cam, result); + occlusion_target.flush(); + stateSort(shadow_cam, result); + //generate shadow map gGL.matrixMode(LLRender::MM_PROJECTION); gGL.pushMatrix(); @@ -9414,7 +9530,10 @@ void LLPipeline::renderShadow(glh::matrix4f& view, glh::matrix4f& proj, LLCamera gDeferredShadowCubeProgram.bind(); gGLLastMatrix = NULL; gGL.loadMatrix(gGLModelView); - doOcclusion(shadow_cam); + + LLRenderTarget& occlusion_source = mShadow[LLViewerCamera::sCurCameraID-1]; + + doOcclusion(shadow_cam, occlusion_source, occlusion_target); if (use_shader) { diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index 2c023a6f70..70dcf80407 100755 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h @@ -176,6 +176,12 @@ public: // Object related methods void markVisible(LLDrawable *drawablep, LLCamera& camera); void markOccluder(LLSpatialGroup* group); + + //downsample source to dest, taking the maximum depth value per pixel in source and writing to dest + // if source's depth buffer cannot be bound for reading, a scratch space depth buffer must be provided + void downsampleDepthBuffer(LLRenderTarget& source, LLRenderTarget& dest, LLRenderTarget* scratch_space = NULL); + + void doOcclusion(LLCamera& camera, LLRenderTarget& source, LLRenderTarget& dest, LLRenderTarget* scratch_space = NULL); void doOcclusion(LLCamera& camera); void markNotCulled(LLSpatialGroup* group, LLCamera &camera); void markMoved(LLDrawable *drawablep, BOOL damped_motion = FALSE); @@ -275,7 +281,7 @@ public: void renderGeom(LLCamera& camera, BOOL forceVBOUpdate = FALSE); void renderGeomDeferred(LLCamera& camera); - void renderGeomPostDeferred(LLCamera& camera); + void renderGeomPostDeferred(LLCamera& camera, bool do_occlusion=true); void renderGeomShadow(LLCamera& camera); void bindDeferredShader(LLGLSLShader& shader, U32 light_index = 0, U32 noise_map = 0xFFFFFFFF); void setupSpotLight(LLGLSLShader& shader, LLDrawable* drawablep); @@ -603,6 +609,7 @@ public: LLRenderTarget mFXAABuffer; LLRenderTarget mEdgeMap; LLRenderTarget mDeferredDepth; + LLRenderTarget mOcclusionDepth; LLRenderTarget mDeferredLight; LLRenderTarget mHighlight; LLRenderTarget mPhysicsDisplay; @@ -615,6 +622,7 @@ public: //sun shadow map LLRenderTarget mShadow[6]; + LLRenderTarget mShadowOcclusion[6]; std::vector mShadowFrustPoints[4]; LLVector4 mShadowError; LLVector4 mShadowFOV; -- cgit v1.3