From f5e5396c3a17b6bcdc4eb49cda304a9047920fe1 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 26 Feb 2013 15:15:08 -0600 Subject: MAINT-2371 First set of profile guided optimizations. Reviewed by Graham --- indra/llmath/llvolume.cpp | 97 ++++++++++++++++++++++++++--------------------- indra/llmath/llvolume.h | 3 ++ 2 files changed, 56 insertions(+), 44 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 02c8d2b86f..77d89568df 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5187,7 +5187,8 @@ LLVolumeFace::LLVolumeFace() : mTexCoords(NULL), mIndices(NULL), mWeights(NULL), - mOctree(NULL) + mOctree(NULL), + mOptimized(FALSE) { mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); mExtents[0].splat(-0.5f); @@ -5517,14 +5518,14 @@ class LLVCacheVertexData public: S32 mIdx; S32 mCacheTag; - F32 mScore; + F64 mScore; U32 mActiveTriangles; std::vector mTriangles; LLVCacheVertexData() { mCacheTag = -1; - mScore = 0.f; + mScore = 0.0; mActiveTriangles = 0; mIdx = -1; } @@ -5534,13 +5535,13 @@ class LLVCacheTriangleData { public: bool mActive; - F32 mScore; + F64 mScore; LLVCacheVertexData* mVertex[3]; LLVCacheTriangleData() { mActive = true; - mScore = 0.f; + mScore = 0.0; mVertex[0] = mVertex[1] = mVertex[2] = NULL; } @@ -5551,7 +5552,7 @@ public: { if (mVertex[i]) { - llassert_always(mVertex[i]->mActiveTriangles > 0); + llassert(mVertex[i]->mActiveTriangles > 0); mVertex[i]->mActiveTriangles--; } } @@ -5563,44 +5564,44 @@ public: } }; -const F32 FindVertexScore_CacheDecayPower = 1.5f; -const F32 FindVertexScore_LastTriScore = 0.75f; -const F32 FindVertexScore_ValenceBoostScale = 2.0f; -const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const F64 FindVertexScore_CacheDecayPower = 1.5; +const F64 FindVertexScore_LastTriScore = 0.75; +const F64 FindVertexScore_ValenceBoostScale = 2.0; +const F64 FindVertexScore_ValenceBoostPower = 0.5; const U32 MaxSizeVertexCache = 32; +const F64 FindVertexScore_Scaler = 1.0/(MaxSizeVertexCache-3); -F32 find_vertex_score(LLVCacheVertexData& data) +F64 find_vertex_score(LLVCacheVertexData& data) { - if (data.mActiveTriangles == 0) - { //no triangle references this vertex - return -1.f; - } - - F32 score = 0.f; + F64 score = -1.0; - S32 cache_idx = data.mCacheTag; + if (data.mActiveTriangles >= 0) + { + score = 0.0; + + S32 cache_idx = data.mCacheTag; - if (cache_idx < 0) - { - //not in cache - } - else - { - if (cache_idx < 3) - { //vertex was in the last triangle - score = FindVertexScore_LastTriScore; + if (cache_idx < 0) + { + //not in cache } else - { //more points for being higher in the cache - F32 scaler = 1.f/(MaxSizeVertexCache-3); - score = 1.f-((cache_idx-3)*scaler); - score = powf(score, FindVertexScore_CacheDecayPower); + { + if (cache_idx < 3) + { //vertex was in the last triangle + score = FindVertexScore_LastTriScore; + } + else + { //more points for being higher in the cache + score = 1.0-((cache_idx-3)*FindVertexScore_Scaler); + score = pow(score, FindVertexScore_CacheDecayPower); + } } - } - //bonus points for having low valence - F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); - score += FindVertexScore_ValenceBoostScale * valence_boost; + //bonus points for having low valence + F64 valence_boost = pow((F64)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + score += FindVertexScore_ValenceBoostScale * valence_boost; + } return score; } @@ -5720,7 +5721,7 @@ public: if (mCache[i]) { mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert_always(mCache[i]->mCacheTag == i); + llassert(mCache[i]->mCacheTag == i); } } @@ -5728,11 +5729,14 @@ public: //update triangle scores for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) { - if (mCache[i]) + LLVCacheVertexData* data = mCache[i]; + if (data) { - for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) + U32 count = data->mTriangles.size(); + + for (U32 j = 0; j < count; ++j) { - LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; + LLVCacheTriangleData* tri = data->mTriangles[j]; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5753,7 +5757,7 @@ public: { if (mCache[i]) { - llassert_always(mCache[i]->mCacheTag == -1); + llassert(mCache[i]->mCacheTag == -1); mCache[i] = NULL; } } @@ -5765,6 +5769,9 @@ void LLVolumeFace::cacheOptimize() { //optimize for vertex cache according to Forsyth method: // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html + llassert(!mOptimized); + mOptimized = TRUE; + LLVCacheLRU cache; if (mNumVertices < 3) @@ -5810,12 +5817,14 @@ void LLVolumeFace::cacheOptimize() for (U32 i = 0; i < mNumVertices; i++) { //initialize score values (no cache -- might try a fifo cache here) - vertex_data[i].mScore = find_vertex_score(vertex_data[i]); - vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + LLVCacheVertexData& data = vertex_data[i]; + + data.mScore = find_vertex_score(data); + data.mActiveTriangles = data.mTriangles.size(); - for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) + for (U32 j = 0; j < data.mActiveTriangles; ++j) { - vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; + data.mTriangles[j]->mScore += data.mScore; } } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index c845556557..99158c1c44 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -933,6 +933,9 @@ public: LLOctreeNode* mOctree; + //whether or not face has been cache optimized + BOOL mOptimized; + private: BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE); BOOL createCap(LLVolume* volume, BOOL partial_build = FALSE); -- cgit v1.2.3 From 609ed855e1160505238378a1be49e2b92e8496f5 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Mon, 4 Mar 2013 18:01:42 -0600 Subject: MAINT-2371 More optimizations. Reviewed by Graham --- indra/llmath/llvolume.cpp | 220 ++++++++++++++++++++++++++-------------------- indra/llmath/llvolume.h | 1 + 2 files changed, 127 insertions(+), 94 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 77d89568df..d614695efb 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5180,6 +5180,7 @@ LLVolumeFace::LLVolumeFace() : mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5204,6 +5205,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5258,12 +5260,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) { LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); } - else - { - ll_aligned_free_16(mTexCoords) ; - mTexCoords = NULL ; - } - if (src.mBinormals) { @@ -5311,10 +5307,11 @@ void LLVolumeFace::freeData() { ll_aligned_free_16(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + + //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately mNormals = NULL; - ll_aligned_free_16(mTexCoords); mTexCoords = NULL; + ll_aligned_free_16(mIndices); mIndices = NULL; ll_aligned_free_16(mBinormals); @@ -5496,18 +5493,6 @@ void LLVolumeFace::optimize(F32 angle_cutoff) llassert(new_face.mNumIndices == mNumIndices); llassert(new_face.mNumVertices <= mNumVertices); - if (angle_cutoff > 1.f && !mNormals) - { - ll_aligned_free_16(new_face.mNormals); - new_face.mNormals = NULL; - } - - if (!mTexCoords) - { - ll_aligned_free_16(new_face.mTexCoords); - new_face.mTexCoords = NULL; - } - swapData(new_face); } @@ -5708,35 +5693,44 @@ public: void updateScores() { - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) - { //trailing 3 vertices aren't actually in the cache for scoring purposes - if (mCache[i]) + LLVCacheVertexData** data_iter = mCache+MaxSizeVertexCache; + LLVCacheVertexData** end_data = mCache+MaxSizeVertexCache+3; + + while(data_iter != end_data) + { + LLVCacheVertexData* data = *data_iter++; + //trailing 3 vertices aren't actually in the cache for scoring purposes + if (data) { - mCache[i]->mCacheTag = -1; + data->mCacheTag = -1; } } - for (U32 i = 0; i < MaxSizeVertexCache; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache; + + while (data_iter != end_data) { //update scores of vertices in cache - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert(mCache[i]->mCacheTag == i); + data->mScore = find_vertex_score(*data); } } mBestTriangle = NULL; //update triangle scores - for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache+3; + + while (data_iter != end_data) { - LLVCacheVertexData* data = mCache[i]; + LLVCacheVertexData* data = *data_iter++; if (data) { - U32 count = data->mTriangles.size(); - - for (U32 j = 0; j < count; ++j) + for (std::vector::iterator iter = data->mTriangles.begin(), end_iter = data->mTriangles.end(); iter != end_iter; ++iter) { - LLVCacheTriangleData* tri = data->mTriangles[j]; + LLVCacheTriangleData* tri = *iter; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5753,13 +5747,17 @@ public: } //knock trailing 3 vertices off the cache - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache+MaxSizeVertexCache; + end_data = mCache+MaxSizeVertexCache+3; + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter; + if (data) { - llassert(mCache[i]->mCacheTag == -1); - mCache[i] = NULL; + llassert(data->mCacheTag == -1); + *data_iter = NULL; } + ++data_iter; } } }; @@ -5894,10 +5892,10 @@ void LLVolumeFace::cacheOptimize() //allocate space for new buffer S32 num_verts = mNumVertices; - LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + LLVector4a* pos = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + LLVector4a* norm = pos + num_verts; + LLVector2* tc = (LLVector2*) (norm + num_verts); LLVector4a* wght = NULL; if (mWeights) @@ -5945,9 +5943,8 @@ void LLVolumeFace::cacheOptimize() mIndices[i] = new_idx[mIndices[i]]; } - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mWeights); ll_aligned_free_16(mBinormals); @@ -6664,24 +6661,22 @@ void LLVolumeFace::createBinormals() void LLVolumeFace::resizeVertices(S32 num_verts) { - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); + ll_aligned_free(mPositions); + //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mBinormals); - ll_aligned_free_16(mTexCoords); - + mBinormals = NULL; if (num_verts) { - mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mNormals, 16); - //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - ll_assert_aligned(mTexCoords, 16); + + mPositions = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + mNormals = mPositions+num_verts; + mTexCoords = (LLVector2*) (mNormals+num_verts); + + ll_assert_aligned(mPositions, 64); } else { @@ -6691,6 +6686,7 @@ void LLVolumeFace::resizeVertices(S32 num_verts) } mNumVertices = num_verts; + mNumAllocatedVertices = num_verts; } void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) @@ -6701,27 +6697,43 @@ void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) { S32 new_verts = mNumVertices+1; - S32 new_size = new_verts*16; - S32 old_size = mNumVertices*16; - //positions - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); - ll_assert_aligned(mPositions,16); + if (new_verts > mNumAllocatedVertices) + { + //double buffer size on expansion + new_verts *= 2; + + S32 new_tc_size = ((new_verts*8)+0xF) & ~0xF; + S32 old_tc_size = ((mNumVertices*8)+0xF) & ~0xF; + + S32 old_vsize = mNumVertices*16; + + S32 new_size = new_verts*16*2+new_tc_size; + + LLVector4a* old_buf = mPositions; + + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions+new_verts; + mTexCoords = (LLVector2*) (mNormals+new_verts); + + //positions + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + + //normals + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); - //normals - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); - ll_assert_aligned(mNormals,16); - - //tex coords - new_size = ((new_verts*8)+0xF) & ~0xF; - old_size = ((mNumVertices*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); - ll_assert_aligned(mTexCoords,16); + //tex coords + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tc_size); + //just clear binormals + ll_aligned_free_16(mBinormals); - //just clear binormals - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free(old_buf); + + mNumAllocatedVertices = new_verts; + + mBinormals = NULL; + } mPositions[mNumVertices] = pos; mNormals[mNumVertices] = norm; @@ -6810,13 +6822,23 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat llerrs << "Cannot append empty face." << llendl; } + U32 old_vsize = mNumVertices*16; + U32 new_vsize = new_count * 16; + U32 old_tcsize = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_tcsize = (new_count*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_size = new_vsize * 2 + new_tcsize; + //allocate new buffer space - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); - ll_assert_aligned(mTexCoords, 16); + LLVector4a* old_buf = mPositions; + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions + new_count; + mTexCoords = (LLVector2*) (mNormals+new_count); + + mNumAllocatedVertices = new_count; + + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tcsize); mNumVertices = new_count; @@ -6912,12 +6934,15 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - S32 begin_stex = llfloor( profile[mBeginS].mV[2] ); + F32 begin_stex = floorf(profile[mBeginS].mV[2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; + S32 end_t = mBeginT+mNumT; + bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2; + // Copy the vertices into the array - for (t = mBeginT; t < mBeginT + mNumT; t++) + for (t = mBeginT; t < end_t; t++) { tt = path_data[t].mTexT; for (s = 0; s < num_s; s++) @@ -6968,9 +6993,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) norm[cur_vertex].clear(); cur_vertex++; - if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) + if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); tc[cur_vertex] = LLVector2(ss,tt); @@ -7085,30 +7109,38 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } //generate normals - for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle + U32 count = mNumIndices/3; + + for (U32 i = 0; i < count; i++) //for each triangle { const U16* idx = &(mIndices[i*3]); - - LLVector4a* v[] = - { pos+idx[0], pos+idx[1], pos+idx[2] }; + LLVector4a& v0 = *(pos+idx[0]); + LLVector4a& v1 = *(pos+idx[1]); + LLVector4a& v2 = *(pos+idx[2]); - LLVector4a* n[] = - { norm+idx[0], norm+idx[1], norm+idx[2] }; + LLVector4a& n0 = *(norm+idx[0]); + LLVector4a& n1 = *(norm+idx[1]); + LLVector4a& n2 = *(norm+idx[2]); //calculate triangle normal LLVector4a a, b, c; - a.setSub(*v[0], *v[1]); - b.setSub(*v[0], *v[2]); + a.setSub(v0, v1); + b.setSub(v0, v2); c.setCross3(a,b); - n[0]->add(c); - n[1]->add(c); - n[2]->add(c); + n0.add(c); + n1.add(c); + n2.add(c); //even out quad contributions - n[i%2+1]->add(c); + switch (i%2+1) + { + case 0: n0.add(c); break; + case 1: n1.add(c); break; + case 2: n2.add(c); break; + }; } // adjust normals based on wrapping and stitching diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 99158c1c44..1d3b0fe52f 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -912,6 +912,7 @@ public: LLVector2 mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face. S32 mNumVertices; + S32 mNumAllocatedVertices; S32 mNumIndices; LLVector4a* mPositions; -- cgit v1.2.3 From 1816582b929737f92ee68a1422e3be4e7c02f542 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Wed, 6 Mar 2013 09:09:07 -0800 Subject: Fix crashes from using single alloc for pos/norm/tc in volume face data fighting with old free call in model loading code --- indra/llmath/llvolume.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f989e8ed17..f503eea107 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5307,7 +5307,7 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately @@ -5492,10 +5492,11 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + if (new_face.mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; -- cgit v1.2.3 From f8e059deee28500b88c8c172eaa8c4d7ca657748 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 8 Mar 2013 17:11:30 -0600 Subject: MAINT-2371 Lat round of optimizations. Reviewed by Graham --- indra/llmath/llmatrix4a.h | 11 +- indra/llmath/llvector4a.inl | 13 +- indra/llmath/llvolume.cpp | 1791 ++++++++++++++++--------------------------- indra/llmath/llvolume.h | 79 +- 4 files changed, 707 insertions(+), 1187 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index c4cefdb4fa..d141298f69 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -107,15 +107,14 @@ public: inline void rotate(const LLVector4a& v, LLVector4a& res) { + LLVector4a y,z; + res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); - res.mul(mMatrix[0]); - - LLVector4a y; y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); - y.mul(mMatrix[1]); - - LLVector4a z; z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); + + res.mul(mMatrix[0]); + y.mul(mMatrix[1]); z.mul(mMatrix[2]); res.add(y); diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7c52ffef21..35a67204ec 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -460,16 +460,13 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs) mQ = _mm_max_ps(lhs.mQ, rhs.mQ); } -// Set this to (c * lhs) + rhs * ( 1 - c) +// Set this to lhs + (rhs-lhs)*c inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c) { - LLVector4a a = lhs; - a.mul(c); - - LLVector4a b = rhs; - b.mul(1.f-c); - - setAdd(a, b); + LLVector4a t; + t.setSub(rhs,lhs); + t.mul(c); + setAdd(lhs, t); } inline LLBool32 LLVector4a::isFinite3() const diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f989e8ed17..9fc72fd801 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -94,6 +94,95 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; +bool less_than_max_mag(const LLVector4a& vec); + +template +LLAlignedArray::LLAlignedArray() +{ + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +LLAlignedArray::~LLAlignedArray() +{ + ll_aligned_free(mArray); + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +void LLAlignedArray::push_back(const T& elem) +{ + T* old_buf = NULL; + if (mCapacity <= mElementCount) + { + mCapacity++; + mCapacity *= 2; + T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); + if (mArray) + { + LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); + } + old_buf = mArray; + mArray = new_buf; + } + + mArray[mElementCount++] = elem; + + //delete old array here to prevent error on a.push_back(a[0]) + ll_aligned_free(old_buf); +} + +template +void LLAlignedArray::resize(U32 size) +{ + if (mCapacity < size) + { + mCapacity = size+mCapacity*2; + T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; + if (mArray) + { + LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); + ll_aligned_free(mArray); + } + + /*for (U32 i = mElementCount; i < mCapacity; ++i) + { + new(new_buf+i) T(); + }*/ + mArray = new_buf; + } + + mElementCount = size; +} + + +template +T& LLAlignedArray::operator[](int idx) +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +const T& LLAlignedArray::operator[](int idx) const +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +T* LLAlignedArray::append(S32 N) +{ + U32 sz = size(); + resize(sz+N); + return &((*this)[sz]); +} + + BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -474,7 +563,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f }; F32 scale = 0.5f; F32 t, t_step, t_first, t_fraction, ang, ang_step; - LLVector3 pt1,pt2; + LLVector4a pt1,pt2; F32 begin = params.getBegin(); F32 end = params.getEnd(); @@ -497,20 +586,21 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // Starting t and ang values for the first face t = t_first; ang = 2.0f*F_PI*(t*ang_scale + offset); - pt1.setVec(cos(ang)*scale,sin(ang)*scale, t); + pt1.set(cos(ang)*scale,sin(ang)*scale, t); // Increment to the next point. // pt2 is the end point on the fractional face t += t_step; ang += ang_step; - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); t_fraction = (begin - t_first)*sides; // Only use if it's not almost exactly on an edge. if (t_fraction < 0.9999f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); mProfile.push_back(new_pt); } @@ -518,12 +608,17 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 while (t < end) { // Iterate through all the integer steps of t. - pt1.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt1.set(cos(ang)*scale,sin(ang)*scale,t); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + LLVector4a new_pt; + new_pt.setSub(pt1, p); + new_pt.mul(1.0f/(float)(split+1) * (float)(i+1)); + new_pt.add(p); + mProfile.push_back(new_pt); } } mProfile.push_back(pt1); @@ -536,18 +631,25 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // pt1 is the first point on the fractional face // pt2 is the end point on the fractional face - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); // Find the fraction that we need to add to the end point. t_fraction = (end - (t - t_step))*sides; if (t_fraction > 0.0001f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + + LLVector4a pt1; + pt1.setSub(new_pt, p); + pt1.mul(1.0f/(float)(split+1) * (float)(i+1)); + pt1.add(p); + mProfile.push_back(pt1); } } mProfile.push_back(new_pt); @@ -568,7 +670,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 if (params.getHollow() <= 0) { // put center point if not hollow. - mProfile.push_back(LLVector3(0,0,0)); + mProfile.push_back(LLVector4a(0,0,0)); } } else @@ -581,103 +683,6 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 mTotal = mProfile.size(); } -void LLProfile::genNormals(const LLProfileParams& params) -{ - S32 count = mProfile.size(); - - S32 outer_count; - if (mTotalOut) - { - outer_count = mTotalOut; - } - else - { - outer_count = mTotal / 2; - } - - mEdgeNormals.resize(count * 2); - mEdgeCenters.resize(count * 2); - mNormals.resize(count); - - LLVector2 pt0,pt1; - - BOOL hollow = (params.getHollow() > 0); - - S32 i0, i1, i2, i3, i4; - - // Parametrically generate normal - for (i2 = 0; i2 < count; i2++) - { - mNormals[i2].mV[0] = mProfile[i2].mV[0]; - mNormals[i2].mV[1] = mProfile[i2].mV[1]; - if (hollow && (i2 >= outer_count)) - { - mNormals[i2] *= -1.f; - } - if (mNormals[i2].magVec() < 0.001) - { - // Special case for point at center, get adjacent points. - i1 = (i2 - 1) >= 0 ? i2 - 1 : count - 1; - i0 = (i1 - 1) >= 0 ? i1 - 1 : count - 1; - i3 = (i2 + 1) < count ? i2 + 1 : 0; - i4 = (i3 + 1) < count ? i3 + 1 : 0; - - pt0.setVec(mProfile[i1].mV[VX] + mProfile[i1].mV[VX] - mProfile[i0].mV[VX], - mProfile[i1].mV[VY] + mProfile[i1].mV[VY] - mProfile[i0].mV[VY]); - pt1.setVec(mProfile[i3].mV[VX] + mProfile[i3].mV[VX] - mProfile[i4].mV[VX], - mProfile[i3].mV[VY] + mProfile[i3].mV[VY] - mProfile[i4].mV[VY]); - - mNormals[i2] = pt0 + pt1; - mNormals[i2] *= 0.5f; - } - mNormals[i2].normVec(); - } - - S32 num_normal_sets = isConcave() ? 2 : 1; - for (S32 normal_set = 0; normal_set < num_normal_sets; normal_set++) - { - S32 point_num; - for (point_num = 0; point_num < mTotal; point_num++) - { - LLVector3 point_1 = mProfile[point_num]; - point_1.mV[VZ] = 0.f; - - LLVector3 point_2; - - if (isConcave() && normal_set == 0 && point_num == (mTotal - 1) / 2) - { - point_2 = mProfile[mTotal - 1]; - } - else if (isConcave() && normal_set == 1 && point_num == mTotal - 1) - { - point_2 = mProfile[(mTotal - 1) / 2]; - } - else - { - LLVector3 delta_pos; - S32 neighbor_point = (point_num + 1) % mTotal; - while(delta_pos.magVecSquared() < 0.01f * 0.01f) - { - point_2 = mProfile[neighbor_point]; - delta_pos = point_2 - point_1; - neighbor_point = (neighbor_point + 1) % mTotal; - if (neighbor_point == point_num) - { - break; - } - } - } - - point_2.mV[VZ] = 0.f; - LLVector3 face_normal = (point_2 - point_1) % LLVector3::z_axis; - face_normal.normVec(); - mEdgeNormals[normal_set * count + point_num] = face_normal; - mEdgeCenters[normal_set * count + point_num] = lerp(point_1, point_2, 0.5f); - } - } -} - - // Hollow is percent of the original bounding box, not of this particular // profile's geometry. Thus, a swept triangle needs lower hollow values than // a swept square. @@ -693,12 +698,13 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3 Face *face = addFace(mTotalOut, mTotal-mTotalOut,0,LL_FACE_INNER_SIDE, flat); - std::vector pt; + static LLAlignedArray pt; pt.resize(mTotal) ; for (S32 i=mTotalOut;imPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + + LLMatrix3 rot(twist * qang); + + pt->mRot.loadu(rot); t+=step; @@ -1408,51 +1419,55 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en // Run through the non-cut dependent points. while (t < params.getEnd()) { - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; - + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); + t+=step; } // Make one final pass for the end cut. t = params.getEnd(); - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; - + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); + mTotal = mPath.size(); } @@ -1549,7 +1564,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, mDirty = FALSE; S32 np = 2; // hardcode for line - mPath.clear(); + mPath.resize(0); mOpen = TRUE; // Is this 0xf0 mask really necessary? DK 03/02/05 @@ -1575,12 +1590,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, for (S32 i=0;imPath.size() * mProfilep->mProfile.size()) > (1u << 20)) - { - llinfos << "sizeS: " << mPathp->mPath.size() << " sizeT: " << mProfilep->mProfile.size() << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split); BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split); @@ -2163,21 +2176,6 @@ BOOL LLVolume::generate() S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); - //******************************************************************** - //debug info, to be removed - if((U32)(sizeS * sizeT) > (1u << 20)) - { - llinfos << "regenPath: " << (S32)regenPath << " regenProf: " << (S32)regenProf << llendl ; - llinfos << "sizeS: " << sizeS << " sizeT: " << sizeT << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - sNumMeshPoints -= mMesh.size(); mMesh.resize(sizeT * sizeS); sNumMeshPoints += mMesh.size(); @@ -2185,22 +2183,39 @@ BOOL LLVolume::generate() //generate vertex positions // Run along the path. + LLVector4a* dst = mMesh.mArray; + for (S32 s = 0; s < sizeS; ++s) { - LLVector2 scale = mPathp->mPath[s].mScale; - LLQuaternion rot = mPathp->mPath[s].mRot; + F32* scale = mPathp->mPath[s].mScale.getF32ptr(); + + F32 sc [] = + { scale[0], 0, 0, 0, + 0, scale[1], 0, 0, + 0, 0, scale[2], 0, + 0, 0, 0, 1 }; + + LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix); + LLMatrix4 scale_mat(sc); + + scale_mat *= rot; + + LLMatrix4a rot_mat; + rot_mat.loadu(scale_mat); + + LLVector4a* profile = mProfilep->mProfile.mArray; + LLVector4a* end_profile = profile+sizeT; + LLVector4a offset = mPathp->mPath[s].mPos; + + LLVector4a tmp; // Run along the profile. - for (S32 t = 0; t < sizeT; ++t) + while (profile < end_profile) { - S32 m = s*sizeT + t; - Point& pt = mMesh[m]; - - pt.mPos.mV[0] = mProfilep->mProfile[t].mV[0] * scale.mV[0]; - pt.mPos.mV[1] = mProfilep->mProfile[t].mV[1] * scale.mV[1]; - pt.mPos.mV[2] = 0.0f; - pt.mPos = pt.mPos * rot; - pt.mPos += mPathp->mPath[s].mPos; + rot_mat.rotate(*profile++, tmp); + dst->setAdd(tmp,offset); + llassert(less_than_max_mag(*dst)); + ++dst; } } @@ -2210,9 +2225,11 @@ BOOL LLVolume::generate() LLFaceID id = iter->mFaceID; mFaceMask |= id; } - + LL_CHECK_MEMORY return TRUE; } + + LL_CHECK_MEMORY return FALSE; } @@ -2790,14 +2807,16 @@ void LLVolume::createVolumeFaces() } -inline LLVector3 sculpt_rgb_to_vector(U8 r, U8 g, U8 b) +inline LLVector4a sculpt_rgb_to_vector(U8 r, U8 g, U8 b) { // maps RGB values to vector values [0..255] -> [-0.5..0.5] - LLVector3 value; - value.mV[VX] = r / 255.f - 0.5f; - value.mV[VY] = g / 255.f - 0.5f; - value.mV[VZ] = b / 255.f - 0.5f; + LLVector4a value; + LLVector4a sub(0.5f, 0.5f, 0.5f); + value.set(r,g,b); + value.mul(1.f/255.f); + value.sub(sub); + return value; } @@ -2817,21 +2836,21 @@ inline U32 sculpt_st_to_index(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_w } -inline LLVector3 sculpt_index_to_vector(U32 index, const U8* sculpt_data) +inline LLVector4a sculpt_index_to_vector(U32 index, const U8* sculpt_data) { - LLVector3 v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); + LLVector4a v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); return v; } -inline LLVector3 sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_st_to_index(s, t, size_s, size_t, sculpt_width, sculpt_height, sculpt_components); return sculpt_index_to_vector(index, sculpt_data); } -inline LLVector3 sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_xy_to_index(x, y, sculpt_width, sculpt_height, sculpt_components); @@ -2853,15 +2872,26 @@ F32 LLVolume::sculptGetSurfaceArea() for (S32 t = 0; t < sizeT-1; t++) { // get four corners of quad - LLVector3 p1 = mMesh[(s )*sizeT + (t )].mPos; - LLVector3 p2 = mMesh[(s+1)*sizeT + (t )].mPos; - LLVector3 p3 = mMesh[(s )*sizeT + (t+1)].mPos; - LLVector3 p4 = mMesh[(s+1)*sizeT + (t+1)].mPos; + LLVector4a& p1 = mMesh[(s )*sizeT + (t )]; + LLVector4a& p2 = mMesh[(s+1)*sizeT + (t )]; + LLVector4a& p3 = mMesh[(s )*sizeT + (t+1)]; + LLVector4a& p4 = mMesh[(s+1)*sizeT + (t+1)]; // compute the area of the quad by taking the length of the cross product of the two triangles - LLVector3 cross1 = (p1 - p2) % (p1 - p3); - LLVector3 cross2 = (p4 - p2) % (p4 - p3); - area += (cross1.magVec() + cross2.magVec()) / 2.f; + LLVector4a v0,v1,v2,v3; + v0.setSub(p1,p2); + v1.setSub(p1,p3); + v2.setSub(p4,p2); + v3.setSub(p4,p3); + + LLVector4a cross1, cross2; + cross1.setCross3(v0,v1); + cross2.setCross3(v2,v3); + + //LLVector3 cross1 = (p1 - p2) % (p1 - p3); + //LLVector3 cross2 = (p4 - p2) % (p4 - p3); + + area += (cross1.getLength3() + cross2.getLength3()).getF32() / 2.f; } } @@ -2882,17 +2912,19 @@ void LLVolume::sculptGeneratePlaceholder() for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; F32 u = (F32)s/(sizeS-1); F32 v = (F32)t/(sizeT-1); const F32 RADIUS = (F32) 0.3; - - pt.mPos.mV[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[2] = (F32)(cos(F_PI * v) * RADIUS); + + F32* p = pt.getF32ptr(); + + p[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); + p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); + p[2] = (F32)(cos(F_PI * v) * RADIUS); } line += sizeT; @@ -2917,7 +2949,7 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; S32 reversed_t = t; @@ -2974,11 +3006,12 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 } } - pt.mPos = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); + pt = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); if (sculpt_mirror) { - pt.mPos.mV[VX] *= -1.f; + LLVector4a scale(-1.f,1,1,1); + pt.mul(scale); } } @@ -3560,803 +3593,125 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h return true; } -S32 *LLVolume::getTriangleIndices(U32 &num_indices) const +void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) +{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the + //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost + F32 detail[] = {1.f, 1.5f, 2.5f, 4.f}; + for (S32 i = 0; i < 4; i++) + { + S32 count = 0; + S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); + S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); + + count = (profile_points-1)*2*(path_points-1); + count += profile_points*2; + + counts[i] = count; + } +} + + +S32 LLVolume::getNumTriangles(S32* vcount) const { - S32 expected_num_triangle_indices = getNumTriangleIndices(); - if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES) + U32 triangle_count = 0; + U32 vertex_count = 0; + + for (S32 i = 0; i < getNumVolumeFaces(); ++i) { - // we don't allow LLVolumes with this many vertices - llwarns << "Couldn't allocate triangle indices" << llendl; - num_indices = 0; - return NULL; + const LLVolumeFace& face = getVolumeFace(i); + triangle_count += face.mNumIndices/3; + + vertex_count += face.mNumVertices; } - S32* index = new S32[expected_num_triangle_indices]; - S32 count = 0; - // Let's do this totally diffently, as we don't care about faces... - // Counter-clockwise triangles are forward facing... + if (vcount) + { + *vcount = vertex_count; + } + + return triangle_count; +} + - BOOL open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - S32 size_s, size_s_out, size_t; - S32 s, t, i; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); +//----------------------------------------------------------------------------- +// generateSilhouetteVertices() +//----------------------------------------------------------------------------- +void LLVolume::generateSilhouetteVertices(std::vector &vertices, + std::vector &normals, + const LLVector3& obj_cam_vec_in, + const LLMatrix4& mat_in, + const LLMatrix3& norm_mat_in, + S32 face_mask) +{ + LLMatrix4a mat; + mat.loadu(mat_in); + + LLMatrix4a norm_mat; + norm_mat.loadu(norm_mat_in); + + LLVector4a obj_cam_vec; + obj_cam_vec.load3(obj_cam_vec_in.mV); - // NOTE -- if the construction of the triangles below ever changes - // then getNumTriangleIndices() method may also have to be updated. + vertices.clear(); + normals.clear(); - if (open) /* Flawfinder: ignore */ + if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) { - if (hollow) + return; + } + + S32 cur_index = 0; + //for each face + for (face_list_t::iterator iter = mVolumeFaces.begin(); + iter != mVolumeFaces.end(); ++iter) + { + LLVolumeFace& face = *iter; + + if (!(face_mask & (0x1 << cur_index++)) || + face.mNumIndices == 0 || face.mEdge.empty()) { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 + continue; + } - for (t = 0; t < size_t - 1; t++) - { - // The outer face, first cut, and inner face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 + if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } + } + else { - // The other cut face - index[count++] = s + t*size_s; // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = s + (t+1)*size_s; // x,y+1 - - index[count++] = s + (t+1)*size_s; // x,y+1 - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } + //============================================== + //DEBUG draw edge map instead of silhouette edge + //============================================== - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; +#if DEBUG_SILHOUETTE_EDGE_MAP - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } + //for each triangle + U32 count = face.mNumIndices; + for (U32 j = 0; j < count/3; j++) { + //get vertices + S32 v1 = face.mIndices[j*3+0]; + S32 v2 = face.mIndices[j*3+1]; + S32 v3 = face.mIndices[j*3+2]; - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + //get current face center + LLVector3 cCenter = (face.mVertices[v1].getPosition() + + face.mVertices[v2].getPosition() + + face.mVertices[v3].getPosition()) / 3.0f; - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } + //for each edge + for (S32 k = 0; k < 3; k++) { + S32 nIndex = face.mEdge[j*3+k]; + if (nIndex <= -1) { + continue; } - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; + if (nIndex >= (S32) count/3) { + continue; } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Open solid - - for (t = 0; t < size_t - 1; t++) - { - // Outer face + 1 cut face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - - // The other cut face - index[count++] = (size_s - 1) + (t*size_s); // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - index[count++] = 0 + (t*size_s); // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - for (s = 0; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = size_s - 1; - } - - // We've got a top cap - S32 offset = (size_t - 1)*size_s; - for (s = 0; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset + size_s - 1; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - } - else if (hollow) - { - // Closed hollow - // Outer face - - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s_out - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Inner face - // Invert facing from outer face - for (t = 0; t < size_t - 1; t++) - { - for (s = size_s_out; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; - - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; - } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Closed solid. Easy case. - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s - 1; s++) - { - // Should wrap properly, but for now... - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // bottom cap - for (s = 1; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = 0; - } - - // top cap - S32 offset = (size_t - 1)*size_s; - for (s = 1; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - -#ifdef LL_DEBUG - // assert that we computed the correct number of indices - if (count != expected_num_triangle_indices ) - { - llerrs << "bad index count prediciton:" - << " expected=" << expected_num_triangle_indices - << " actual=" << count << llendl; - } -#endif - -#if 0 - // verify that each index does not point beyond the size of the mesh - S32 num_vertices = mMesh.size(); - for (i = 0; i < count; i+=3) - { - llinfos << index[i] << ":" << index[i+1] << ":" << index[i+2] << llendl; - llassert(index[i] < num_vertices); - llassert(index[i+1] < num_vertices); - llassert(index[i+2] < num_vertices); - } -#endif - - num_indices = count; - return index; -} - -void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) -{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the - //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost - F32 detail[] = {1.f, 1.5f, 2.5f, 4.f}; - for (S32 i = 0; i < 4; i++) - { - S32 count = 0; - S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); - S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); - - count = (profile_points-1)*2*(path_points-1); - count += profile_points*2; - - counts[i] = count; - } -} - -S32 LLVolume::getNumTriangleIndices() const -{ - BOOL profile_open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - - S32 size_s, size_s_out, size_t; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); - - S32 count = 0; - if (profile_open) /* Flawfinder: ignore */ - { - if (hollow) - { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - else - { - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - } - else if (hollow) - { - // Closed hollow - // Outer face - count = (size_t - 1) * (size_s_out - 1) * 6; - - // Inner face - count += (size_t - 1) * ((size_s - 1) - size_s_out) * 6; - } - else - { - // Closed solid. Easy case. - count = (size_t - 1) * (size_s - 1) * 6; - } - - if (path_open) - { - S32 cap_triangle_count = size_s - 3; - if ( profile_open - || hollow ) - { - cap_triangle_count = size_s - 2; - } - if ( cap_triangle_count > 0 ) - { - // top and bottom caps - count += cap_triangle_count * 2 * 3; - } - } - return count; -} - - -S32 LLVolume::getNumTriangles(S32* vcount) const -{ - U32 triangle_count = 0; - U32 vertex_count = 0; - - for (S32 i = 0; i < getNumVolumeFaces(); ++i) - { - const LLVolumeFace& face = getVolumeFace(i); - triangle_count += face.mNumIndices/3; - - vertex_count += face.mNumVertices; - } - - - if (vcount) - { - *vcount = vertex_count; - } - - return triangle_count; -} - - -//----------------------------------------------------------------------------- -// generateSilhouetteVertices() -//----------------------------------------------------------------------------- -void LLVolume::generateSilhouetteVertices(std::vector &vertices, - std::vector &normals, - const LLVector3& obj_cam_vec_in, - const LLMatrix4& mat_in, - const LLMatrix3& norm_mat_in, - S32 face_mask) -{ - LLMatrix4a mat; - mat.loadu(mat_in); - - LLMatrix4a norm_mat; - norm_mat.loadu(norm_mat_in); - - LLVector4a obj_cam_vec; - obj_cam_vec.load3(obj_cam_vec_in.mV); - - vertices.clear(); - normals.clear(); - - if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) - { - return; - } - - S32 cur_index = 0; - //for each face - for (face_list_t::iterator iter = mVolumeFaces.begin(); - iter != mVolumeFaces.end(); ++iter) - { - LLVolumeFace& face = *iter; - - if (!(face_mask & (0x1 << cur_index++)) || - face.mNumIndices == 0 || face.mEdge.empty()) - { - continue; - } - - if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { - - } - else { - - //============================================== - //DEBUG draw edge map instead of silhouette edge - //============================================== - -#if DEBUG_SILHOUETTE_EDGE_MAP - - //for each triangle - U32 count = face.mNumIndices; - for (U32 j = 0; j < count/3; j++) { - //get vertices - S32 v1 = face.mIndices[j*3+0]; - S32 v2 = face.mIndices[j*3+1]; - S32 v3 = face.mIndices[j*3+2]; - - //get current face center - LLVector3 cCenter = (face.mVertices[v1].getPosition() + - face.mVertices[v2].getPosition() + - face.mVertices[v3].getPosition()) / 3.0f; - - //for each edge - for (S32 k = 0; k < 3; k++) { - S32 nIndex = face.mEdge[j*3+k]; - if (nIndex <= -1) { - continue; - } - - if (nIndex >= (S32) count/3) { - continue; - } - //get neighbor vertices - v1 = face.mIndices[nIndex*3+0]; - v2 = face.mIndices[nIndex*3+1]; - v3 = face.mIndices[nIndex*3+2]; + //get neighbor vertices + v1 = face.mIndices[nIndex*3+0]; + v2 = face.mIndices[nIndex*3+1]; + v3 = face.mIndices[nIndex*3+2]; //get neighbor face center LLVector3 nCenter = (face.mVertices[v1].getPosition() + @@ -5243,8 +4598,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) freeData(); - LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a)); - resizeVertices(src.mNumVertices); resizeIndices(src.mNumIndices); @@ -5307,7 +4660,7 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately @@ -5331,52 +4684,23 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) delete mOctree; mOctree = NULL; + LL_CHECK_MEMORY BOOL ret = FALSE ; if (mTypeMask & CAP_MASK) { ret = createCap(volume, partial_build); + LL_CHECK_MEMORY } else if ((mTypeMask & END_MASK) || (mTypeMask & SIDE_MASK)) { ret = createSide(volume, partial_build); + LL_CHECK_MEMORY } else { llerrs << "Unknown/uninitialized face type!" << llendl; } - //update the range of the texture coordinates - if(ret) - { - mTexCoordExtents[0].setVec(1.f, 1.f) ; - mTexCoordExtents[1].setVec(0.f, 0.f) ; - - for(U32 i = 0 ; i < mNumVertices ; i++) - { - if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0]) - { - mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ; - } - if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0]) - { - mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ; - } - - if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1]) - { - mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ; - } - if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1]) - { - mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ; - } - } - mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ; - mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ; - mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ; - mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ; - } - return ret ; } @@ -6068,8 +5392,10 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + LL_CHECK_MEMORY + + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6099,9 +5425,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) VertexData baseVert; for(S32 t = 0; t < 4; t++) { - corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); - corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; - corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; + corners[t].getPosition().load4a(mesh[offset + (grid_size*t)].getF32ptr()); + corners[t].mTexCoord.mV[0] = profile[grid_size*t][0]+0.5f; + corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t][1]; } { @@ -6182,6 +5508,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) mCenter->mul(0.5f); } + llassert(less_than_max_mag(mExtents[0])); + llassert(less_than_max_mag(mExtents[1])); + if (!partial_build) { resizeIndices(grid_size*grid_size*6); @@ -6212,6 +5541,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY return TRUE; } @@ -6230,8 +5560,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) S32 num_vertices = 0, num_indices = 0; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; // All types of caps have the same number of vertices and indices num_vertices = profile.size(); @@ -6251,13 +5581,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { resizeVertices(num_vertices); allocateBinormals(num_vertices); - if (!partial_build) { resizeIndices(num_indices); } } + LL_CHECK_MEMORY; + S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6288,35 +5619,68 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector4a* binorm = (LLVector4a*) mBinormals; // Copy the vertices into the array - for (S32 i = 0; i < num_vertices; i++) + + const LLVector4a* src = mesh.mArray+offset; + const LLVector4a* end = src+num_vertices; + + min = *src; + max = min; + + + const LLVector4a* p = profile.mArray; + + if (mTypeMask & TOP_MASK) { - if (mTypeMask & TOP_MASK) - { - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = profile[i].mV[1]+0.5f; - } - else + min_uv.set((*p)[0]+0.5f, + (*p)[1]+0.5f); + + max_uv = min_uv; + + while(src < end) { - // Mirror for underside. - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = 0.5f - profile[i].mV[1]; - } + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = (*p)[1]+0.5f; - pos[i].load3(mesh[i + offset].mPos.mV); + llassert(less_than_max_mag(*src)); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); - if (i == 0) - { - max = pos[i]; - min = max; - min_uv = max_uv = tc[i]; + *pos = *src; + + ++p; + ++tc; + ++src; + ++pos; } - else + } + else + { + + min_uv.set((*p)[0]+0.5f, + 0.5f - (*p)[1]); + max_uv = min_uv; + + while(src < end) { - update_min_max(min,max,pos[i]); - update_min_max(min_uv, max_uv, tc[i]); + // Mirror for underside. + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = 0.5f - (*p)[1]; + + llassert(less_than_max_mag(*src)); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); + + *pos = *src; + + ++p; + ++tc; + ++src; + ++pos; } } + LL_CHECK_MEMORY + mCenter->setAdd(min, max); mCenter->mul(0.5f); @@ -6353,15 +5717,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { - pos[num_vertices] = *mCenter; - tc[num_vertices] = cuv; + *pos++ = *mCenter; + *tc++ = cuv; num_vertices++; } - - for (S32 i = 0; i < num_vertices; i++) + + LL_CHECK_MEMORY + + F32* dst_binorm = (F32*) binorm; + F32* end_binorm = (F32*) (binorm+num_vertices); + + F32* dst_norm = (F32*) norm; + + while (dst_binorm < end_binorm) { - binorm[i].load4a(binormal.getF32ptr()); - norm[i].load4a(normal.getF32ptr()); + binormal.store4a(dst_binorm); + normal.store4a(dst_norm); + + dst_binorm += 4; + dst_norm += 4; } if (partial_build) @@ -6382,33 +5756,38 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; + //p1.mV[VZ] = 0.f; + //p2.mV[VZ] = 0.f; + //pa.mV[VZ] = 0.f; + //pb.mV[VZ] = 0.f; // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6443,10 +5822,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1, pa); + + LLVector4a d2; + d2.setSub(p2, pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6485,33 +5867,33 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); + // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6546,10 +5928,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1,pa); + LLVector4a d2; + d2.setSub(p2,pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6598,6 +5982,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } + + LL_CHECK_MEMORY return TRUE; } @@ -6900,6 +6286,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { + LL_CHECK_MEMORY BOOL flat = mTypeMask & FLAT_MASK; U8 sculpt_type = volume->getParams().getSculptType(); @@ -6910,9 +6297,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) S32 num_vertices, num_indices; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; - const std::vector& path_data = volume->getPath().mPath; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; + const LLAlignedArray& path_data = volume->getPath().mPath; S32 max_s = volume->getProfile().getTotal(); @@ -6933,10 +6320,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + LLVector4a* pos = (LLVector4a*) mPositions; - LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - F32 begin_stex = floorf(profile[mBeginS].mV[2]); + F32 begin_stex = floorf(profile[mBeginS][2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; @@ -6965,11 +6353,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // Get s value for tex-coord. if (!flat) { - ss = profile[mBeginS + s].mV[2]; + ss = profile[mBeginS + s][2]; } else { - ss = profile[mBeginS + s].mV[2] - begin_stex; + ss = profile[mBeginS + s][2] - begin_stex; } } @@ -6989,19 +6377,17 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) i = mBeginS + s + max_s*t; } - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); - norm[cur_vertex].clear(); cur_vertex++; if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - - norm[cur_vertex].clear(); - + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); cur_vertex++; } } @@ -7018,28 +6404,66 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } i = mBeginS + s + max_s*t; - ss = profile[mBeginS + s].mV[2] - begin_stex; - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - norm[cur_vertex].clear(); - + ss = profile[mBeginS + s][2] - begin_stex; + + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); + cur_vertex++; } } + LL_CHECK_MEMORY - //get bounding box for this side - LLVector4a& face_min = mExtents[0]; - LLVector4a& face_max = mExtents[1]; + mCenter->clear(); - face_min = face_max = pos[0]; + LLVector4a* cur_pos = pos; + LLVector4a* end_pos = pos + mNumVertices; + + //get bounding box for this side + LLVector4a face_min; + LLVector4a face_max; + + face_min = face_max = *cur_pos++; + + while (cur_pos < end_pos) + { + update_min_max(face_min, face_max, *cur_pos++); + } + + mExtents[0] = face_min; + mExtents[1] = face_max; + + U32 tc_count = mNumVertices; + if (tc_count%2 == 1) + { //odd number of texture coordinates, duplicate last entry to padded end of array + tc_count++; + mTexCoords[mNumVertices] = mTexCoords[mNumVertices-1]; + } + + LLVector4a* cur_tc = (LLVector4a*) mTexCoords; + LLVector4a* end_tc = (LLVector4a*) (mTexCoords+tc_count); + + LLVector4a tc_min; + LLVector4a tc_max; + + tc_min = tc_max = *cur_tc++; - for (U32 i = 1; i < mNumVertices; ++i) + while (cur_tc < end_tc) { - update_min_max(face_min, face_max, pos[i]); + update_min_max(tc_min, tc_max, *cur_tc++); } + F32* minp = tc_min.getF32ptr(); + F32* maxp = tc_max.getF32ptr(); + + mTexCoordExtents[0].mV[0] = llmin(minp[0], minp[2]); + mTexCoordExtents[0].mV[1] = llmin(minp[1], minp[3]); + mTexCoordExtents[1].mV[0] = llmax(maxp[0], maxp[2]); + mTexCoordExtents[1].mV[1] = llmax(maxp[1], maxp[3]); + mCenter->setAdd(face_min, face_max); mCenter->mul(0.5f); @@ -7104,33 +6528,94 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + //clear normals - for (U32 i = 0; i < mNumVertices; i++) + F32* dst = (F32*) mNormals; + F32* end = (F32*) (mNormals+mNumVertices); + LLVector4a zero = LLVector4a::getZero(); + + while (dst < end) { - mNormals[i].clear(); + zero.store4a(dst); + dst += 4; } + LL_CHECK_MEMORY + //generate normals U32 count = mNumIndices/3; - for (U32 i = 0; i < count; i++) //for each triangle + LLVector4a* norm = mNormals; + + static LLAlignedArray triangle_normals; + triangle_normals.resize(count); + LLVector4a* output = triangle_normals.mArray; + LLVector4a* end_output = output+count; + + U16* idx = mIndices; + + while (output < end_output) { - const U16* idx = &(mIndices[i*3]); - - LLVector4a& v0 = *(pos+idx[0]); - LLVector4a& v1 = *(pos+idx[1]); - LLVector4a& v2 = *(pos+idx[2]); - - LLVector4a& n0 = *(norm+idx[0]); - LLVector4a& n1 = *(norm+idx[1]); - LLVector4a& n2 = *(norm+idx[2]); + LLVector4a b,v1,v2; + b.load4a((F32*) (pos+idx[0])); + v1.load4a((F32*) (pos+idx[1])); + v2.load4a((F32*) (pos+idx[2])); //calculate triangle normal - LLVector4a a, b, c; + LLVector4a a; - a.setSub(v0, v1); - b.setSub(v0, v2); - c.setCross3(a,b); + a.setSub(b, v1); + b.sub(v2); + + + LLQuad& vector1 = *((LLQuad*) &v1); + LLQuad& vector2 = *((LLQuad*) &v2); + + LLQuad& amQ = *((LLQuad*) &a); + LLQuad& bmQ = *((LLQuad*) &b); + + //v1.setCross3(t,v0); + //setCross3(const LLVector4a& a, const LLVector4a& b) + // Vectors are stored in memory in w, z, y, x order from high to low + // Set vector1 = { a[W], a[X], a[Z], a[Y] } + vector1 = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // Set vector2 = { b[W], b[Y], b[X], b[Z] } + vector2 = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] } + vector2 = _mm_mul_ps( vector1, vector2 ); + // vector3 = { a[W], a[Y], a[X], a[Z] } + amQ = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // vector4 = { b[W], b[X], b[Z], b[Y] } + bmQ = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } + vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ )); + + v1.store4a((F32*) output); + + output++; + idx += 3; + } + + idx = mIndices; + + LLVector4a* src = triangle_normals.mArray; + + for (U32 i = 0; i < count; i++) //for each triangle + { + LLVector4a c; + c.load4a((F32*) (src++)); + + LLVector4a* n0p = norm+idx[0]; + LLVector4a* n1p = norm+idx[1]; + LLVector4a* n2p = norm+idx[2]; + + idx += 3; + + LLVector4a n0,n1,n2; + n0.load4a((F32*) n0p); + n1.load4a((F32*) n1p); + n2.load4a((F32*) n2p); n0.add(c); n1.add(c); @@ -7143,8 +6628,14 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) case 1: n1.add(c); break; case 2: n2.add(c); break; }; + + n0.store4a((F32*) n0p); + n1.store4a((F32*) n1p); + n2.store4a((F32*) n2p); } + LL_CHECK_MEMORY + // adjust normals based on wrapping and stitching LLVector4a top; @@ -7276,6 +6767,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } + LL_CHECK_MEMORY + return TRUE; } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 1d3b0fe52f..5e43af92ec 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -37,7 +37,6 @@ class LLPath; template class LLOctreeNode; -class LLVector4a; class LLVolumeFace; class LLVolume; class LLVolumeTriangle; @@ -50,6 +49,8 @@ class LLVolumeTriangle; #include "v3math.h" #include "v3dmath.h" #include "v4math.h" +#include "llvector4a.h" +#include "llmatrix4a.h" #include "llquaternion.h" #include "llstrider.h" #include "v4coloru.h" @@ -194,6 +195,26 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; +template +class LLAlignedArray +{ +public: + T* mArray; + U32 mElementCount; + U32 mCapacity; + + LLAlignedArray(); + ~LLAlignedArray(); + + void push_back(const T& elem); + U32 size() const { return mElementCount; } + void resize(U32 size); + T* append(S32 N); + T& operator[](int idx); + const T& operator[](int idx) const; +}; + + class LLProfileParams { public: @@ -708,16 +729,16 @@ public: LLFaceID mFaceID; }; - std::vector mProfile; - std::vector mNormals; + LLAlignedArray mProfile; + //LLAlignedArray mNormals; std::vector mFaces; - std::vector mEdgeNormals; - std::vector mEdgeCenters; + + //LLAlignedArray mEdgeNormals; + //LLAlignedArray mEdgeCenters; friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile); protected: - void genNormals(const LLProfileParams& params); static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); @@ -741,13 +762,29 @@ protected: class LLPath { public: - struct PathPt + class PathPt { - LLVector3 mPos; - LLVector2 mScale; - LLQuaternion mRot; + public: + LLMatrix4a mRot; + LLVector4a mPos; + + LLVector4a mScale; F32 mTexT; - PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); } + F32 pad[3]; //for alignment + PathPt() + { + mPos.clear(); + mTexT = 0; + mScale.clear(); + mRot.setRows(LLVector4a(1,0,0,0), + LLVector4a(0,1,0,0), + LLVector4a(0,0,1,0)); + + //distinguished data in the pad for debugging + pad[0] = 3.14159f; + pad[1] = -3.14159f; + pad[2] = 0.585f; + } }; public: @@ -779,7 +816,7 @@ public: friend std::ostream& operator<<(std::ostream &s, const LLPath &path); public: - std::vector mPath; + LLAlignedArray mPath; protected: BOOL mOpen; @@ -951,11 +988,7 @@ protected: ~LLVolume(); // use unref public: - struct Point - { - LLVector3 mPos; - }; - + struct FaceParams { LLFaceID mFaceID; @@ -978,8 +1011,8 @@ public: const LLProfile& getProfile() const { return *mProfilep; } LLPath& getPath() const { return *mPathp; } void resizePath(S32 length); - const std::vector& getMesh() const { return mMesh; } - const LLVector3& getMeshPt(const U32 i) const { return mMesh[i].mPos; } + const LLAlignedArray& getMesh() const { return mMesh; } + const LLVector4a& getMeshPt(const U32 i) const { return mMesh[i]; } void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); } @@ -994,10 +1027,7 @@ public: S32 getSculptLevel() const { return mSculptLevel; } void setSculptLevel(S32 level) { mSculptLevel = level; } - S32 *getTriangleIndices(U32 &num_indices) const; - - // returns number of triangle indeces required for path/profile mesh - S32 getNumTriangleIndices() const; + static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts); S32 getNumTriangles(S32* vcount = NULL) const; @@ -1070,7 +1100,8 @@ public: LLVolumeParams mParams; LLPath *mPathp; LLProfile *mProfilep; - std::vector mMesh; + LLAlignedArray mMesh; + BOOL mGenerateSingleFace; typedef std::vector face_list_t; -- cgit v1.2.3 From c04f4f66c813181eb378b00045aec969dc2c4aae Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 12:30:16 -0700 Subject: Moved LLAlignedArray from llmath to llcommon and put template func impls in header to work around Mac 4.3.3 link issue. --- indra/llmath/llvolume.cpp | 87 ----------------------------------------------- indra/llmath/llvolume.h | 21 +----------- 2 files changed, 1 insertion(+), 107 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 9fc72fd801..4f3e753276 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -96,93 +96,6 @@ extern BOOL gDebugGL; bool less_than_max_mag(const LLVector4a& vec); -template -LLAlignedArray::LLAlignedArray() -{ - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template -LLAlignedArray::~LLAlignedArray() -{ - ll_aligned_free(mArray); - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template -void LLAlignedArray::push_back(const T& elem) -{ - T* old_buf = NULL; - if (mCapacity <= mElementCount) - { - mCapacity++; - mCapacity *= 2; - T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - } - old_buf = mArray; - mArray = new_buf; - } - - mArray[mElementCount++] = elem; - - //delete old array here to prevent error on a.push_back(a[0]) - ll_aligned_free(old_buf); -} - -template -void LLAlignedArray::resize(U32 size) -{ - if (mCapacity < size) - { - mCapacity = size+mCapacity*2; - T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - ll_aligned_free(mArray); - } - - /*for (U32 i = mElementCount; i < mCapacity; ++i) - { - new(new_buf+i) T(); - }*/ - mArray = new_buf; - } - - mElementCount = size; -} - - -template -T& LLAlignedArray::operator[](int idx) -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template -const T& LLAlignedArray::operator[](int idx) const -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template -T* LLAlignedArray::append(S32 N) -{ - U32 sz = size(); - resize(sz+N); - return &((*this)[sz]); -} - - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 5e43af92ec..1ff53590cf 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -41,6 +41,7 @@ class LLVolumeFace; class LLVolume; class LLVolumeTriangle; +#include "llalignedarray.h" #include "lldarray.h" #include "lluuid.h" #include "v4color.h" @@ -195,26 +196,6 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; -template -class LLAlignedArray -{ -public: - T* mArray; - U32 mElementCount; - U32 mCapacity; - - LLAlignedArray(); - ~LLAlignedArray(); - - void push_back(const T& elem); - U32 size() const { return mElementCount; } - void resize(U32 size); - T* append(S32 N); - T& operator[](int idx); - const T& operator[](int idx) const; -}; - - class LLProfileParams { public: -- cgit v1.2.3 From 356d10e6a6cfea279228f371aed88aba73ed70dd Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 13:12:03 -0700 Subject: Work around 'interesting' forced include order in vmath stuff on Windows --- indra/llmath/llvolume.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 1ff53590cf..6b599a4126 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -41,7 +41,6 @@ class LLVolumeFace; class LLVolume; class LLVolumeTriangle; -#include "llalignedarray.h" #include "lldarray.h" #include "lluuid.h" #include "v4color.h" @@ -58,6 +57,7 @@ class LLVolumeTriangle; #include "llrefcount.h" #include "llpointer.h" #include "llfile.h" +#include "llalignedarray.h" //============================================================================ -- cgit v1.2.3 From 9a0f913e7089c9b2120856936bf47dcb8de9c6b1 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham)" Date: Mon, 11 Mar 2013 13:55:40 -0700 Subject: Work around bogus gcc 4.3.4 warning from alignment assert --- indra/llmath/llvector4a.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'indra/llmath') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 0526793d3a..1a478bc8de 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -93,7 +93,11 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary +// This assert is causing spurious referenced before set warnings on GCC 4.3.4 +// +#if !LL_LINUX ll_assert_aligned(this,16); +#endif } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) -- cgit v1.2.3 From 6ac6736994240d9789a81bf585468bef50805fd8 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 16:00:25 -0700 Subject: Move 16b aligned memcpy and alignment utilities to llmem in llcommon for easier use elsewhere --- indra/llmath/llsimdmath.h | 28 ------------------------- indra/llmath/llvector4a.cpp | 50 +-------------------------------------------- 2 files changed, 1 insertion(+), 77 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..cebd2ace7d 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include #endif -template T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0xF) & ~0xF); -} - -template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include #include diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..570fa41a43 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) -- cgit v1.2.3 From e8b8a12b7365c17cf0326be365b78bcb1da1bfac Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Tue, 12 Mar 2013 10:48:05 -0700 Subject: Mods to make compile on gcc 4.6.3 work mo betta --- indra/llmath/llvolume.cpp | 5 ----- 1 file changed, 5 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index eff224743b..cb5633c1bb 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5314,12 +5314,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) S32 max_t = volume->getPath().mPath.size(); // S32 i; - S32 num_vertices = 0, num_indices = 0; S32 grid_size = (profile.size()-1)/4; - S32 quad_count = (grid_size * grid_size); - - num_vertices = (grid_size+1)*(grid_size+1); - num_indices = quad_count * 4; LLVector4a& min = mExtents[0]; LLVector4a& max = mExtents[1]; -- cgit v1.2.3 From 5d2fea6262d91eb8d3c06d97a160ca9373b96889 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham Linden)" Date: Wed, 13 Mar 2013 10:42:40 -0700 Subject: Move fast memcpy to llcommon and use it in llalignedarray pushback on all platforms. Code Review: DaveP --- indra/llmath/llsimdmath.h | 28 ------------------------- indra/llmath/llvector4a.cpp | 50 +-------------------------------------------- indra/llmath/llvector4a.h | 4 ---- indra/llmath/llvolume.cpp | 11 ++++++---- 4 files changed, 8 insertions(+), 85 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..cebd2ace7d 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include #endif -template T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0xF) & ~0xF); -} - -template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include #include diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..570fa41a43 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 1a478bc8de..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -93,11 +93,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary -// This assert is causing spurious referenced before set warnings on GCC 4.3.4 -// -#if !LL_LINUX ll_assert_aligned(this,16); -#endif } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 4f3e753276..e7a0bc7df7 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4729,10 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + // Only swap data if we've actually optimized the mesh + // + if (new_face.mNumVertices < mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; -- cgit v1.2.3 From bba84a3fa9a1af87f6a8080f9093f9277feb1292 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham Linden)" Date: Wed, 13 Mar 2013 13:38:30 -0700 Subject: Cleanup per code review of prev change with DaveP --- indra/llmath/llvolume.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cb5633c1bb..edd16b5688 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4729,11 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - if (new_face.mNumVertices) + // disallow data amplification + // + if (new_face.mNumVertices <= mNumVertices) { - llassert(new_face.mNumIndices == mNumIndices); - swapData(new_face); - } + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; @@ -6731,3 +6733,4 @@ void calc_binormal_from_triangle(LLVector4a& binormal, binormal.set( 0, 1 , 0 ); } } + -- cgit v1.2.3 From dbfcd6c9c5709b74365c2538ba312685b09d22bf Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 7 May 2013 17:20:33 -0500 Subject: Optimization -- don't draw glow in alpha pool unless needed --- indra/llmath/llvolume.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 602f2c29e5..7751ef87ee 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4520,7 +4520,11 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size); - LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + + if (src.mNormals) + { + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + } if(src.mTexCoords) { -- cgit v1.2.3 From ec5bd94d59247e600f8f8702a543f99d40930d20 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 15 May 2013 11:00:00 -0700 Subject: BUG-2581 : [simon-ll-viewer-cat] Path cut, hollow and slice break planar texture mapping on prims. Credits to DaveP --- indra/llmath/llvolume.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 7751ef87ee..317d15f84e 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5604,16 +5604,16 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector4a binormal; calc_binormal_from_triangle(binormal, *mCenter, cuv, - pos[0], tc[0], - pos[1], tc[1]); + mPositions[0], mTexCoords[0], + mPositions[1], mTexCoords[1]); binormal.normalize3fast(); LLVector4a normal; LLVector4a d0, d1; - d0.setSub(*mCenter, pos[0]); - d1.setSub(*mCenter, pos[1]); + d0.setSub(*mCenter, mPositions[0]); + d1.setSub(*mCenter, mPositions[1]); if (mTypeMask & TOP_MASK) { -- cgit v1.2.3 From cacefaf2fc1b95557aca62dc69dbe3744c6424b0 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 30 Aug 2013 15:55:20 -0500 Subject: MAINT-2811 Fix for infinite loop on octree code during teleport. --- indra/llmath/llvolume.cpp | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 773995cb86..0c329c69b2 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -802,6 +802,7 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai { // Scale by 4 to generate proper tex coords. mProfile[i].mul(scale); + llassert(mProfile[i].isFinite3()); } if (hollow) @@ -839,6 +840,7 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai { // Scale by 3 to generate proper tex coords. mProfile[i].mul(scale); + llassert(mProfile[i].isFinite3()); } if (path_open) @@ -989,6 +991,7 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai { mOpen = FALSE; mProfile.push_back(mProfile[0]); + llassert(mProfile[0].isFinite3()); mTotal++; } } @@ -2127,7 +2130,7 @@ BOOL LLVolume::generate() { rot_mat.rotate(*profile++, tmp); dst->setAdd(tmp,offset); - llassert(less_than_max_mag(*dst)); + llassert(dst->isFinite3()); ++dst; } } @@ -2840,6 +2843,8 @@ void LLVolume::sculptGeneratePlaceholder() p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); p[2] = (F32)(cos(F_PI * v) * RADIUS); + llassert(pt.isFinite3()); + } line += sizeT; } @@ -2927,6 +2932,8 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 LLVector4a scale(-1.f,1,1,1); pt.mul(scale); } + + llassert(pt.isFinite3()); } line += sizeT; @@ -5552,12 +5559,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) tc->mV[0] = (*p)[0]+0.5f; tc->mV[1] = (*p)[1]+0.5f; - llassert(less_than_max_mag(*src)); + llassert(src->isFinite3()); update_min_max(min,max,*src); update_min_max(min_uv, max_uv, *tc); *pos = *src; + llassert(pos->isFinite3()); + ++p; ++tc; ++src; @@ -5577,11 +5586,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) tc->mV[0] = (*p)[0]+0.5f; tc->mV[1] = 0.5f - (*p)[1]; - llassert(less_than_max_mag(*src)); + llassert(src->isFinite3()); update_min_max(min,max,*src); update_min_max(min_uv, max_uv, *tc); *pos = *src; + + llassert(pos->isFinite3()); ++p; ++tc; @@ -6468,8 +6479,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ )); + llassert(v1.isFinite3()); + v1.store4a((F32*) output); + output++; idx += 3; } @@ -6498,14 +6512,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) n1.add(c); n2.add(c); - llassert(llfinite(c.getF32ptr()[0])); - llassert(llfinite(c.getF32ptr()[1])); - llassert(llfinite(c.getF32ptr()[2])); - - llassert(!llisnan(c.getF32ptr()[0])); - llassert(!llisnan(c.getF32ptr()[1])); - llassert(!llisnan(c.getF32ptr()[2])); - + llassert(c.isFinite3()); + //even out quad contributions switch (i%2+1) { -- cgit v1.2.3 From 29216ac5e725254c48711737f5c22d2ad3e5e4a2 Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Mon, 23 Sep 2013 13:43:01 -0700 Subject: MAINT-3179 fix bad indexing of position info generating false alarms on range checks, issue with free of data now allocated contig with positions instead, and added copious amounts of llwarns to error return sites so 'invalid element' will be a reason to read the log and not a tease. --- indra/llmath/llvolume.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index de960e5d95..2b865b4a8e 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4827,13 +4827,15 @@ void LLVolumeFace::optimize(F32 angle_cutoff) if (angle_cutoff > 1.f && !mNormals) { - ll_aligned_free_16(new_face.mNormals); + // Now alloc'd with positions + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!mTexCoords) { - ll_aligned_free_16(new_face.mTexCoords); + // Now alloc'd with positions + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } -- cgit v1.2.3