From f5e5396c3a17b6bcdc4eb49cda304a9047920fe1 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 26 Feb 2013 15:15:08 -0600 Subject: MAINT-2371 First set of profile guided optimizations. Reviewed by Graham --- indra/llmath/llvolume.cpp | 97 ++++++++++++++++++++++++++--------------------- indra/llmath/llvolume.h | 3 ++ 2 files changed, 56 insertions(+), 44 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 02c8d2b86f..77d89568df 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5187,7 +5187,8 @@ LLVolumeFace::LLVolumeFace() : mTexCoords(NULL), mIndices(NULL), mWeights(NULL), - mOctree(NULL) + mOctree(NULL), + mOptimized(FALSE) { mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); mExtents[0].splat(-0.5f); @@ -5517,14 +5518,14 @@ class LLVCacheVertexData public: S32 mIdx; S32 mCacheTag; - F32 mScore; + F64 mScore; U32 mActiveTriangles; std::vector mTriangles; LLVCacheVertexData() { mCacheTag = -1; - mScore = 0.f; + mScore = 0.0; mActiveTriangles = 0; mIdx = -1; } @@ -5534,13 +5535,13 @@ class LLVCacheTriangleData { public: bool mActive; - F32 mScore; + F64 mScore; LLVCacheVertexData* mVertex[3]; LLVCacheTriangleData() { mActive = true; - mScore = 0.f; + mScore = 0.0; mVertex[0] = mVertex[1] = mVertex[2] = NULL; } @@ -5551,7 +5552,7 @@ public: { if (mVertex[i]) { - llassert_always(mVertex[i]->mActiveTriangles > 0); + llassert(mVertex[i]->mActiveTriangles > 0); mVertex[i]->mActiveTriangles--; } } @@ -5563,44 +5564,44 @@ public: } }; -const F32 FindVertexScore_CacheDecayPower = 1.5f; -const F32 FindVertexScore_LastTriScore = 0.75f; -const F32 FindVertexScore_ValenceBoostScale = 2.0f; -const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const F64 FindVertexScore_CacheDecayPower = 1.5; +const F64 FindVertexScore_LastTriScore = 0.75; +const F64 FindVertexScore_ValenceBoostScale = 2.0; +const F64 FindVertexScore_ValenceBoostPower = 0.5; const U32 MaxSizeVertexCache = 32; +const F64 FindVertexScore_Scaler = 1.0/(MaxSizeVertexCache-3); -F32 find_vertex_score(LLVCacheVertexData& data) +F64 find_vertex_score(LLVCacheVertexData& data) { - if (data.mActiveTriangles == 0) - { //no triangle references this vertex - return -1.f; - } - - F32 score = 0.f; + F64 score = -1.0; - S32 cache_idx = data.mCacheTag; + if (data.mActiveTriangles >= 0) + { + score = 0.0; + + S32 cache_idx = data.mCacheTag; - if (cache_idx < 0) - { - //not in cache - } - else - { - if (cache_idx < 3) - { //vertex was in the last triangle - score = FindVertexScore_LastTriScore; + if (cache_idx < 0) + { + //not in cache } else - { //more points for being higher in the cache - F32 scaler = 1.f/(MaxSizeVertexCache-3); - score = 1.f-((cache_idx-3)*scaler); - score = powf(score, FindVertexScore_CacheDecayPower); + { + if (cache_idx < 3) + { //vertex was in the last triangle + score = FindVertexScore_LastTriScore; + } + else + { //more points for being higher in the cache + score = 1.0-((cache_idx-3)*FindVertexScore_Scaler); + score = pow(score, FindVertexScore_CacheDecayPower); + } } - } - //bonus points for having low valence - F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); - score += FindVertexScore_ValenceBoostScale * valence_boost; + //bonus points for having low valence + F64 valence_boost = pow((F64)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + score += FindVertexScore_ValenceBoostScale * valence_boost; + } return score; } @@ -5720,7 +5721,7 @@ public: if (mCache[i]) { mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert_always(mCache[i]->mCacheTag == i); + llassert(mCache[i]->mCacheTag == i); } } @@ -5728,11 +5729,14 @@ public: //update triangle scores for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) { - if (mCache[i]) + LLVCacheVertexData* data = mCache[i]; + if (data) { - for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) + U32 count = data->mTriangles.size(); + + for (U32 j = 0; j < count; ++j) { - LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; + LLVCacheTriangleData* tri = data->mTriangles[j]; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5753,7 +5757,7 @@ public: { if (mCache[i]) { - llassert_always(mCache[i]->mCacheTag == -1); + llassert(mCache[i]->mCacheTag == -1); mCache[i] = NULL; } } @@ -5765,6 +5769,9 @@ void LLVolumeFace::cacheOptimize() { //optimize for vertex cache according to Forsyth method: // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html + llassert(!mOptimized); + mOptimized = TRUE; + LLVCacheLRU cache; if (mNumVertices < 3) @@ -5810,12 +5817,14 @@ void LLVolumeFace::cacheOptimize() for (U32 i = 0; i < mNumVertices; i++) { //initialize score values (no cache -- might try a fifo cache here) - vertex_data[i].mScore = find_vertex_score(vertex_data[i]); - vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + LLVCacheVertexData& data = vertex_data[i]; + + data.mScore = find_vertex_score(data); + data.mActiveTriangles = data.mTriangles.size(); - for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) + for (U32 j = 0; j < data.mActiveTriangles; ++j) { - vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; + data.mTriangles[j]->mScore += data.mScore; } } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index c845556557..99158c1c44 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -933,6 +933,9 @@ public: LLOctreeNode* mOctree; + //whether or not face has been cache optimized + BOOL mOptimized; + private: BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE); BOOL createCap(LLVolume* volume, BOOL partial_build = FALSE); -- cgit v1.2.3 From 609ed855e1160505238378a1be49e2b92e8496f5 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Mon, 4 Mar 2013 18:01:42 -0600 Subject: MAINT-2371 More optimizations. Reviewed by Graham --- indra/llmath/llvolume.cpp | 220 ++++++++++++++++++++++++++-------------------- indra/llmath/llvolume.h | 1 + 2 files changed, 127 insertions(+), 94 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 77d89568df..d614695efb 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5180,6 +5180,7 @@ LLVolumeFace::LLVolumeFace() : mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5204,6 +5205,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5258,12 +5260,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) { LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); } - else - { - ll_aligned_free_16(mTexCoords) ; - mTexCoords = NULL ; - } - if (src.mBinormals) { @@ -5311,10 +5307,11 @@ void LLVolumeFace::freeData() { ll_aligned_free_16(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + + //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately mNormals = NULL; - ll_aligned_free_16(mTexCoords); mTexCoords = NULL; + ll_aligned_free_16(mIndices); mIndices = NULL; ll_aligned_free_16(mBinormals); @@ -5496,18 +5493,6 @@ void LLVolumeFace::optimize(F32 angle_cutoff) llassert(new_face.mNumIndices == mNumIndices); llassert(new_face.mNumVertices <= mNumVertices); - if (angle_cutoff > 1.f && !mNormals) - { - ll_aligned_free_16(new_face.mNormals); - new_face.mNormals = NULL; - } - - if (!mTexCoords) - { - ll_aligned_free_16(new_face.mTexCoords); - new_face.mTexCoords = NULL; - } - swapData(new_face); } @@ -5708,35 +5693,44 @@ public: void updateScores() { - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) - { //trailing 3 vertices aren't actually in the cache for scoring purposes - if (mCache[i]) + LLVCacheVertexData** data_iter = mCache+MaxSizeVertexCache; + LLVCacheVertexData** end_data = mCache+MaxSizeVertexCache+3; + + while(data_iter != end_data) + { + LLVCacheVertexData* data = *data_iter++; + //trailing 3 vertices aren't actually in the cache for scoring purposes + if (data) { - mCache[i]->mCacheTag = -1; + data->mCacheTag = -1; } } - for (U32 i = 0; i < MaxSizeVertexCache; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache; + + while (data_iter != end_data) { //update scores of vertices in cache - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert(mCache[i]->mCacheTag == i); + data->mScore = find_vertex_score(*data); } } mBestTriangle = NULL; //update triangle scores - for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache+3; + + while (data_iter != end_data) { - LLVCacheVertexData* data = mCache[i]; + LLVCacheVertexData* data = *data_iter++; if (data) { - U32 count = data->mTriangles.size(); - - for (U32 j = 0; j < count; ++j) + for (std::vector::iterator iter = data->mTriangles.begin(), end_iter = data->mTriangles.end(); iter != end_iter; ++iter) { - LLVCacheTriangleData* tri = data->mTriangles[j]; + LLVCacheTriangleData* tri = *iter; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5753,13 +5747,17 @@ public: } //knock trailing 3 vertices off the cache - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache+MaxSizeVertexCache; + end_data = mCache+MaxSizeVertexCache+3; + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter; + if (data) { - llassert(mCache[i]->mCacheTag == -1); - mCache[i] = NULL; + llassert(data->mCacheTag == -1); + *data_iter = NULL; } + ++data_iter; } } }; @@ -5894,10 +5892,10 @@ void LLVolumeFace::cacheOptimize() //allocate space for new buffer S32 num_verts = mNumVertices; - LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + LLVector4a* pos = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + LLVector4a* norm = pos + num_verts; + LLVector2* tc = (LLVector2*) (norm + num_verts); LLVector4a* wght = NULL; if (mWeights) @@ -5945,9 +5943,8 @@ void LLVolumeFace::cacheOptimize() mIndices[i] = new_idx[mIndices[i]]; } - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mWeights); ll_aligned_free_16(mBinormals); @@ -6664,24 +6661,22 @@ void LLVolumeFace::createBinormals() void LLVolumeFace::resizeVertices(S32 num_verts) { - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); + ll_aligned_free(mPositions); + //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mBinormals); - ll_aligned_free_16(mTexCoords); - + mBinormals = NULL; if (num_verts) { - mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mNormals, 16); - //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - ll_assert_aligned(mTexCoords, 16); + + mPositions = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + mNormals = mPositions+num_verts; + mTexCoords = (LLVector2*) (mNormals+num_verts); + + ll_assert_aligned(mPositions, 64); } else { @@ -6691,6 +6686,7 @@ void LLVolumeFace::resizeVertices(S32 num_verts) } mNumVertices = num_verts; + mNumAllocatedVertices = num_verts; } void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) @@ -6701,27 +6697,43 @@ void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) { S32 new_verts = mNumVertices+1; - S32 new_size = new_verts*16; - S32 old_size = mNumVertices*16; - //positions - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); - ll_assert_aligned(mPositions,16); + if (new_verts > mNumAllocatedVertices) + { + //double buffer size on expansion + new_verts *= 2; + + S32 new_tc_size = ((new_verts*8)+0xF) & ~0xF; + S32 old_tc_size = ((mNumVertices*8)+0xF) & ~0xF; + + S32 old_vsize = mNumVertices*16; + + S32 new_size = new_verts*16*2+new_tc_size; + + LLVector4a* old_buf = mPositions; + + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions+new_verts; + mTexCoords = (LLVector2*) (mNormals+new_verts); + + //positions + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + + //normals + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); - //normals - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); - ll_assert_aligned(mNormals,16); - - //tex coords - new_size = ((new_verts*8)+0xF) & ~0xF; - old_size = ((mNumVertices*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); - ll_assert_aligned(mTexCoords,16); + //tex coords + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tc_size); + //just clear binormals + ll_aligned_free_16(mBinormals); - //just clear binormals - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free(old_buf); + + mNumAllocatedVertices = new_verts; + + mBinormals = NULL; + } mPositions[mNumVertices] = pos; mNormals[mNumVertices] = norm; @@ -6810,13 +6822,23 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat llerrs << "Cannot append empty face." << llendl; } + U32 old_vsize = mNumVertices*16; + U32 new_vsize = new_count * 16; + U32 old_tcsize = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_tcsize = (new_count*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_size = new_vsize * 2 + new_tcsize; + //allocate new buffer space - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); - ll_assert_aligned(mTexCoords, 16); + LLVector4a* old_buf = mPositions; + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions + new_count; + mTexCoords = (LLVector2*) (mNormals+new_count); + + mNumAllocatedVertices = new_count; + + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tcsize); mNumVertices = new_count; @@ -6912,12 +6934,15 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - S32 begin_stex = llfloor( profile[mBeginS].mV[2] ); + F32 begin_stex = floorf(profile[mBeginS].mV[2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; + S32 end_t = mBeginT+mNumT; + bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2; + // Copy the vertices into the array - for (t = mBeginT; t < mBeginT + mNumT; t++) + for (t = mBeginT; t < end_t; t++) { tt = path_data[t].mTexT; for (s = 0; s < num_s; s++) @@ -6968,9 +6993,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) norm[cur_vertex].clear(); cur_vertex++; - if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) + if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); tc[cur_vertex] = LLVector2(ss,tt); @@ -7085,30 +7109,38 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } //generate normals - for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle + U32 count = mNumIndices/3; + + for (U32 i = 0; i < count; i++) //for each triangle { const U16* idx = &(mIndices[i*3]); - - LLVector4a* v[] = - { pos+idx[0], pos+idx[1], pos+idx[2] }; + LLVector4a& v0 = *(pos+idx[0]); + LLVector4a& v1 = *(pos+idx[1]); + LLVector4a& v2 = *(pos+idx[2]); - LLVector4a* n[] = - { norm+idx[0], norm+idx[1], norm+idx[2] }; + LLVector4a& n0 = *(norm+idx[0]); + LLVector4a& n1 = *(norm+idx[1]); + LLVector4a& n2 = *(norm+idx[2]); //calculate triangle normal LLVector4a a, b, c; - a.setSub(*v[0], *v[1]); - b.setSub(*v[0], *v[2]); + a.setSub(v0, v1); + b.setSub(v0, v2); c.setCross3(a,b); - n[0]->add(c); - n[1]->add(c); - n[2]->add(c); + n0.add(c); + n1.add(c); + n2.add(c); //even out quad contributions - n[i%2+1]->add(c); + switch (i%2+1) + { + case 0: n0.add(c); break; + case 1: n1.add(c); break; + case 2: n2.add(c); break; + }; } // adjust normals based on wrapping and stitching diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 99158c1c44..1d3b0fe52f 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -912,6 +912,7 @@ public: LLVector2 mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face. S32 mNumVertices; + S32 mNumAllocatedVertices; S32 mNumIndices; LLVector4a* mPositions; -- cgit v1.2.3 From 1816582b929737f92ee68a1422e3be4e7c02f542 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Wed, 6 Mar 2013 09:09:07 -0800 Subject: Fix crashes from using single alloc for pos/norm/tc in volume face data fighting with old free call in model loading code --- indra/llmath/llvolume.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f989e8ed17..f503eea107 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5307,7 +5307,7 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately @@ -5492,10 +5492,11 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + if (new_face.mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; -- cgit v1.2.3 From f8e059deee28500b88c8c172eaa8c4d7ca657748 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 8 Mar 2013 17:11:30 -0600 Subject: MAINT-2371 Lat round of optimizations. Reviewed by Graham --- indra/llmath/llmatrix4a.h | 11 +- indra/llmath/llvector4a.inl | 13 +- indra/llmath/llvolume.cpp | 1791 ++++++++++++++++--------------------------- indra/llmath/llvolume.h | 79 +- 4 files changed, 707 insertions(+), 1187 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index c4cefdb4fa..d141298f69 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -107,15 +107,14 @@ public: inline void rotate(const LLVector4a& v, LLVector4a& res) { + LLVector4a y,z; + res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); - res.mul(mMatrix[0]); - - LLVector4a y; y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); - y.mul(mMatrix[1]); - - LLVector4a z; z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); + + res.mul(mMatrix[0]); + y.mul(mMatrix[1]); z.mul(mMatrix[2]); res.add(y); diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7c52ffef21..35a67204ec 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -460,16 +460,13 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs) mQ = _mm_max_ps(lhs.mQ, rhs.mQ); } -// Set this to (c * lhs) + rhs * ( 1 - c) +// Set this to lhs + (rhs-lhs)*c inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c) { - LLVector4a a = lhs; - a.mul(c); - - LLVector4a b = rhs; - b.mul(1.f-c); - - setAdd(a, b); + LLVector4a t; + t.setSub(rhs,lhs); + t.mul(c); + setAdd(lhs, t); } inline LLBool32 LLVector4a::isFinite3() const diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f989e8ed17..9fc72fd801 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -94,6 +94,95 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; +bool less_than_max_mag(const LLVector4a& vec); + +template +LLAlignedArray::LLAlignedArray() +{ + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +LLAlignedArray::~LLAlignedArray() +{ + ll_aligned_free(mArray); + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +void LLAlignedArray::push_back(const T& elem) +{ + T* old_buf = NULL; + if (mCapacity <= mElementCount) + { + mCapacity++; + mCapacity *= 2; + T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); + if (mArray) + { + LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); + } + old_buf = mArray; + mArray = new_buf; + } + + mArray[mElementCount++] = elem; + + //delete old array here to prevent error on a.push_back(a[0]) + ll_aligned_free(old_buf); +} + +template +void LLAlignedArray::resize(U32 size) +{ + if (mCapacity < size) + { + mCapacity = size+mCapacity*2; + T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; + if (mArray) + { + LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); + ll_aligned_free(mArray); + } + + /*for (U32 i = mElementCount; i < mCapacity; ++i) + { + new(new_buf+i) T(); + }*/ + mArray = new_buf; + } + + mElementCount = size; +} + + +template +T& LLAlignedArray::operator[](int idx) +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +const T& LLAlignedArray::operator[](int idx) const +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +T* LLAlignedArray::append(S32 N) +{ + U32 sz = size(); + resize(sz+N); + return &((*this)[sz]); +} + + BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -474,7 +563,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f }; F32 scale = 0.5f; F32 t, t_step, t_first, t_fraction, ang, ang_step; - LLVector3 pt1,pt2; + LLVector4a pt1,pt2; F32 begin = params.getBegin(); F32 end = params.getEnd(); @@ -497,20 +586,21 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // Starting t and ang values for the first face t = t_first; ang = 2.0f*F_PI*(t*ang_scale + offset); - pt1.setVec(cos(ang)*scale,sin(ang)*scale, t); + pt1.set(cos(ang)*scale,sin(ang)*scale, t); // Increment to the next point. // pt2 is the end point on the fractional face t += t_step; ang += ang_step; - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); t_fraction = (begin - t_first)*sides; // Only use if it's not almost exactly on an edge. if (t_fraction < 0.9999f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); mProfile.push_back(new_pt); } @@ -518,12 +608,17 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 while (t < end) { // Iterate through all the integer steps of t. - pt1.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt1.set(cos(ang)*scale,sin(ang)*scale,t); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + LLVector4a new_pt; + new_pt.setSub(pt1, p); + new_pt.mul(1.0f/(float)(split+1) * (float)(i+1)); + new_pt.add(p); + mProfile.push_back(new_pt); } } mProfile.push_back(pt1); @@ -536,18 +631,25 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // pt1 is the first point on the fractional face // pt2 is the end point on the fractional face - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); // Find the fraction that we need to add to the end point. t_fraction = (end - (t - t_step))*sides; if (t_fraction > 0.0001f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + + LLVector4a pt1; + pt1.setSub(new_pt, p); + pt1.mul(1.0f/(float)(split+1) * (float)(i+1)); + pt1.add(p); + mProfile.push_back(pt1); } } mProfile.push_back(new_pt); @@ -568,7 +670,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 if (params.getHollow() <= 0) { // put center point if not hollow. - mProfile.push_back(LLVector3(0,0,0)); + mProfile.push_back(LLVector4a(0,0,0)); } } else @@ -581,103 +683,6 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 mTotal = mProfile.size(); } -void LLProfile::genNormals(const LLProfileParams& params) -{ - S32 count = mProfile.size(); - - S32 outer_count; - if (mTotalOut) - { - outer_count = mTotalOut; - } - else - { - outer_count = mTotal / 2; - } - - mEdgeNormals.resize(count * 2); - mEdgeCenters.resize(count * 2); - mNormals.resize(count); - - LLVector2 pt0,pt1; - - BOOL hollow = (params.getHollow() > 0); - - S32 i0, i1, i2, i3, i4; - - // Parametrically generate normal - for (i2 = 0; i2 < count; i2++) - { - mNormals[i2].mV[0] = mProfile[i2].mV[0]; - mNormals[i2].mV[1] = mProfile[i2].mV[1]; - if (hollow && (i2 >= outer_count)) - { - mNormals[i2] *= -1.f; - } - if (mNormals[i2].magVec() < 0.001) - { - // Special case for point at center, get adjacent points. - i1 = (i2 - 1) >= 0 ? i2 - 1 : count - 1; - i0 = (i1 - 1) >= 0 ? i1 - 1 : count - 1; - i3 = (i2 + 1) < count ? i2 + 1 : 0; - i4 = (i3 + 1) < count ? i3 + 1 : 0; - - pt0.setVec(mProfile[i1].mV[VX] + mProfile[i1].mV[VX] - mProfile[i0].mV[VX], - mProfile[i1].mV[VY] + mProfile[i1].mV[VY] - mProfile[i0].mV[VY]); - pt1.setVec(mProfile[i3].mV[VX] + mProfile[i3].mV[VX] - mProfile[i4].mV[VX], - mProfile[i3].mV[VY] + mProfile[i3].mV[VY] - mProfile[i4].mV[VY]); - - mNormals[i2] = pt0 + pt1; - mNormals[i2] *= 0.5f; - } - mNormals[i2].normVec(); - } - - S32 num_normal_sets = isConcave() ? 2 : 1; - for (S32 normal_set = 0; normal_set < num_normal_sets; normal_set++) - { - S32 point_num; - for (point_num = 0; point_num < mTotal; point_num++) - { - LLVector3 point_1 = mProfile[point_num]; - point_1.mV[VZ] = 0.f; - - LLVector3 point_2; - - if (isConcave() && normal_set == 0 && point_num == (mTotal - 1) / 2) - { - point_2 = mProfile[mTotal - 1]; - } - else if (isConcave() && normal_set == 1 && point_num == mTotal - 1) - { - point_2 = mProfile[(mTotal - 1) / 2]; - } - else - { - LLVector3 delta_pos; - S32 neighbor_point = (point_num + 1) % mTotal; - while(delta_pos.magVecSquared() < 0.01f * 0.01f) - { - point_2 = mProfile[neighbor_point]; - delta_pos = point_2 - point_1; - neighbor_point = (neighbor_point + 1) % mTotal; - if (neighbor_point == point_num) - { - break; - } - } - } - - point_2.mV[VZ] = 0.f; - LLVector3 face_normal = (point_2 - point_1) % LLVector3::z_axis; - face_normal.normVec(); - mEdgeNormals[normal_set * count + point_num] = face_normal; - mEdgeCenters[normal_set * count + point_num] = lerp(point_1, point_2, 0.5f); - } - } -} - - // Hollow is percent of the original bounding box, not of this particular // profile's geometry. Thus, a swept triangle needs lower hollow values than // a swept square. @@ -693,12 +698,13 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3 Face *face = addFace(mTotalOut, mTotal-mTotalOut,0,LL_FACE_INNER_SIDE, flat); - std::vector pt; + static LLAlignedArray pt; pt.resize(mTotal) ; for (S32 i=mTotalOut;imPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + + LLMatrix3 rot(twist * qang); + + pt->mRot.loadu(rot); t+=step; @@ -1408,51 +1419,55 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en // Run through the non-cut dependent points. while (t < params.getEnd()) { - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; - + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); + t+=step; } // Make one final pass for the end cut. t = params.getEnd(); - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; - + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); + mTotal = mPath.size(); } @@ -1549,7 +1564,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, mDirty = FALSE; S32 np = 2; // hardcode for line - mPath.clear(); + mPath.resize(0); mOpen = TRUE; // Is this 0xf0 mask really necessary? DK 03/02/05 @@ -1575,12 +1590,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, for (S32 i=0;imPath.size() * mProfilep->mProfile.size()) > (1u << 20)) - { - llinfos << "sizeS: " << mPathp->mPath.size() << " sizeT: " << mProfilep->mProfile.size() << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split); BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split); @@ -2163,21 +2176,6 @@ BOOL LLVolume::generate() S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); - //******************************************************************** - //debug info, to be removed - if((U32)(sizeS * sizeT) > (1u << 20)) - { - llinfos << "regenPath: " << (S32)regenPath << " regenProf: " << (S32)regenProf << llendl ; - llinfos << "sizeS: " << sizeS << " sizeT: " << sizeT << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - sNumMeshPoints -= mMesh.size(); mMesh.resize(sizeT * sizeS); sNumMeshPoints += mMesh.size(); @@ -2185,22 +2183,39 @@ BOOL LLVolume::generate() //generate vertex positions // Run along the path. + LLVector4a* dst = mMesh.mArray; + for (S32 s = 0; s < sizeS; ++s) { - LLVector2 scale = mPathp->mPath[s].mScale; - LLQuaternion rot = mPathp->mPath[s].mRot; + F32* scale = mPathp->mPath[s].mScale.getF32ptr(); + + F32 sc [] = + { scale[0], 0, 0, 0, + 0, scale[1], 0, 0, + 0, 0, scale[2], 0, + 0, 0, 0, 1 }; + + LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix); + LLMatrix4 scale_mat(sc); + + scale_mat *= rot; + + LLMatrix4a rot_mat; + rot_mat.loadu(scale_mat); + + LLVector4a* profile = mProfilep->mProfile.mArray; + LLVector4a* end_profile = profile+sizeT; + LLVector4a offset = mPathp->mPath[s].mPos; + + LLVector4a tmp; // Run along the profile. - for (S32 t = 0; t < sizeT; ++t) + while (profile < end_profile) { - S32 m = s*sizeT + t; - Point& pt = mMesh[m]; - - pt.mPos.mV[0] = mProfilep->mProfile[t].mV[0] * scale.mV[0]; - pt.mPos.mV[1] = mProfilep->mProfile[t].mV[1] * scale.mV[1]; - pt.mPos.mV[2] = 0.0f; - pt.mPos = pt.mPos * rot; - pt.mPos += mPathp->mPath[s].mPos; + rot_mat.rotate(*profile++, tmp); + dst->setAdd(tmp,offset); + llassert(less_than_max_mag(*dst)); + ++dst; } } @@ -2210,9 +2225,11 @@ BOOL LLVolume::generate() LLFaceID id = iter->mFaceID; mFaceMask |= id; } - + LL_CHECK_MEMORY return TRUE; } + + LL_CHECK_MEMORY return FALSE; } @@ -2790,14 +2807,16 @@ void LLVolume::createVolumeFaces() } -inline LLVector3 sculpt_rgb_to_vector(U8 r, U8 g, U8 b) +inline LLVector4a sculpt_rgb_to_vector(U8 r, U8 g, U8 b) { // maps RGB values to vector values [0..255] -> [-0.5..0.5] - LLVector3 value; - value.mV[VX] = r / 255.f - 0.5f; - value.mV[VY] = g / 255.f - 0.5f; - value.mV[VZ] = b / 255.f - 0.5f; + LLVector4a value; + LLVector4a sub(0.5f, 0.5f, 0.5f); + value.set(r,g,b); + value.mul(1.f/255.f); + value.sub(sub); + return value; } @@ -2817,21 +2836,21 @@ inline U32 sculpt_st_to_index(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_w } -inline LLVector3 sculpt_index_to_vector(U32 index, const U8* sculpt_data) +inline LLVector4a sculpt_index_to_vector(U32 index, const U8* sculpt_data) { - LLVector3 v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); + LLVector4a v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); return v; } -inline LLVector3 sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_st_to_index(s, t, size_s, size_t, sculpt_width, sculpt_height, sculpt_components); return sculpt_index_to_vector(index, sculpt_data); } -inline LLVector3 sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_xy_to_index(x, y, sculpt_width, sculpt_height, sculpt_components); @@ -2853,15 +2872,26 @@ F32 LLVolume::sculptGetSurfaceArea() for (S32 t = 0; t < sizeT-1; t++) { // get four corners of quad - LLVector3 p1 = mMesh[(s )*sizeT + (t )].mPos; - LLVector3 p2 = mMesh[(s+1)*sizeT + (t )].mPos; - LLVector3 p3 = mMesh[(s )*sizeT + (t+1)].mPos; - LLVector3 p4 = mMesh[(s+1)*sizeT + (t+1)].mPos; + LLVector4a& p1 = mMesh[(s )*sizeT + (t )]; + LLVector4a& p2 = mMesh[(s+1)*sizeT + (t )]; + LLVector4a& p3 = mMesh[(s )*sizeT + (t+1)]; + LLVector4a& p4 = mMesh[(s+1)*sizeT + (t+1)]; // compute the area of the quad by taking the length of the cross product of the two triangles - LLVector3 cross1 = (p1 - p2) % (p1 - p3); - LLVector3 cross2 = (p4 - p2) % (p4 - p3); - area += (cross1.magVec() + cross2.magVec()) / 2.f; + LLVector4a v0,v1,v2,v3; + v0.setSub(p1,p2); + v1.setSub(p1,p3); + v2.setSub(p4,p2); + v3.setSub(p4,p3); + + LLVector4a cross1, cross2; + cross1.setCross3(v0,v1); + cross2.setCross3(v2,v3); + + //LLVector3 cross1 = (p1 - p2) % (p1 - p3); + //LLVector3 cross2 = (p4 - p2) % (p4 - p3); + + area += (cross1.getLength3() + cross2.getLength3()).getF32() / 2.f; } } @@ -2882,17 +2912,19 @@ void LLVolume::sculptGeneratePlaceholder() for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; F32 u = (F32)s/(sizeS-1); F32 v = (F32)t/(sizeT-1); const F32 RADIUS = (F32) 0.3; - - pt.mPos.mV[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[2] = (F32)(cos(F_PI * v) * RADIUS); + + F32* p = pt.getF32ptr(); + + p[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); + p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); + p[2] = (F32)(cos(F_PI * v) * RADIUS); } line += sizeT; @@ -2917,7 +2949,7 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; S32 reversed_t = t; @@ -2974,11 +3006,12 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 } } - pt.mPos = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); + pt = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); if (sculpt_mirror) { - pt.mPos.mV[VX] *= -1.f; + LLVector4a scale(-1.f,1,1,1); + pt.mul(scale); } } @@ -3560,803 +3593,125 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h return true; } -S32 *LLVolume::getTriangleIndices(U32 &num_indices) const +void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) +{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the + //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost + F32 detail[] = {1.f, 1.5f, 2.5f, 4.f}; + for (S32 i = 0; i < 4; i++) + { + S32 count = 0; + S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); + S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); + + count = (profile_points-1)*2*(path_points-1); + count += profile_points*2; + + counts[i] = count; + } +} + + +S32 LLVolume::getNumTriangles(S32* vcount) const { - S32 expected_num_triangle_indices = getNumTriangleIndices(); - if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES) + U32 triangle_count = 0; + U32 vertex_count = 0; + + for (S32 i = 0; i < getNumVolumeFaces(); ++i) { - // we don't allow LLVolumes with this many vertices - llwarns << "Couldn't allocate triangle indices" << llendl; - num_indices = 0; - return NULL; + const LLVolumeFace& face = getVolumeFace(i); + triangle_count += face.mNumIndices/3; + + vertex_count += face.mNumVertices; } - S32* index = new S32[expected_num_triangle_indices]; - S32 count = 0; - // Let's do this totally diffently, as we don't care about faces... - // Counter-clockwise triangles are forward facing... + if (vcount) + { + *vcount = vertex_count; + } + + return triangle_count; +} + - BOOL open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - S32 size_s, size_s_out, size_t; - S32 s, t, i; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); +//----------------------------------------------------------------------------- +// generateSilhouetteVertices() +//----------------------------------------------------------------------------- +void LLVolume::generateSilhouetteVertices(std::vector &vertices, + std::vector &normals, + const LLVector3& obj_cam_vec_in, + const LLMatrix4& mat_in, + const LLMatrix3& norm_mat_in, + S32 face_mask) +{ + LLMatrix4a mat; + mat.loadu(mat_in); + + LLMatrix4a norm_mat; + norm_mat.loadu(norm_mat_in); + + LLVector4a obj_cam_vec; + obj_cam_vec.load3(obj_cam_vec_in.mV); - // NOTE -- if the construction of the triangles below ever changes - // then getNumTriangleIndices() method may also have to be updated. + vertices.clear(); + normals.clear(); - if (open) /* Flawfinder: ignore */ + if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) { - if (hollow) + return; + } + + S32 cur_index = 0; + //for each face + for (face_list_t::iterator iter = mVolumeFaces.begin(); + iter != mVolumeFaces.end(); ++iter) + { + LLVolumeFace& face = *iter; + + if (!(face_mask & (0x1 << cur_index++)) || + face.mNumIndices == 0 || face.mEdge.empty()) { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 + continue; + } - for (t = 0; t < size_t - 1; t++) - { - // The outer face, first cut, and inner face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 + if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } + } + else { - // The other cut face - index[count++] = s + t*size_s; // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = s + (t+1)*size_s; // x,y+1 - - index[count++] = s + (t+1)*size_s; // x,y+1 - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } + //============================================== + //DEBUG draw edge map instead of silhouette edge + //============================================== - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; +#if DEBUG_SILHOUETTE_EDGE_MAP - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } + //for each triangle + U32 count = face.mNumIndices; + for (U32 j = 0; j < count/3; j++) { + //get vertices + S32 v1 = face.mIndices[j*3+0]; + S32 v2 = face.mIndices[j*3+1]; + S32 v3 = face.mIndices[j*3+2]; - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + //get current face center + LLVector3 cCenter = (face.mVertices[v1].getPosition() + + face.mVertices[v2].getPosition() + + face.mVertices[v3].getPosition()) / 3.0f; - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } + //for each edge + for (S32 k = 0; k < 3; k++) { + S32 nIndex = face.mEdge[j*3+k]; + if (nIndex <= -1) { + continue; } - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; + if (nIndex >= (S32) count/3) { + continue; } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Open solid - - for (t = 0; t < size_t - 1; t++) - { - // Outer face + 1 cut face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - - // The other cut face - index[count++] = (size_s - 1) + (t*size_s); // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - index[count++] = 0 + (t*size_s); // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - for (s = 0; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = size_s - 1; - } - - // We've got a top cap - S32 offset = (size_t - 1)*size_s; - for (s = 0; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset + size_s - 1; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - } - else if (hollow) - { - // Closed hollow - // Outer face - - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s_out - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Inner face - // Invert facing from outer face - for (t = 0; t < size_t - 1; t++) - { - for (s = size_s_out; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; - - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; - } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Closed solid. Easy case. - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s - 1; s++) - { - // Should wrap properly, but for now... - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // bottom cap - for (s = 1; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = 0; - } - - // top cap - S32 offset = (size_t - 1)*size_s; - for (s = 1; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - -#ifdef LL_DEBUG - // assert that we computed the correct number of indices - if (count != expected_num_triangle_indices ) - { - llerrs << "bad index count prediciton:" - << " expected=" << expected_num_triangle_indices - << " actual=" << count << llendl; - } -#endif - -#if 0 - // verify that each index does not point beyond the size of the mesh - S32 num_vertices = mMesh.size(); - for (i = 0; i < count; i+=3) - { - llinfos << index[i] << ":" << index[i+1] << ":" << index[i+2] << llendl; - llassert(index[i] < num_vertices); - llassert(index[i+1] < num_vertices); - llassert(index[i+2] < num_vertices); - } -#endif - - num_indices = count; - return index; -} - -void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) -{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the - //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost - F32 detail[] = {1.f, 1.5f, 2.5f, 4.f}; - for (S32 i = 0; i < 4; i++) - { - S32 count = 0; - S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); - S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); - - count = (profile_points-1)*2*(path_points-1); - count += profile_points*2; - - counts[i] = count; - } -} - -S32 LLVolume::getNumTriangleIndices() const -{ - BOOL profile_open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - - S32 size_s, size_s_out, size_t; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); - - S32 count = 0; - if (profile_open) /* Flawfinder: ignore */ - { - if (hollow) - { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - else - { - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - } - else if (hollow) - { - // Closed hollow - // Outer face - count = (size_t - 1) * (size_s_out - 1) * 6; - - // Inner face - count += (size_t - 1) * ((size_s - 1) - size_s_out) * 6; - } - else - { - // Closed solid. Easy case. - count = (size_t - 1) * (size_s - 1) * 6; - } - - if (path_open) - { - S32 cap_triangle_count = size_s - 3; - if ( profile_open - || hollow ) - { - cap_triangle_count = size_s - 2; - } - if ( cap_triangle_count > 0 ) - { - // top and bottom caps - count += cap_triangle_count * 2 * 3; - } - } - return count; -} - - -S32 LLVolume::getNumTriangles(S32* vcount) const -{ - U32 triangle_count = 0; - U32 vertex_count = 0; - - for (S32 i = 0; i < getNumVolumeFaces(); ++i) - { - const LLVolumeFace& face = getVolumeFace(i); - triangle_count += face.mNumIndices/3; - - vertex_count += face.mNumVertices; - } - - - if (vcount) - { - *vcount = vertex_count; - } - - return triangle_count; -} - - -//----------------------------------------------------------------------------- -// generateSilhouetteVertices() -//----------------------------------------------------------------------------- -void LLVolume::generateSilhouetteVertices(std::vector &vertices, - std::vector &normals, - const LLVector3& obj_cam_vec_in, - const LLMatrix4& mat_in, - const LLMatrix3& norm_mat_in, - S32 face_mask) -{ - LLMatrix4a mat; - mat.loadu(mat_in); - - LLMatrix4a norm_mat; - norm_mat.loadu(norm_mat_in); - - LLVector4a obj_cam_vec; - obj_cam_vec.load3(obj_cam_vec_in.mV); - - vertices.clear(); - normals.clear(); - - if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) - { - return; - } - - S32 cur_index = 0; - //for each face - for (face_list_t::iterator iter = mVolumeFaces.begin(); - iter != mVolumeFaces.end(); ++iter) - { - LLVolumeFace& face = *iter; - - if (!(face_mask & (0x1 << cur_index++)) || - face.mNumIndices == 0 || face.mEdge.empty()) - { - continue; - } - - if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { - - } - else { - - //============================================== - //DEBUG draw edge map instead of silhouette edge - //============================================== - -#if DEBUG_SILHOUETTE_EDGE_MAP - - //for each triangle - U32 count = face.mNumIndices; - for (U32 j = 0; j < count/3; j++) { - //get vertices - S32 v1 = face.mIndices[j*3+0]; - S32 v2 = face.mIndices[j*3+1]; - S32 v3 = face.mIndices[j*3+2]; - - //get current face center - LLVector3 cCenter = (face.mVertices[v1].getPosition() + - face.mVertices[v2].getPosition() + - face.mVertices[v3].getPosition()) / 3.0f; - - //for each edge - for (S32 k = 0; k < 3; k++) { - S32 nIndex = face.mEdge[j*3+k]; - if (nIndex <= -1) { - continue; - } - - if (nIndex >= (S32) count/3) { - continue; - } - //get neighbor vertices - v1 = face.mIndices[nIndex*3+0]; - v2 = face.mIndices[nIndex*3+1]; - v3 = face.mIndices[nIndex*3+2]; + //get neighbor vertices + v1 = face.mIndices[nIndex*3+0]; + v2 = face.mIndices[nIndex*3+1]; + v3 = face.mIndices[nIndex*3+2]; //get neighbor face center LLVector3 nCenter = (face.mVertices[v1].getPosition() + @@ -5243,8 +4598,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) freeData(); - LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a)); - resizeVertices(src.mNumVertices); resizeIndices(src.mNumIndices); @@ -5307,7 +4660,7 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately @@ -5331,52 +4684,23 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) delete mOctree; mOctree = NULL; + LL_CHECK_MEMORY BOOL ret = FALSE ; if (mTypeMask & CAP_MASK) { ret = createCap(volume, partial_build); + LL_CHECK_MEMORY } else if ((mTypeMask & END_MASK) || (mTypeMask & SIDE_MASK)) { ret = createSide(volume, partial_build); + LL_CHECK_MEMORY } else { llerrs << "Unknown/uninitialized face type!" << llendl; } - //update the range of the texture coordinates - if(ret) - { - mTexCoordExtents[0].setVec(1.f, 1.f) ; - mTexCoordExtents[1].setVec(0.f, 0.f) ; - - for(U32 i = 0 ; i < mNumVertices ; i++) - { - if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0]) - { - mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ; - } - if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0]) - { - mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ; - } - - if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1]) - { - mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ; - } - if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1]) - { - mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ; - } - } - mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ; - mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ; - mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ; - mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ; - } - return ret ; } @@ -6068,8 +5392,10 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + LL_CHECK_MEMORY + + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6099,9 +5425,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) VertexData baseVert; for(S32 t = 0; t < 4; t++) { - corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); - corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; - corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; + corners[t].getPosition().load4a(mesh[offset + (grid_size*t)].getF32ptr()); + corners[t].mTexCoord.mV[0] = profile[grid_size*t][0]+0.5f; + corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t][1]; } { @@ -6182,6 +5508,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) mCenter->mul(0.5f); } + llassert(less_than_max_mag(mExtents[0])); + llassert(less_than_max_mag(mExtents[1])); + if (!partial_build) { resizeIndices(grid_size*grid_size*6); @@ -6212,6 +5541,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY return TRUE; } @@ -6230,8 +5560,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) S32 num_vertices = 0, num_indices = 0; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; // All types of caps have the same number of vertices and indices num_vertices = profile.size(); @@ -6251,13 +5581,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { resizeVertices(num_vertices); allocateBinormals(num_vertices); - if (!partial_build) { resizeIndices(num_indices); } } + LL_CHECK_MEMORY; + S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6288,35 +5619,68 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector4a* binorm = (LLVector4a*) mBinormals; // Copy the vertices into the array - for (S32 i = 0; i < num_vertices; i++) + + const LLVector4a* src = mesh.mArray+offset; + const LLVector4a* end = src+num_vertices; + + min = *src; + max = min; + + + const LLVector4a* p = profile.mArray; + + if (mTypeMask & TOP_MASK) { - if (mTypeMask & TOP_MASK) - { - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = profile[i].mV[1]+0.5f; - } - else + min_uv.set((*p)[0]+0.5f, + (*p)[1]+0.5f); + + max_uv = min_uv; + + while(src < end) { - // Mirror for underside. - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = 0.5f - profile[i].mV[1]; - } + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = (*p)[1]+0.5f; - pos[i].load3(mesh[i + offset].mPos.mV); + llassert(less_than_max_mag(*src)); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); - if (i == 0) - { - max = pos[i]; - min = max; - min_uv = max_uv = tc[i]; + *pos = *src; + + ++p; + ++tc; + ++src; + ++pos; } - else + } + else + { + + min_uv.set((*p)[0]+0.5f, + 0.5f - (*p)[1]); + max_uv = min_uv; + + while(src < end) { - update_min_max(min,max,pos[i]); - update_min_max(min_uv, max_uv, tc[i]); + // Mirror for underside. + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = 0.5f - (*p)[1]; + + llassert(less_than_max_mag(*src)); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); + + *pos = *src; + + ++p; + ++tc; + ++src; + ++pos; } } + LL_CHECK_MEMORY + mCenter->setAdd(min, max); mCenter->mul(0.5f); @@ -6353,15 +5717,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { - pos[num_vertices] = *mCenter; - tc[num_vertices] = cuv; + *pos++ = *mCenter; + *tc++ = cuv; num_vertices++; } - - for (S32 i = 0; i < num_vertices; i++) + + LL_CHECK_MEMORY + + F32* dst_binorm = (F32*) binorm; + F32* end_binorm = (F32*) (binorm+num_vertices); + + F32* dst_norm = (F32*) norm; + + while (dst_binorm < end_binorm) { - binorm[i].load4a(binormal.getF32ptr()); - norm[i].load4a(normal.getF32ptr()); + binormal.store4a(dst_binorm); + normal.store4a(dst_norm); + + dst_binorm += 4; + dst_norm += 4; } if (partial_build) @@ -6382,33 +5756,38 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; + //p1.mV[VZ] = 0.f; + //p2.mV[VZ] = 0.f; + //pa.mV[VZ] = 0.f; + //pb.mV[VZ] = 0.f; // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6443,10 +5822,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1, pa); + + LLVector4a d2; + d2.setSub(p2, pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6485,33 +5867,33 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); + // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6546,10 +5928,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1,pa); + LLVector4a d2; + d2.setSub(p2,pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6598,6 +5982,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } + + LL_CHECK_MEMORY return TRUE; } @@ -6900,6 +6286,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { + LL_CHECK_MEMORY BOOL flat = mTypeMask & FLAT_MASK; U8 sculpt_type = volume->getParams().getSculptType(); @@ -6910,9 +6297,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) S32 num_vertices, num_indices; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; - const std::vector& path_data = volume->getPath().mPath; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; + const LLAlignedArray& path_data = volume->getPath().mPath; S32 max_s = volume->getProfile().getTotal(); @@ -6933,10 +6320,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + LLVector4a* pos = (LLVector4a*) mPositions; - LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - F32 begin_stex = floorf(profile[mBeginS].mV[2]); + F32 begin_stex = floorf(profile[mBeginS][2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; @@ -6965,11 +6353,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // Get s value for tex-coord. if (!flat) { - ss = profile[mBeginS + s].mV[2]; + ss = profile[mBeginS + s][2]; } else { - ss = profile[mBeginS + s].mV[2] - begin_stex; + ss = profile[mBeginS + s][2] - begin_stex; } } @@ -6989,19 +6377,17 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) i = mBeginS + s + max_s*t; } - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); - norm[cur_vertex].clear(); cur_vertex++; if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - - norm[cur_vertex].clear(); - + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); cur_vertex++; } } @@ -7018,28 +6404,66 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } i = mBeginS + s + max_s*t; - ss = profile[mBeginS + s].mV[2] - begin_stex; - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - norm[cur_vertex].clear(); - + ss = profile[mBeginS + s][2] - begin_stex; + + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); + cur_vertex++; } } + LL_CHECK_MEMORY - //get bounding box for this side - LLVector4a& face_min = mExtents[0]; - LLVector4a& face_max = mExtents[1]; + mCenter->clear(); - face_min = face_max = pos[0]; + LLVector4a* cur_pos = pos; + LLVector4a* end_pos = pos + mNumVertices; + + //get bounding box for this side + LLVector4a face_min; + LLVector4a face_max; + + face_min = face_max = *cur_pos++; + + while (cur_pos < end_pos) + { + update_min_max(face_min, face_max, *cur_pos++); + } + + mExtents[0] = face_min; + mExtents[1] = face_max; + + U32 tc_count = mNumVertices; + if (tc_count%2 == 1) + { //odd number of texture coordinates, duplicate last entry to padded end of array + tc_count++; + mTexCoords[mNumVertices] = mTexCoords[mNumVertices-1]; + } + + LLVector4a* cur_tc = (LLVector4a*) mTexCoords; + LLVector4a* end_tc = (LLVector4a*) (mTexCoords+tc_count); + + LLVector4a tc_min; + LLVector4a tc_max; + + tc_min = tc_max = *cur_tc++; - for (U32 i = 1; i < mNumVertices; ++i) + while (cur_tc < end_tc) { - update_min_max(face_min, face_max, pos[i]); + update_min_max(tc_min, tc_max, *cur_tc++); } + F32* minp = tc_min.getF32ptr(); + F32* maxp = tc_max.getF32ptr(); + + mTexCoordExtents[0].mV[0] = llmin(minp[0], minp[2]); + mTexCoordExtents[0].mV[1] = llmin(minp[1], minp[3]); + mTexCoordExtents[1].mV[0] = llmax(maxp[0], maxp[2]); + mTexCoordExtents[1].mV[1] = llmax(maxp[1], maxp[3]); + mCenter->setAdd(face_min, face_max); mCenter->mul(0.5f); @@ -7104,33 +6528,94 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + //clear normals - for (U32 i = 0; i < mNumVertices; i++) + F32* dst = (F32*) mNormals; + F32* end = (F32*) (mNormals+mNumVertices); + LLVector4a zero = LLVector4a::getZero(); + + while (dst < end) { - mNormals[i].clear(); + zero.store4a(dst); + dst += 4; } + LL_CHECK_MEMORY + //generate normals U32 count = mNumIndices/3; - for (U32 i = 0; i < count; i++) //for each triangle + LLVector4a* norm = mNormals; + + static LLAlignedArray triangle_normals; + triangle_normals.resize(count); + LLVector4a* output = triangle_normals.mArray; + LLVector4a* end_output = output+count; + + U16* idx = mIndices; + + while (output < end_output) { - const U16* idx = &(mIndices[i*3]); - - LLVector4a& v0 = *(pos+idx[0]); - LLVector4a& v1 = *(pos+idx[1]); - LLVector4a& v2 = *(pos+idx[2]); - - LLVector4a& n0 = *(norm+idx[0]); - LLVector4a& n1 = *(norm+idx[1]); - LLVector4a& n2 = *(norm+idx[2]); + LLVector4a b,v1,v2; + b.load4a((F32*) (pos+idx[0])); + v1.load4a((F32*) (pos+idx[1])); + v2.load4a((F32*) (pos+idx[2])); //calculate triangle normal - LLVector4a a, b, c; + LLVector4a a; - a.setSub(v0, v1); - b.setSub(v0, v2); - c.setCross3(a,b); + a.setSub(b, v1); + b.sub(v2); + + + LLQuad& vector1 = *((LLQuad*) &v1); + LLQuad& vector2 = *((LLQuad*) &v2); + + LLQuad& amQ = *((LLQuad*) &a); + LLQuad& bmQ = *((LLQuad*) &b); + + //v1.setCross3(t,v0); + //setCross3(const LLVector4a& a, const LLVector4a& b) + // Vectors are stored in memory in w, z, y, x order from high to low + // Set vector1 = { a[W], a[X], a[Z], a[Y] } + vector1 = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // Set vector2 = { b[W], b[Y], b[X], b[Z] } + vector2 = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] } + vector2 = _mm_mul_ps( vector1, vector2 ); + // vector3 = { a[W], a[Y], a[X], a[Z] } + amQ = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // vector4 = { b[W], b[X], b[Z], b[Y] } + bmQ = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } + vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ )); + + v1.store4a((F32*) output); + + output++; + idx += 3; + } + + idx = mIndices; + + LLVector4a* src = triangle_normals.mArray; + + for (U32 i = 0; i < count; i++) //for each triangle + { + LLVector4a c; + c.load4a((F32*) (src++)); + + LLVector4a* n0p = norm+idx[0]; + LLVector4a* n1p = norm+idx[1]; + LLVector4a* n2p = norm+idx[2]; + + idx += 3; + + LLVector4a n0,n1,n2; + n0.load4a((F32*) n0p); + n1.load4a((F32*) n1p); + n2.load4a((F32*) n2p); n0.add(c); n1.add(c); @@ -7143,8 +6628,14 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) case 1: n1.add(c); break; case 2: n2.add(c); break; }; + + n0.store4a((F32*) n0p); + n1.store4a((F32*) n1p); + n2.store4a((F32*) n2p); } + LL_CHECK_MEMORY + // adjust normals based on wrapping and stitching LLVector4a top; @@ -7276,6 +6767,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } + LL_CHECK_MEMORY + return TRUE; } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 1d3b0fe52f..5e43af92ec 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -37,7 +37,6 @@ class LLPath; template class LLOctreeNode; -class LLVector4a; class LLVolumeFace; class LLVolume; class LLVolumeTriangle; @@ -50,6 +49,8 @@ class LLVolumeTriangle; #include "v3math.h" #include "v3dmath.h" #include "v4math.h" +#include "llvector4a.h" +#include "llmatrix4a.h" #include "llquaternion.h" #include "llstrider.h" #include "v4coloru.h" @@ -194,6 +195,26 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; +template +class LLAlignedArray +{ +public: + T* mArray; + U32 mElementCount; + U32 mCapacity; + + LLAlignedArray(); + ~LLAlignedArray(); + + void push_back(const T& elem); + U32 size() const { return mElementCount; } + void resize(U32 size); + T* append(S32 N); + T& operator[](int idx); + const T& operator[](int idx) const; +}; + + class LLProfileParams { public: @@ -708,16 +729,16 @@ public: LLFaceID mFaceID; }; - std::vector mProfile; - std::vector mNormals; + LLAlignedArray mProfile; + //LLAlignedArray mNormals; std::vector mFaces; - std::vector mEdgeNormals; - std::vector mEdgeCenters; + + //LLAlignedArray mEdgeNormals; + //LLAlignedArray mEdgeCenters; friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile); protected: - void genNormals(const LLProfileParams& params); static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); @@ -741,13 +762,29 @@ protected: class LLPath { public: - struct PathPt + class PathPt { - LLVector3 mPos; - LLVector2 mScale; - LLQuaternion mRot; + public: + LLMatrix4a mRot; + LLVector4a mPos; + + LLVector4a mScale; F32 mTexT; - PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); } + F32 pad[3]; //for alignment + PathPt() + { + mPos.clear(); + mTexT = 0; + mScale.clear(); + mRot.setRows(LLVector4a(1,0,0,0), + LLVector4a(0,1,0,0), + LLVector4a(0,0,1,0)); + + //distinguished data in the pad for debugging + pad[0] = 3.14159f; + pad[1] = -3.14159f; + pad[2] = 0.585f; + } }; public: @@ -779,7 +816,7 @@ public: friend std::ostream& operator<<(std::ostream &s, const LLPath &path); public: - std::vector mPath; + LLAlignedArray mPath; protected: BOOL mOpen; @@ -951,11 +988,7 @@ protected: ~LLVolume(); // use unref public: - struct Point - { - LLVector3 mPos; - }; - + struct FaceParams { LLFaceID mFaceID; @@ -978,8 +1011,8 @@ public: const LLProfile& getProfile() const { return *mProfilep; } LLPath& getPath() const { return *mPathp; } void resizePath(S32 length); - const std::vector& getMesh() const { return mMesh; } - const LLVector3& getMeshPt(const U32 i) const { return mMesh[i].mPos; } + const LLAlignedArray& getMesh() const { return mMesh; } + const LLVector4a& getMeshPt(const U32 i) const { return mMesh[i]; } void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); } @@ -994,10 +1027,7 @@ public: S32 getSculptLevel() const { return mSculptLevel; } void setSculptLevel(S32 level) { mSculptLevel = level; } - S32 *getTriangleIndices(U32 &num_indices) const; - - // returns number of triangle indeces required for path/profile mesh - S32 getNumTriangleIndices() const; + static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts); S32 getNumTriangles(S32* vcount = NULL) const; @@ -1070,7 +1100,8 @@ public: LLVolumeParams mParams; LLPath *mPathp; LLProfile *mProfilep; - std::vector mMesh; + LLAlignedArray mMesh; + BOOL mGenerateSingleFace; typedef std::vector face_list_t; -- cgit v1.2.3 From c04f4f66c813181eb378b00045aec969dc2c4aae Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 12:30:16 -0700 Subject: Moved LLAlignedArray from llmath to llcommon and put template func impls in header to work around Mac 4.3.3 link issue. --- indra/llmath/llvolume.cpp | 87 ----------------------------------------------- indra/llmath/llvolume.h | 21 +----------- 2 files changed, 1 insertion(+), 107 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 9fc72fd801..4f3e753276 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -96,93 +96,6 @@ extern BOOL gDebugGL; bool less_than_max_mag(const LLVector4a& vec); -template -LLAlignedArray::LLAlignedArray() -{ - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template -LLAlignedArray::~LLAlignedArray() -{ - ll_aligned_free(mArray); - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template -void LLAlignedArray::push_back(const T& elem) -{ - T* old_buf = NULL; - if (mCapacity <= mElementCount) - { - mCapacity++; - mCapacity *= 2; - T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - } - old_buf = mArray; - mArray = new_buf; - } - - mArray[mElementCount++] = elem; - - //delete old array here to prevent error on a.push_back(a[0]) - ll_aligned_free(old_buf); -} - -template -void LLAlignedArray::resize(U32 size) -{ - if (mCapacity < size) - { - mCapacity = size+mCapacity*2; - T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - ll_aligned_free(mArray); - } - - /*for (U32 i = mElementCount; i < mCapacity; ++i) - { - new(new_buf+i) T(); - }*/ - mArray = new_buf; - } - - mElementCount = size; -} - - -template -T& LLAlignedArray::operator[](int idx) -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template -const T& LLAlignedArray::operator[](int idx) const -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template -T* LLAlignedArray::append(S32 N) -{ - U32 sz = size(); - resize(sz+N); - return &((*this)[sz]); -} - - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 5e43af92ec..1ff53590cf 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -41,6 +41,7 @@ class LLVolumeFace; class LLVolume; class LLVolumeTriangle; +#include "llalignedarray.h" #include "lldarray.h" #include "lluuid.h" #include "v4color.h" @@ -195,26 +196,6 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; -template -class LLAlignedArray -{ -public: - T* mArray; - U32 mElementCount; - U32 mCapacity; - - LLAlignedArray(); - ~LLAlignedArray(); - - void push_back(const T& elem); - U32 size() const { return mElementCount; } - void resize(U32 size); - T* append(S32 N); - T& operator[](int idx); - const T& operator[](int idx) const; -}; - - class LLProfileParams { public: -- cgit v1.2.3 From 356d10e6a6cfea279228f371aed88aba73ed70dd Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 13:12:03 -0700 Subject: Work around 'interesting' forced include order in vmath stuff on Windows --- indra/llmath/llvolume.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 1ff53590cf..6b599a4126 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -41,7 +41,6 @@ class LLVolumeFace; class LLVolume; class LLVolumeTriangle; -#include "llalignedarray.h" #include "lldarray.h" #include "lluuid.h" #include "v4color.h" @@ -58,6 +57,7 @@ class LLVolumeTriangle; #include "llrefcount.h" #include "llpointer.h" #include "llfile.h" +#include "llalignedarray.h" //============================================================================ -- cgit v1.2.3 From 9a0f913e7089c9b2120856936bf47dcb8de9c6b1 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham)" Date: Mon, 11 Mar 2013 13:55:40 -0700 Subject: Work around bogus gcc 4.3.4 warning from alignment assert --- indra/llmath/llvector4a.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'indra/llmath') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 0526793d3a..1a478bc8de 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -93,7 +93,11 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary +// This assert is causing spurious referenced before set warnings on GCC 4.3.4 +// +#if !LL_LINUX ll_assert_aligned(this,16); +#endif } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) -- cgit v1.2.3 From 6ac6736994240d9789a81bf585468bef50805fd8 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 16:00:25 -0700 Subject: Move 16b aligned memcpy and alignment utilities to llmem in llcommon for easier use elsewhere --- indra/llmath/llsimdmath.h | 28 ------------------------- indra/llmath/llvector4a.cpp | 50 +-------------------------------------------- 2 files changed, 1 insertion(+), 77 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..cebd2ace7d 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include #endif -template T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0xF) & ~0xF); -} - -template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include #include diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..570fa41a43 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) -- cgit v1.2.3 From e8b8a12b7365c17cf0326be365b78bcb1da1bfac Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Tue, 12 Mar 2013 10:48:05 -0700 Subject: Mods to make compile on gcc 4.6.3 work mo betta --- indra/llmath/llvolume.cpp | 5 ----- 1 file changed, 5 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index eff224743b..cb5633c1bb 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5314,12 +5314,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) S32 max_t = volume->getPath().mPath.size(); // S32 i; - S32 num_vertices = 0, num_indices = 0; S32 grid_size = (profile.size()-1)/4; - S32 quad_count = (grid_size * grid_size); - - num_vertices = (grid_size+1)*(grid_size+1); - num_indices = quad_count * 4; LLVector4a& min = mExtents[0]; LLVector4a& max = mExtents[1]; -- cgit v1.2.3 From 5d2fea6262d91eb8d3c06d97a160ca9373b96889 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham Linden)" Date: Wed, 13 Mar 2013 10:42:40 -0700 Subject: Move fast memcpy to llcommon and use it in llalignedarray pushback on all platforms. Code Review: DaveP --- indra/llmath/llsimdmath.h | 28 ------------------------- indra/llmath/llvector4a.cpp | 50 +-------------------------------------------- indra/llmath/llvector4a.h | 4 ---- indra/llmath/llvolume.cpp | 11 ++++++---- 4 files changed, 8 insertions(+), 85 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..cebd2ace7d 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include #endif -template T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0xF) & ~0xF); -} - -template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include #include diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..570fa41a43 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 1a478bc8de..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -93,11 +93,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary -// This assert is causing spurious referenced before set warnings on GCC 4.3.4 -// -#if !LL_LINUX ll_assert_aligned(this,16); -#endif } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 4f3e753276..e7a0bc7df7 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4729,10 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + // Only swap data if we've actually optimized the mesh + // + if (new_face.mNumVertices < mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; -- cgit v1.2.3 From bba84a3fa9a1af87f6a8080f9093f9277feb1292 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham Linden)" Date: Wed, 13 Mar 2013 13:38:30 -0700 Subject: Cleanup per code review of prev change with DaveP --- indra/llmath/llvolume.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cb5633c1bb..edd16b5688 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4729,11 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - if (new_face.mNumVertices) + // disallow data amplification + // + if (new_face.mNumVertices <= mNumVertices) { - llassert(new_face.mNumIndices == mNumIndices); - swapData(new_face); - } + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; @@ -6731,3 +6733,4 @@ void calc_binormal_from_triangle(LLVector4a& binormal, binormal.set( 0, 1 , 0 ); } } + -- cgit v1.2.3 From dbfcd6c9c5709b74365c2538ba312685b09d22bf Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 7 May 2013 17:20:33 -0500 Subject: Optimization -- don't draw glow in alpha pool unless needed --- indra/llmath/llvolume.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 602f2c29e5..7751ef87ee 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4520,7 +4520,11 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size); - LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + + if (src.mNormals) + { + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + } if(src.mTexCoords) { -- cgit v1.2.3 From ec5bd94d59247e600f8f8702a543f99d40930d20 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 15 May 2013 11:00:00 -0700 Subject: BUG-2581 : [simon-ll-viewer-cat] Path cut, hollow and slice break planar texture mapping on prims. Credits to DaveP --- indra/llmath/llvolume.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 7751ef87ee..317d15f84e 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5604,16 +5604,16 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector4a binormal; calc_binormal_from_triangle(binormal, *mCenter, cuv, - pos[0], tc[0], - pos[1], tc[1]); + mPositions[0], mTexCoords[0], + mPositions[1], mTexCoords[1]); binormal.normalize3fast(); LLVector4a normal; LLVector4a d0, d1; - d0.setSub(*mCenter, pos[0]); - d1.setSub(*mCenter, pos[1]); + d0.setSub(*mCenter, mPositions[0]); + d1.setSub(*mCenter, mPositions[1]); if (mTypeMask & TOP_MASK) { -- cgit v1.2.3