diff options
Diffstat (limited to 'indra/llmath/llvolume.cpp')
-rw-r--r-- | indra/llmath/llvolume.cpp | 305 |
1 files changed, 174 insertions, 131 deletions
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 02c8d2b86f..f989e8ed17 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5180,6 +5180,7 @@ LLVolumeFace::LLVolumeFace() : mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5187,7 +5188,8 @@ LLVolumeFace::LLVolumeFace() : mTexCoords(NULL), mIndices(NULL), mWeights(NULL), - mOctree(NULL) + mOctree(NULL), + mOptimized(FALSE) { mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); mExtents[0].splat(-0.5f); @@ -5203,6 +5205,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5257,12 +5260,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) { LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); } - else - { - ll_aligned_free_16(mTexCoords) ; - mTexCoords = NULL ; - } - if (src.mBinormals) { @@ -5294,6 +5291,8 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size); } + mOptimized = src.mOptimized; + //delete return *this; } @@ -5310,10 +5309,11 @@ void LLVolumeFace::freeData() { ll_aligned_free_16(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + + //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately mNormals = NULL; - ll_aligned_free_16(mTexCoords); mTexCoords = NULL; + ll_aligned_free_16(mIndices); mIndices = NULL; ll_aligned_free_16(mBinormals); @@ -5495,18 +5495,6 @@ void LLVolumeFace::optimize(F32 angle_cutoff) llassert(new_face.mNumIndices == mNumIndices); llassert(new_face.mNumVertices <= mNumVertices); - if (angle_cutoff > 1.f && !mNormals) - { - ll_aligned_free_16(new_face.mNormals); - new_face.mNormals = NULL; - } - - if (!mTexCoords) - { - ll_aligned_free_16(new_face.mTexCoords); - new_face.mTexCoords = NULL; - } - swapData(new_face); } @@ -5517,14 +5505,14 @@ class LLVCacheVertexData public: S32 mIdx; S32 mCacheTag; - F32 mScore; + F64 mScore; U32 mActiveTriangles; std::vector<LLVCacheTriangleData*> mTriangles; LLVCacheVertexData() { mCacheTag = -1; - mScore = 0.f; + mScore = 0.0; mActiveTriangles = 0; mIdx = -1; } @@ -5534,13 +5522,13 @@ class LLVCacheTriangleData { public: bool mActive; - F32 mScore; + F64 mScore; LLVCacheVertexData* mVertex[3]; LLVCacheTriangleData() { mActive = true; - mScore = 0.f; + mScore = 0.0; mVertex[0] = mVertex[1] = mVertex[2] = NULL; } @@ -5551,7 +5539,7 @@ public: { if (mVertex[i]) { - llassert_always(mVertex[i]->mActiveTriangles > 0); + llassert(mVertex[i]->mActiveTriangles > 0); mVertex[i]->mActiveTriangles--; } } @@ -5563,44 +5551,44 @@ public: } }; -const F32 FindVertexScore_CacheDecayPower = 1.5f; -const F32 FindVertexScore_LastTriScore = 0.75f; -const F32 FindVertexScore_ValenceBoostScale = 2.0f; -const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const F64 FindVertexScore_CacheDecayPower = 1.5; +const F64 FindVertexScore_LastTriScore = 0.75; +const F64 FindVertexScore_ValenceBoostScale = 2.0; +const F64 FindVertexScore_ValenceBoostPower = 0.5; const U32 MaxSizeVertexCache = 32; +const F64 FindVertexScore_Scaler = 1.0/(MaxSizeVertexCache-3); -F32 find_vertex_score(LLVCacheVertexData& data) +F64 find_vertex_score(LLVCacheVertexData& data) { - if (data.mActiveTriangles == 0) - { //no triangle references this vertex - return -1.f; - } + F64 score = -1.0; - F32 score = 0.f; - - S32 cache_idx = data.mCacheTag; + if (data.mActiveTriangles >= 0) + { + score = 0.0; + + S32 cache_idx = data.mCacheTag; - if (cache_idx < 0) - { - //not in cache - } - else - { - if (cache_idx < 3) - { //vertex was in the last triangle - score = FindVertexScore_LastTriScore; + if (cache_idx < 0) + { + //not in cache } else - { //more points for being higher in the cache - F32 scaler = 1.f/(MaxSizeVertexCache-3); - score = 1.f-((cache_idx-3)*scaler); - score = powf(score, FindVertexScore_CacheDecayPower); + { + if (cache_idx < 3) + { //vertex was in the last triangle + score = FindVertexScore_LastTriScore; + } + else + { //more points for being higher in the cache + score = 1.0-((cache_idx-3)*FindVertexScore_Scaler); + score = pow(score, FindVertexScore_CacheDecayPower); + } } - } - //bonus points for having low valence - F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); - score += FindVertexScore_ValenceBoostScale * valence_boost; + //bonus points for having low valence + F64 valence_boost = pow((F64)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + score += FindVertexScore_ValenceBoostScale * valence_boost; + } return score; } @@ -5707,32 +5695,44 @@ public: void updateScores() { - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) - { //trailing 3 vertices aren't actually in the cache for scoring purposes - if (mCache[i]) + LLVCacheVertexData** data_iter = mCache+MaxSizeVertexCache; + LLVCacheVertexData** end_data = mCache+MaxSizeVertexCache+3; + + while(data_iter != end_data) + { + LLVCacheVertexData* data = *data_iter++; + //trailing 3 vertices aren't actually in the cache for scoring purposes + if (data) { - mCache[i]->mCacheTag = -1; + data->mCacheTag = -1; } } - for (U32 i = 0; i < MaxSizeVertexCache; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache; + + while (data_iter != end_data) { //update scores of vertices in cache - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert_always(mCache[i]->mCacheTag == i); + data->mScore = find_vertex_score(*data); } } mBestTriangle = NULL; //update triangle scores - for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache+3; + + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) + for (std::vector<LLVCacheTriangleData*>::iterator iter = data->mTriangles.begin(), end_iter = data->mTriangles.end(); iter != end_iter; ++iter) { - LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; + LLVCacheTriangleData* tri = *iter; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5749,13 +5749,17 @@ public: } //knock trailing 3 vertices off the cache - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache+MaxSizeVertexCache; + end_data = mCache+MaxSizeVertexCache+3; + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter; + if (data) { - llassert_always(mCache[i]->mCacheTag == -1); - mCache[i] = NULL; + llassert(data->mCacheTag == -1); + *data_iter = NULL; } + ++data_iter; } } }; @@ -5765,6 +5769,9 @@ void LLVolumeFace::cacheOptimize() { //optimize for vertex cache according to Forsyth method: // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html + llassert(!mOptimized); + mOptimized = TRUE; + LLVCacheLRU cache; if (mNumVertices < 3) @@ -5810,12 +5817,14 @@ void LLVolumeFace::cacheOptimize() for (U32 i = 0; i < mNumVertices; i++) { //initialize score values (no cache -- might try a fifo cache here) - vertex_data[i].mScore = find_vertex_score(vertex_data[i]); - vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + LLVCacheVertexData& data = vertex_data[i]; - for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) + data.mScore = find_vertex_score(data); + data.mActiveTriangles = data.mTriangles.size(); + + for (U32 j = 0; j < data.mActiveTriangles; ++j) { - vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; + data.mTriangles[j]->mScore += data.mScore; } } @@ -5885,10 +5894,10 @@ void LLVolumeFace::cacheOptimize() //allocate space for new buffer S32 num_verts = mNumVertices; - LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + LLVector4a* pos = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + LLVector4a* norm = pos + num_verts; + LLVector2* tc = (LLVector2*) (norm + num_verts); LLVector4a* wght = NULL; if (mWeights) @@ -5936,9 +5945,8 @@ void LLVolumeFace::cacheOptimize() mIndices[i] = new_idx[mIndices[i]]; } - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mWeights); ll_aligned_free_16(mBinormals); @@ -6655,24 +6663,22 @@ void LLVolumeFace::createBinormals() void LLVolumeFace::resizeVertices(S32 num_verts) { - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); + ll_aligned_free(mPositions); + //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mBinormals); - ll_aligned_free_16(mTexCoords); - + mBinormals = NULL; if (num_verts) { - mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mNormals, 16); - //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - ll_assert_aligned(mTexCoords, 16); + + mPositions = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + mNormals = mPositions+num_verts; + mTexCoords = (LLVector2*) (mNormals+num_verts); + + ll_assert_aligned(mPositions, 64); } else { @@ -6682,6 +6688,7 @@ void LLVolumeFace::resizeVertices(S32 num_verts) } mNumVertices = num_verts; + mNumAllocatedVertices = num_verts; } void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) @@ -6692,27 +6699,43 @@ void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) { S32 new_verts = mNumVertices+1; - S32 new_size = new_verts*16; - S32 old_size = mNumVertices*16; - //positions - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); - ll_assert_aligned(mPositions,16); + if (new_verts > mNumAllocatedVertices) + { + //double buffer size on expansion + new_verts *= 2; + + S32 new_tc_size = ((new_verts*8)+0xF) & ~0xF; + S32 old_tc_size = ((mNumVertices*8)+0xF) & ~0xF; + + S32 old_vsize = mNumVertices*16; + + S32 new_size = new_verts*16*2+new_tc_size; + + LLVector4a* old_buf = mPositions; + + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions+new_verts; + mTexCoords = (LLVector2*) (mNormals+new_verts); + + //positions + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + + //normals + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); - //normals - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); - ll_assert_aligned(mNormals,16); - - //tex coords - new_size = ((new_verts*8)+0xF) & ~0xF; - old_size = ((mNumVertices*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); - ll_assert_aligned(mTexCoords,16); + //tex coords + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tc_size); + //just clear binormals + ll_aligned_free_16(mBinormals); - //just clear binormals - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free(old_buf); + + mNumAllocatedVertices = new_verts; + + mBinormals = NULL; + } mPositions[mNumVertices] = pos; mNormals[mNumVertices] = norm; @@ -6801,13 +6824,23 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat llerrs << "Cannot append empty face." << llendl; } + U32 old_vsize = mNumVertices*16; + U32 new_vsize = new_count * 16; + U32 old_tcsize = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_tcsize = (new_count*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_size = new_vsize * 2 + new_tcsize; + //allocate new buffer space - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); - ll_assert_aligned(mTexCoords, 16); + LLVector4a* old_buf = mPositions; + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions + new_count; + mTexCoords = (LLVector2*) (mNormals+new_count); + + mNumAllocatedVertices = new_count; + + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tcsize); mNumVertices = new_count; @@ -6903,12 +6936,15 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - S32 begin_stex = llfloor( profile[mBeginS].mV[2] ); + F32 begin_stex = floorf(profile[mBeginS].mV[2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; + S32 end_t = mBeginT+mNumT; + bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2; + // Copy the vertices into the array - for (t = mBeginT; t < mBeginT + mNumT; t++) + for (t = mBeginT; t < end_t; t++) { tt = path_data[t].mTexT; for (s = 0; s < num_s; s++) @@ -6959,9 +6995,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) norm[cur_vertex].clear(); cur_vertex++; - if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) + if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); tc[cur_vertex] = LLVector2(ss,tt); @@ -7076,30 +7111,38 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } //generate normals - for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle + U32 count = mNumIndices/3; + + for (U32 i = 0; i < count; i++) //for each triangle { const U16* idx = &(mIndices[i*3]); - - LLVector4a* v[] = - { pos+idx[0], pos+idx[1], pos+idx[2] }; + LLVector4a& v0 = *(pos+idx[0]); + LLVector4a& v1 = *(pos+idx[1]); + LLVector4a& v2 = *(pos+idx[2]); - LLVector4a* n[] = - { norm+idx[0], norm+idx[1], norm+idx[2] }; + LLVector4a& n0 = *(norm+idx[0]); + LLVector4a& n1 = *(norm+idx[1]); + LLVector4a& n2 = *(norm+idx[2]); //calculate triangle normal LLVector4a a, b, c; - a.setSub(*v[0], *v[1]); - b.setSub(*v[0], *v[2]); + a.setSub(v0, v1); + b.setSub(v0, v2); c.setCross3(a,b); - n[0]->add(c); - n[1]->add(c); - n[2]->add(c); + n0.add(c); + n1.add(c); + n2.add(c); //even out quad contributions - n[i%2+1]->add(c); + switch (i%2+1) + { + case 0: n0.add(c); break; + case 1: n1.add(c); break; + case 2: n2.add(c); break; + }; } // adjust normals based on wrapping and stitching |