From f5e5396c3a17b6bcdc4eb49cda304a9047920fe1 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 26 Feb 2013 15:15:08 -0600 Subject: MAINT-2371 First set of profile guided optimizations. Reviewed by Graham --- indra/llmath/llvolume.cpp | 97 ++++++++++++++++++++++++++--------------------- indra/llmath/llvolume.h | 3 ++ 2 files changed, 56 insertions(+), 44 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 02c8d2b86f..77d89568df 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5187,7 +5187,8 @@ LLVolumeFace::LLVolumeFace() : mTexCoords(NULL), mIndices(NULL), mWeights(NULL), - mOctree(NULL) + mOctree(NULL), + mOptimized(FALSE) { mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); mExtents[0].splat(-0.5f); @@ -5517,14 +5518,14 @@ class LLVCacheVertexData public: S32 mIdx; S32 mCacheTag; - F32 mScore; + F64 mScore; U32 mActiveTriangles; std::vector mTriangles; LLVCacheVertexData() { mCacheTag = -1; - mScore = 0.f; + mScore = 0.0; mActiveTriangles = 0; mIdx = -1; } @@ -5534,13 +5535,13 @@ class LLVCacheTriangleData { public: bool mActive; - F32 mScore; + F64 mScore; LLVCacheVertexData* mVertex[3]; LLVCacheTriangleData() { mActive = true; - mScore = 0.f; + mScore = 0.0; mVertex[0] = mVertex[1] = mVertex[2] = NULL; } @@ -5551,7 +5552,7 @@ public: { if (mVertex[i]) { - llassert_always(mVertex[i]->mActiveTriangles > 0); + llassert(mVertex[i]->mActiveTriangles > 0); mVertex[i]->mActiveTriangles--; } } @@ -5563,44 +5564,44 @@ public: } }; -const F32 FindVertexScore_CacheDecayPower = 1.5f; -const F32 FindVertexScore_LastTriScore = 0.75f; -const F32 FindVertexScore_ValenceBoostScale = 2.0f; -const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const F64 FindVertexScore_CacheDecayPower = 1.5; +const F64 FindVertexScore_LastTriScore = 0.75; +const F64 FindVertexScore_ValenceBoostScale = 2.0; +const F64 FindVertexScore_ValenceBoostPower = 0.5; const U32 MaxSizeVertexCache = 
32; +const F64 FindVertexScore_Scaler = 1.0/(MaxSizeVertexCache-3); -F32 find_vertex_score(LLVCacheVertexData& data) +F64 find_vertex_score(LLVCacheVertexData& data) { - if (data.mActiveTriangles == 0) - { //no triangle references this vertex - return -1.f; - } - - F32 score = 0.f; + F64 score = -1.0; - S32 cache_idx = data.mCacheTag; + if (data.mActiveTriangles >= 0) + { + score = 0.0; + + S32 cache_idx = data.mCacheTag; - if (cache_idx < 0) - { - //not in cache - } - else - { - if (cache_idx < 3) - { //vertex was in the last triangle - score = FindVertexScore_LastTriScore; + if (cache_idx < 0) + { + //not in cache } else - { //more points for being higher in the cache - F32 scaler = 1.f/(MaxSizeVertexCache-3); - score = 1.f-((cache_idx-3)*scaler); - score = powf(score, FindVertexScore_CacheDecayPower); + { + if (cache_idx < 3) + { //vertex was in the last triangle + score = FindVertexScore_LastTriScore; + } + else + { //more points for being higher in the cache + score = 1.0-((cache_idx-3)*FindVertexScore_Scaler); + score = pow(score, FindVertexScore_CacheDecayPower); + } } - } - //bonus points for having low valence - F32 valence_boost = powf((F32)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); - score += FindVertexScore_ValenceBoostScale * valence_boost; + //bonus points for having low valence + F64 valence_boost = pow((F64)data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + score += FindVertexScore_ValenceBoostScale * valence_boost; + } return score; } @@ -5720,7 +5721,7 @@ public: if (mCache[i]) { mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert_always(mCache[i]->mCacheTag == i); + llassert(mCache[i]->mCacheTag == i); } } @@ -5728,11 +5729,14 @@ public: //update triangle scores for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) { - if (mCache[i]) + LLVCacheVertexData* data = mCache[i]; + if (data) { - for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) + U32 count = data->mTriangles.size(); + + for (U32 j = 0; j < count; 
++j) { - LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; + LLVCacheTriangleData* tri = data->mTriangles[j]; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5753,7 +5757,7 @@ public: { if (mCache[i]) { - llassert_always(mCache[i]->mCacheTag == -1); + llassert(mCache[i]->mCacheTag == -1); mCache[i] = NULL; } } @@ -5765,6 +5769,9 @@ void LLVolumeFace::cacheOptimize() { //optimize for vertex cache according to Forsyth method: // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html + llassert(!mOptimized); + mOptimized = TRUE; + LLVCacheLRU cache; if (mNumVertices < 3) @@ -5810,12 +5817,14 @@ void LLVolumeFace::cacheOptimize() for (U32 i = 0; i < mNumVertices; i++) { //initialize score values (no cache -- might try a fifo cache here) - vertex_data[i].mScore = find_vertex_score(vertex_data[i]); - vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + LLVCacheVertexData& data = vertex_data[i]; + + data.mScore = find_vertex_score(data); + data.mActiveTriangles = data.mTriangles.size(); - for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) + for (U32 j = 0; j < data.mActiveTriangles; ++j) { - vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; + data.mTriangles[j]->mScore += data.mScore; } } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index c845556557..99158c1c44 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -933,6 +933,9 @@ public: LLOctreeNode* mOctree; + //whether or not face has been cache optimized + BOOL mOptimized; + private: BOOL createUnCutCubeCap(LLVolume* volume, BOOL partial_build = FALSE); BOOL createCap(LLVolume* volume, BOOL partial_build = FALSE); -- cgit v1.3 From 609ed855e1160505238378a1be49e2b92e8496f5 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Mon, 4 Mar 2013 18:01:42 -0600 Subject: MAINT-2371 More optimizations. 
Reviewed by Graham --- indra/llcommon/llmemory.h | 13 +- indra/llmath/llvolume.cpp | 220 ++++++++++++--------- indra/llmath/llvolume.h | 1 + indra/llrender/llgl.cpp | 11 +- indra/llrender/llimagegl.cpp | 34 +++- indra/llrender/llrender.cpp | 10 + indra/llrender/llshadermgr.cpp | 46 ++++- indra/llrender/llshadermgr.h | 44 +++++ indra/llrender/llvertexbuffer.cpp | 88 ++++----- indra/llrender/llvertexbuffer.h | 4 +- .../app_settings/shaders/class1/deferred/giF.glsl | 190 ------------------ .../shaders/class1/deferred/waterF.glsl | 6 +- .../shaders/class1/environment/underWaterF.glsl | 2 - .../shaders/class1/environment/waterF.glsl | 2 - indra/newview/llappviewer.cpp | 2 +- indra/newview/lldrawable.cpp | 2 +- indra/newview/lldrawpool.cpp | 1 + indra/newview/lldrawpoolavatar.cpp | 4 +- indra/newview/lldrawpoolterrain.cpp | 8 +- indra/newview/lldrawpoolwater.cpp | 62 +++--- indra/newview/llface.cpp | 99 +++++++--- indra/newview/llfasttimerview.cpp | 2 +- indra/newview/llviewerdisplay.cpp | 4 +- indra/newview/llviewershadermgr.cpp | 104 +++------- indra/newview/llviewershadermgr.h | 69 +------ indra/newview/llvovolume.cpp | 2 +- indra/newview/llwaterparammanager.cpp | 12 +- indra/newview/llwlparammanager.cpp | 4 +- indra/newview/pipeline.cpp | 10 +- 29 files changed, 474 insertions(+), 582 deletions(-) delete mode 100644 indra/newview/app_settings/shaders/class1/deferred/giF.glsl (limited to 'indra/llmath') diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index e725bdd9fa..46cabfadcd 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -38,17 +38,28 @@ class LLMutex ; inline void* ll_aligned_malloc( size_t size, int align ) { +#if defined(LL_WINDOWS) + return _aligned_malloc(size, align); +#else void* mem = malloc( size + (align - 1) + sizeof(void*) ); char* aligned = ((char*)mem) + sizeof(void*); aligned += align - ((uintptr_t)aligned & (align - 1)); ((void**)aligned)[-1] = mem; return aligned; +#endif } inline void ll_aligned_free( 
void* ptr ) { - free( ((void**)ptr)[-1] ); +#if defined(LL_WINDOWS) + _aligned_free(ptr); +#else + if (ptr) + { + free( ((void**)ptr)[-1] ); + } +#endif } #if !LL_USE_TCMALLOC diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 77d89568df..d614695efb 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5180,6 +5180,7 @@ LLVolumeFace::LLVolumeFace() : mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5204,6 +5205,7 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) mNumS(0), mNumT(0), mNumVertices(0), + mNumAllocatedVertices(0), mNumIndices(0), mPositions(NULL), mNormals(NULL), @@ -5258,12 +5260,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) { LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); } - else - { - ll_aligned_free_16(mTexCoords) ; - mTexCoords = NULL ; - } - if (src.mBinormals) { @@ -5311,10 +5307,11 @@ void LLVolumeFace::freeData() { ll_aligned_free_16(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + + //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately mNormals = NULL; - ll_aligned_free_16(mTexCoords); mTexCoords = NULL; + ll_aligned_free_16(mIndices); mIndices = NULL; ll_aligned_free_16(mBinormals); @@ -5496,18 +5493,6 @@ void LLVolumeFace::optimize(F32 angle_cutoff) llassert(new_face.mNumIndices == mNumIndices); llassert(new_face.mNumVertices <= mNumVertices); - if (angle_cutoff > 1.f && !mNormals) - { - ll_aligned_free_16(new_face.mNormals); - new_face.mNormals = NULL; - } - - if (!mTexCoords) - { - ll_aligned_free_16(new_face.mTexCoords); - new_face.mTexCoords = NULL; - } - swapData(new_face); } @@ -5708,35 +5693,44 @@ public: void updateScores() { - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) - { //trailing 3 vertices aren't actually in the cache for scoring purposes - if (mCache[i]) + 
LLVCacheVertexData** data_iter = mCache+MaxSizeVertexCache; + LLVCacheVertexData** end_data = mCache+MaxSizeVertexCache+3; + + while(data_iter != end_data) + { + LLVCacheVertexData* data = *data_iter++; + //trailing 3 vertices aren't actually in the cache for scoring purposes + if (data) { - mCache[i]->mCacheTag = -1; + data->mCacheTag = -1; } } - for (U32 i = 0; i < MaxSizeVertexCache; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache; + + while (data_iter != end_data) { //update scores of vertices in cache - if (mCache[i]) + LLVCacheVertexData* data = *data_iter++; + if (data) { - mCache[i]->mScore = find_vertex_score(*(mCache[i])); - llassert(mCache[i]->mCacheTag == i); + data->mScore = find_vertex_score(*data); } } mBestTriangle = NULL; //update triangle scores - for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache; + end_data = mCache+MaxSizeVertexCache+3; + + while (data_iter != end_data) { - LLVCacheVertexData* data = mCache[i]; + LLVCacheVertexData* data = *data_iter++; if (data) { - U32 count = data->mTriangles.size(); - - for (U32 j = 0; j < count; ++j) + for (std::vector::iterator iter = data->mTriangles.begin(), end_iter = data->mTriangles.end(); iter != end_iter; ++iter) { - LLVCacheTriangleData* tri = data->mTriangles[j]; + LLVCacheTriangleData* tri = *iter; if (tri->mActive) { tri->mScore = tri->mVertex[0]->mScore; @@ -5753,13 +5747,17 @@ public: } //knock trailing 3 vertices off the cache - for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) + data_iter = mCache+MaxSizeVertexCache; + end_data = mCache+MaxSizeVertexCache+3; + while (data_iter != end_data) { - if (mCache[i]) + LLVCacheVertexData* data = *data_iter; + if (data) { - llassert(mCache[i]->mCacheTag == -1); - mCache[i] = NULL; + llassert(data->mCacheTag == -1); + *data_iter = NULL; } + ++data_iter; } } }; @@ -5894,10 +5892,10 @@ void LLVolumeFace::cacheOptimize() //allocate space for new buffer S32 num_verts = mNumVertices; - LLVector4a* pos = 
(LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + LLVector4a* pos = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + LLVector4a* norm = pos + num_verts; + LLVector2* tc = (LLVector2*) (norm + num_verts); LLVector4a* wght = NULL; if (mWeights) @@ -5945,9 +5943,8 @@ void LLVolumeFace::cacheOptimize() mIndices[i] = new_idx[mIndices[i]]; } - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); - ll_aligned_free_16(mTexCoords); + ll_aligned_free(mPositions); + // DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mWeights); ll_aligned_free_16(mBinormals); @@ -6664,24 +6661,22 @@ void LLVolumeFace::createBinormals() void LLVolumeFace::resizeVertices(S32 num_verts) { - ll_aligned_free_16(mPositions); - ll_aligned_free_16(mNormals); + ll_aligned_free(mPositions); + //DO NOT free mNormals and mTexCoords as they are part of mPositions buffer ll_aligned_free_16(mBinormals); - ll_aligned_free_16(mTexCoords); - + mBinormals = NULL; if (num_verts) { - mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); - ll_assert_aligned(mNormals, 16); - //pad texture coordinate block end to allow for QWORD reads S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); - ll_assert_aligned(mTexCoords, 16); + + mPositions = (LLVector4a*) ll_aligned_malloc(sizeof(LLVector4a)*2*num_verts+size, 64); + mNormals = mPositions+num_verts; + mTexCoords = (LLVector2*) (mNormals+num_verts); + + ll_assert_aligned(mPositions, 64); } else { @@ -6691,6 +6686,7 @@ void LLVolumeFace::resizeVertices(S32 num_verts) } 
mNumVertices = num_verts; + mNumAllocatedVertices = num_verts; } void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) @@ -6701,27 +6697,43 @@ void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) { S32 new_verts = mNumVertices+1; - S32 new_size = new_verts*16; - S32 old_size = mNumVertices*16; - //positions - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_size, old_size); - ll_assert_aligned(mPositions,16); + if (new_verts > mNumAllocatedVertices) + { + //double buffer size on expansion + new_verts *= 2; + + S32 new_tc_size = ((new_verts*8)+0xF) & ~0xF; + S32 old_tc_size = ((mNumVertices*8)+0xF) & ~0xF; + + S32 old_vsize = mNumVertices*16; + + S32 new_size = new_verts*16*2+new_tc_size; + + LLVector4a* old_buf = mPositions; + + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions+new_verts; + mTexCoords = (LLVector2*) (mNormals+new_verts); + + //positions + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + + //normals + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); - //normals - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_size, old_size); - ll_assert_aligned(mNormals,16); - - //tex coords - new_size = ((new_verts*8)+0xF) & ~0xF; - old_size = ((mNumVertices*8)+0xF) & ~0xF; - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, new_size, old_size); - ll_assert_aligned(mTexCoords,16); + //tex coords + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tc_size); + //just clear binormals + ll_aligned_free_16(mBinormals); - //just clear binormals - ll_aligned_free_16(mBinormals); - mBinormals = NULL; + ll_aligned_free(old_buf); + + mNumAllocatedVertices = new_verts; + + mBinormals = NULL; + } mPositions[mNumVertices] = pos; mNormals[mNumVertices] = norm; @@ -6810,13 
+6822,23 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat llerrs << "Cannot append empty face." << llendl; } + U32 old_vsize = mNumVertices*16; + U32 new_vsize = new_count * 16; + U32 old_tcsize = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_tcsize = (new_count*sizeof(LLVector2)+0xF) & ~0xF; + U32 new_size = new_vsize * 2 + new_tcsize; + //allocate new buffer space - mPositions = (LLVector4a*) ll_aligned_realloc_16(mPositions, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mPositions, 16); - mNormals = (LLVector4a*) ll_aligned_realloc_16(mNormals, new_count*sizeof(LLVector4a), mNumVertices*sizeof(LLVector4a)); - ll_assert_aligned(mNormals, 16); - mTexCoords = (LLVector2*) ll_aligned_realloc_16(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF, (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF); - ll_assert_aligned(mTexCoords, 16); + LLVector4a* old_buf = mPositions; + mPositions = (LLVector4a*) ll_aligned_malloc(new_size, 64); + mNormals = mPositions + new_count; + mTexCoords = (LLVector2*) (mNormals+new_count); + + mNumAllocatedVertices = new_count; + + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) old_buf, old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) (old_buf+mNumVertices), old_vsize); + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) (old_buf+mNumVertices*2), old_tcsize); mNumVertices = new_count; @@ -6912,12 +6934,15 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) LLVector4a* pos = (LLVector4a*) mPositions; LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - S32 begin_stex = llfloor( profile[mBeginS].mV[2] ); + F32 begin_stex = floorf(profile[mBeginS].mV[2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? 
mNumS/2 : mNumS; S32 cur_vertex = 0; + S32 end_t = mBeginT+mNumT; + bool test = (mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2; + // Copy the vertices into the array - for (t = mBeginT; t < mBeginT + mNumT; t++) + for (t = mBeginT; t < end_t; t++) { tt = path_data[t].mTexT; for (s = 0; s < num_s; s++) @@ -6968,9 +6993,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) norm[cur_vertex].clear(); cur_vertex++; - if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) + if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); tc[cur_vertex] = LLVector2(ss,tt); @@ -7085,30 +7109,38 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } //generate normals - for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle + U32 count = mNumIndices/3; + + for (U32 i = 0; i < count; i++) //for each triangle { const U16* idx = &(mIndices[i*3]); - - LLVector4a* v[] = - { pos+idx[0], pos+idx[1], pos+idx[2] }; + LLVector4a& v0 = *(pos+idx[0]); + LLVector4a& v1 = *(pos+idx[1]); + LLVector4a& v2 = *(pos+idx[2]); - LLVector4a* n[] = - { norm+idx[0], norm+idx[1], norm+idx[2] }; + LLVector4a& n0 = *(norm+idx[0]); + LLVector4a& n1 = *(norm+idx[1]); + LLVector4a& n2 = *(norm+idx[2]); //calculate triangle normal LLVector4a a, b, c; - a.setSub(*v[0], *v[1]); - b.setSub(*v[0], *v[2]); + a.setSub(v0, v1); + b.setSub(v0, v2); c.setCross3(a,b); - n[0]->add(c); - n[1]->add(c); - n[2]->add(c); + n0.add(c); + n1.add(c); + n2.add(c); //even out quad contributions - n[i%2+1]->add(c); + switch (i%2+1) + { + case 0: n0.add(c); break; + case 1: n1.add(c); break; + case 2: n2.add(c); break; + }; } // adjust normals based on wrapping and stitching diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 99158c1c44..1d3b0fe52f 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -912,6 +912,7 @@ public: LLVector2 mTexCoordExtents[2]; //minimum and maximum of texture coordinates of the face. 
S32 mNumVertices; + S32 mNumAllocatedVertices; S32 mNumIndices; LLVector4a* mPositions; diff --git a/indra/llrender/llgl.cpp b/indra/llrender/llgl.cpp index 89f1f36297..58bd346c15 100644 --- a/indra/llrender/llgl.cpp +++ b/indra/llrender/llgl.cpp @@ -741,7 +741,7 @@ bool LLGLManager::initGL() #if LL_WINDOWS if (mHasDebugOutput && gDebugGL) { //setup debug output callback - //glDebugMessageControlARB(GL_DONT_CARE, GL_DONT_CARE, GL_DEBUG_SEVERITY_LOW_ARB, 0, NULL, GL_TRUE); + glDebugMessageControlARB(GL_DONT_CARE, GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB, GL_DEBUG_SEVERITY_LOW_ARB, 0, NULL, GL_TRUE); glDebugMessageCallbackARB((GLDEBUGPROCARB) gl_debug_callback, NULL); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB); } @@ -1478,7 +1478,7 @@ void do_assert_glerror() void assert_glerror() { - if (!gGLActive) +/* if (!gGLActive) { //llwarns << "GL used while not active!" << llendl; @@ -1487,8 +1487,13 @@ void assert_glerror() //ll_fail("GL used while not active"); } } +*/ - if (gDebugGL) + if (!gDebugGL) + { + //funny looking if for branch prediction -- gDebugGL is almost always false and assert_glerror is called often + } + else { do_assert_glerror(); } diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp index a4d7872ec2..ef2648ae98 100755 --- a/indra/llrender/llimagegl.cpp +++ b/indra/llrender/llimagegl.cpp @@ -709,9 +709,12 @@ void LLImageGL::setImage(const U8* data_in, BOOL data_hasmips) mMipLevels = wpo2(llmax(w, h)); - //use legacy mipmap generation mode - glTexParameteri(mTarget, GL_GENERATE_MIPMAP, GL_TRUE); - + if (!gGLManager.mHasFramebufferObject) + { + //use legacy mipmap generation mode + glTexParameteri(mTarget, GL_GENERATE_MIPMAP, GL_TRUE); + } + LLImageGL::setManualImage(mTarget, 0, mFormatInternal, w, h, mFormatPrimary, mFormatType, @@ -726,6 +729,11 @@ void LLImageGL::setImage(const U8* data_in, BOOL data_hasmips) glPixelStorei(GL_UNPACK_SWAP_BYTES, 0); stop_glerror(); } + + if (gGLManager.mHasFramebufferObject) + { + 
glGenerateMipmap(mTarget); + } } } else @@ -1057,6 +1065,16 @@ void LLImageGL::generateTextures(LLTexUnit::eTextureType type, U32 format, S32 n { bool empty = true; + if (LLRender::sGLCoreProfile) + { + switch (format) + { + case GL_LUMINANCE8: format = GL_RGB8; break; + case GL_LUMINANCE8_ALPHA8: + case GL_ALPHA8: format = GL_RGBA8; break; + } + } + dead_texturelist_t::iterator iter = sDeadTextureList[type].find(format); if (iter != sDeadTextureList[type].end()) @@ -1084,6 +1102,16 @@ void LLImageGL::deleteTextures(LLTexUnit::eTextureType type, U32 format, S32 mip { if (gGLManager.mInited) { + if (LLRender::sGLCoreProfile) + { + switch (format) + { + case GL_LUMINANCE8: format = GL_RGB8; break; + case GL_LUMINANCE8_ALPHA8: + case GL_ALPHA8: format = GL_RGBA8; break; + } + } + if (format == 0 || type == LLTexUnit::TT_CUBE_MAP || mip_levels == -1) { //unknown internal format or unknown number of mip levels, not safe to reuse glDeleteTextures(numTextures, textures); diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp index cb82cbfb74..dada27557e 100644 --- a/indra/llrender/llrender.cpp +++ b/indra/llrender/llrender.cpp @@ -1070,6 +1070,16 @@ LLRender::~LLRender() void LLRender::init() { + if (sGLCoreProfile && !LLVertexBuffer::sUseVAO) + { //bind a dummy vertex array object so we're core profile compliant +#ifdef GL_ARB_vertex_array_object + U32 ret; + glGenVertexArrays(1, &ret); + glBindVertexArray(ret); +#endif + } + + llassert_always(mBuffer.isNull()) ; stop_glerror(); mBuffer = new LLVertexBuffer(immediate_mask, 0); diff --git a/indra/llrender/llshadermgr.cpp b/indra/llrender/llshadermgr.cpp index b6a9a6b653..825f80a6dc 100644 --- a/indra/llrender/llshadermgr.cpp +++ b/indra/llrender/llshadermgr.cpp @@ -974,7 +974,9 @@ void LLShaderMgr::initAttribsAndUniforms() mReservedUniforms.push_back("texture_matrix1"); mReservedUniforms.push_back("texture_matrix2"); mReservedUniforms.push_back("texture_matrix3"); - llassert(mReservedUniforms.size() == 
LLShaderMgr::TEXTURE_MATRIX3+1); + mReservedUniforms.push_back("object_plane_s"); + mReservedUniforms.push_back("object_plane_t"); + llassert(mReservedUniforms.size() == LLShaderMgr::OBJECT_PLANE_T+1); mReservedUniforms.push_back("viewport"); @@ -1116,6 +1118,48 @@ void LLShaderMgr::initAttribsAndUniforms() mReservedUniforms.push_back("bloomMap"); mReservedUniforms.push_back("projectionMap"); + mReservedUniforms.push_back("matrixPalette"); + + + mReservedUniforms.reserve(12); + mReservedUniforms.push_back("screenTex"); + mReservedUniforms.push_back("screenDepth"); + mReservedUniforms.push_back("refTex"); + mReservedUniforms.push_back("eyeVec"); + mReservedUniforms.push_back("time"); + mReservedUniforms.push_back("d1"); + mReservedUniforms.push_back("d2"); + mReservedUniforms.push_back("lightDir"); + mReservedUniforms.push_back("specular"); + mReservedUniforms.push_back("lightExp"); + mReservedUniforms.push_back("waterFogColor"); + mReservedUniforms.push_back("waterFogDensity"); + mReservedUniforms.push_back("waterFogKS"); + mReservedUniforms.push_back("refScale"); + mReservedUniforms.push_back("waterHeight"); + mReservedUniforms.push_back("waterPlane"); + mReservedUniforms.push_back("normScale"); + mReservedUniforms.push_back("fresnelScale"); + mReservedUniforms.push_back("fresnelOffset"); + mReservedUniforms.push_back("blurMultiplier"); + mReservedUniforms.push_back("sunAngle"); + mReservedUniforms.push_back("scaledAngle"); + mReservedUniforms.push_back("sunAngle2"); + + mReservedUniforms.push_back("camPosLocal"); + + mReservedUniforms.push_back("gWindDir"); + mReservedUniforms.push_back("gSinWaveParams"); + mReservedUniforms.push_back("gGravity"); + + mReservedUniforms.push_back("detail_0"); + mReservedUniforms.push_back("detail_1"); + mReservedUniforms.push_back("detail_2"); + mReservedUniforms.push_back("detail_3"); + mReservedUniforms.push_back("alpha_ramp"); + + mReservedUniforms.push_back("origin"); + llassert(mReservedUniforms.size() == 
END_RESERVED_UNIFORMS); std::set dupe_check; diff --git a/indra/llrender/llshadermgr.h b/indra/llrender/llshadermgr.h index 7a16b7c20f..77e90372e0 100644 --- a/indra/llrender/llshadermgr.h +++ b/indra/llrender/llshadermgr.h @@ -47,6 +47,8 @@ public: TEXTURE_MATRIX1, TEXTURE_MATRIX2, TEXTURE_MATRIX3, + OBJECT_PLANE_S, + OBJECT_PLANE_T, VIEWPORT, LIGHT_POSITION, LIGHT_DIRECTION, @@ -164,7 +166,49 @@ public: DEFERRED_LIGHT, DEFERRED_BLOOM, DEFERRED_PROJECTION, + + AVATAR_MATRIX, + + WATER_SCREENTEX, + WATER_SCREENDEPTH, + WATER_REFTEX, + WATER_EYEVEC, + WATER_TIME, + WATER_WAVE_DIR1, + WATER_WAVE_DIR2, + WATER_LIGHT_DIR, + WATER_SPECULAR, + WATER_SPECULAR_EXP, + WATER_FOGCOLOR, + WATER_FOGDENSITY, + WATER_FOGKS, + WATER_REFSCALE, + WATER_WATERHEIGHT, + WATER_WATERPLANE, + WATER_NORM_SCALE, + WATER_FRESNEL_SCALE, + WATER_FRESNEL_OFFSET, + WATER_BLUR_MULTIPLIER, + WATER_SUN_ANGLE, + WATER_SCALED_ANGLE, + WATER_SUN_ANGLE2, + + WL_CAMPOSLOCAL, + + AVATAR_WIND, + AVATAR_SINWAVE, + AVATAR_GRAVITY, + + TERRAIN_DETAIL0, + TERRAIN_DETAIL1, + TERRAIN_DETAIL2, + TERRAIN_DETAIL3, + TERRAIN_ALPHARAMP, + + SHINY_ORIGIN, + END_RESERVED_UNIFORMS + } eGLSLReservedUniforms; // singleton pattern implementation diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index f152911b24..1d257d8415 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -202,7 +202,7 @@ volatile U8* LLVBOPool::allocate(U32& name, U32 size, bool for_seed) glBufferDataARB(mType, size, 0, mUsage); if (mUsage != GL_DYNAMIC_COPY_ARB) { //data will be provided by application - ret = (U8*) ll_aligned_malloc_16(size); + ret = (U8*) ll_aligned_malloc(size, 64); } } else @@ -256,7 +256,7 @@ void LLVBOPool::release(U32 name, volatile U8* buffer, U32 size) llassert(vbo_block_size(size) == size); deleteBuffer(name); - ll_aligned_free_16((U8*) buffer); + ll_aligned_free((U8*) buffer); if (mType == GL_ARRAY_BUFFER_ARB) { @@ -1298,7 +1298,7 @@ void 
LLVertexBuffer::allocateBuffer(S32 nverts, S32 nindices, bool create) //actually allocate space for the vertex buffer if using VBO mapping flush(); - if (gGLManager.mHasVertexArrayObject && useVBOs() && (LLRender::sGLCoreProfile || sUseVAO)) + if (gGLManager.mHasVertexArrayObject && useVBOs() && (sUseVAO)) { #if GL_ARB_vertex_array_object mGLArray = getVAOName(); @@ -1454,21 +1454,18 @@ bool LLVertexBuffer::useVBOs() const //---------------------------------------------------------------------------- -bool expand_region(LLVertexBuffer::MappedRegion& region, S32 index, S32 count) +bool expand_region(LLVertexBuffer::MappedRegion& region, S32 start, S32 end) { - S32 end = index+count; - S32 region_end = region.mIndex+region.mCount; - if (end < region.mIndex || - index > region_end) + start > region.mEnd) { //gap exists, do not merge return false; } - S32 new_end = llmax(end, region_end); - S32 new_index = llmin(index, region.mIndex); - region.mIndex = new_index; - region.mCount = new_end-new_index; + region.mEnd = llmax(end, region.mEnd); + region.mIndex = llmin(start, region.mIndex); + region.mCount = region.mEnd-region.mIndex; + return true; } @@ -1478,7 +1475,6 @@ static LLFastTimer::DeclareTimer FTM_VBO_MAP_BUFFER("VBO Map"); // Map for data access volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, bool map_range) { - bindGLBuffer(true); if (mFinal) { llerrs << "LLVertexBuffer::mapVeretxBuffer() called on a finalized buffer." 
<< llendl; @@ -1499,23 +1495,23 @@ volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, boo bool mapped = false; //see if range is already mapped - for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) + S32 start_index = mOffsets[type]+index*sTypeSize[type]; + S32 end_index = start_index+count*sTypeSize[type]; + + for (std::vector::iterator iter = mMappedVertexRegions.begin(), end = mMappedVertexRegions.end(); iter != end; ++iter) { - MappedRegion& region = mMappedVertexRegions[i]; - if (region.mType == type) + MappedRegion& region = *iter; + if (expand_region(region, index, end_index)) { - if (expand_region(region, index, count)) - { - mapped = true; - break; - } + mapped = true; + break; } } if (!mapped) { //not already mapped, map new region - MappedRegion region(type, mMappable && map_range ? -1 : index, count); + MappedRegion region(mMappable && map_range ? -1 : start_index, end_index-start_index); mMappedVertexRegions.push_back(region); } } @@ -1539,6 +1535,7 @@ volatile U8* LLVertexBuffer::mapVertexBuffer(S32 type, S32 index, S32 count, boo { volatile U8* src = NULL; waitFence(); + bindGLBuffer(); if (gGLManager.mHasMapBufferRange) { if (map_range) @@ -1657,7 +1654,6 @@ static LLFastTimer::DeclareTimer FTM_VBO_MAP_INDEX("IBO Map"); volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range) { - bindGLIndices(true); if (mFinal) { llerrs << "LLVertexBuffer::mapIndexBuffer() called on a finalized buffer." 
<< llendl; @@ -1676,12 +1672,14 @@ volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range count = mNumIndices-index; } + S32 end = index+count; + bool mapped = false; //see if range is already mapped for (U32 i = 0; i < mMappedIndexRegions.size(); ++i) { MappedRegion& region = mMappedIndexRegions[i]; - if (expand_region(region, index, count)) + if (expand_region(region, index, end)) { mapped = true; break; @@ -1691,7 +1689,7 @@ volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range if (!mapped) { //not already mapped, map new region - MappedRegion region(TYPE_INDEX, mMappable && map_range ? -1 : index, count); + MappedRegion region(mMappable && map_range ? -1 : index, count); mMappedIndexRegions.push_back(region); } } @@ -1707,23 +1705,23 @@ volatile U8* LLVertexBuffer::mapIndexBuffer(S32 index, S32 count, bool map_range sMappedCount++; stop_glerror(); - if (gDebugGL && useVBOs()) - { - GLint elem = 0; - glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING_ARB, &elem); - - if (elem != mGLIndices) - { - llerrs << "Wrong index buffer bound!" << llendl; - } - } - if(!mMappable) { map_range = false; } else { + bindGLIndices(); + if (gDebugGL && useVBOs()) + { + GLint elem = 0; + glGetIntegerv(GL_ELEMENT_ARRAY_BUFFER_BINDING_ARB, &elem); + + if (elem != mGLIndices) + { + llerrs << "Wrong index buffer bound!" << llendl; + } + } volatile U8* src = NULL; waitFence(); if (gGLManager.mHasMapBufferRange) @@ -1837,7 +1835,7 @@ void LLVertexBuffer::unmapBuffer() llassert(mUsage != GL_DYNAMIC_COPY_ARB); LLFastTimer t(FTM_VBO_UNMAP); - bindGLBuffer(true); + bindGLBuffer(); updated_all = mIndexLocked; //both vertex and index buffers done updating if(!mMappable) @@ -1848,8 +1846,8 @@ void LLVertexBuffer::unmapBuffer() for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) { const MappedRegion& region = mMappedVertexRegions[i]; - S32 offset = region.mIndex >= 0 ? 
mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0; - S32 length = sTypeSize[region.mType]*region.mCount; + S32 offset = region.mIndex; + S32 length = region.mCount; glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, offset, length, (U8*) mMappedData+offset); stop_glerror(); } @@ -1873,8 +1871,8 @@ void LLVertexBuffer::unmapBuffer() for (U32 i = 0; i < mMappedVertexRegions.size(); ++i) { const MappedRegion& region = mMappedVertexRegions[i]; - S32 offset = region.mIndex >= 0 ? mOffsets[region.mType]+sTypeSize[region.mType]*region.mIndex : 0; - S32 length = sTypeSize[region.mType]*region.mCount; + S32 offset = region.mIndex; + S32 length = region.mCount; if (gGLManager.mHasMapBufferRange) { LLFastTimer t(FTM_VBO_FLUSH_RANGE); @@ -2083,7 +2081,6 @@ bool LLVertexBuffer::bindGLArray() if (mGLArray && sGLRenderArray != mGLArray) { { - LLFastTimer t(FTM_BIND_GL_ARRAY); #if GL_ARB_vertex_array_object glBindVertexArray(mGLArray); #endif @@ -2453,11 +2450,10 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) llglassertok(); } -LLVertexBuffer::MappedRegion::MappedRegion(S32 type, S32 index, S32 count) -: mType(type), mIndex(index), mCount(count) +LLVertexBuffer::MappedRegion::MappedRegion(S32 index, S32 count) +: mIndex(index), mCount(count) { - llassert(mType == LLVertexBuffer::TYPE_INDEX || - mType < LLVertexBuffer::TYPE_TEXTURE_INDEX); + mEnd = mIndex+mCount; } diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index a3400ae80c..52559d3505 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -104,11 +104,11 @@ public: class MappedRegion { public: - S32 mType; S32 mIndex; S32 mCount; + S32 mEnd; - MappedRegion(S32 type, S32 index, S32 count); + MappedRegion(S32 index, S32 count); }; LLVertexBuffer(const LLVertexBuffer& rhs) diff --git a/indra/newview/app_settings/shaders/class1/deferred/giF.glsl b/indra/newview/app_settings/shaders/class1/deferred/giF.glsl deleted file mode 100644 index 
da1b234240..0000000000 --- a/indra/newview/app_settings/shaders/class1/deferred/giF.glsl +++ /dev/null @@ -1,190 +0,0 @@ -/** - * @file giF.glsl - * - * $LicenseInfo:firstyear=2007&license=viewerlgpl$ - * Second Life Viewer Source Code - * Copyright (C) 2007, Linden Research, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License only. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA - * $/LicenseInfo$ - */ - -#extension GL_ARB_texture_rectangle : enable - -#ifdef DEFINE_GL_FRAGCOLOR -out vec4 frag_color; -#else -#define frag_color gl_FragColor -#endif - -uniform sampler2DRect depthMap; -uniform sampler2DRect normalMap; -uniform sampler2D noiseMap; - -uniform sampler2D diffuseGIMap; -uniform sampler2D normalGIMap; -uniform sampler2D depthGIMap; - -uniform sampler2D lightFunc; - -// Inputs -VARYING vec2 vary_fragcoord; - -uniform vec2 screen_res; - -uniform mat4 inv_proj; -uniform mat4 gi_mat; //gPipeline.mGIMatrix - eye space to sun space -uniform mat4 gi_mat_proj; //gPipeline.mGIMatrixProj - eye space to projected sun space -uniform mat4 gi_norm_mat; //gPipeline.mGINormalMatrix - eye space normal to sun space normal matrix -uniform mat4 gi_inv_proj; //gPipeline.mGIInvProj - projected sun space to sun space -uniform float gi_radius; -uniform float gi_intensity; -uniform int gi_samples; 
-uniform vec2 gi_kern[25]; -uniform vec2 gi_scale; -uniform vec3 gi_quad; -uniform vec3 gi_spec; -uniform float gi_direction_weight; -uniform float gi_light_offset; - -vec4 getPosition(vec2 pos_screen) -{ - float depth = texture2DRect(depthMap, pos_screen.xy).a; - vec2 sc = pos_screen.xy*2.0; - sc /= screen_res; - sc -= vec2(1.0,1.0); - vec4 ndc = vec4(sc.x, sc.y, 2.0*depth-1.0, 1.0); - vec4 pos = inv_proj * ndc; - pos /= pos.w; - pos.w = 1.0; - return pos; -} - -vec4 getGIPosition(vec2 gi_tc) -{ - float depth = texture2D(depthGIMap, gi_tc).a; - vec2 sc = gi_tc*2.0; - sc -= vec2(1.0, 1.0); - vec4 ndc = vec4(sc.x, sc.y, 2.0*depth-1.0, 1.0); - vec4 pos = gi_inv_proj*ndc; - pos.xyz /= pos.w; - pos.w = 1.0; - return pos; -} - -vec3 giAmbient(vec3 pos, vec3 norm) -{ - vec4 gi_c = gi_mat_proj * vec4(pos, 1.0); - gi_c.xyz /= gi_c.w; - - vec4 gi_pos = gi_mat*vec4(pos,1.0); - vec3 gi_norm = (gi_norm_mat*vec4(norm,1.0)).xyz; - gi_norm = normalize(gi_norm); - - vec2 tcx = gi_norm.xy; - vec2 tcy = gi_norm.yx; - - vec4 eye_pos = gi_mat*vec4(0,0,0,1.0); - - vec3 eye_dir = normalize(gi_pos.xyz-eye_pos.xyz/eye_pos.w); - - //vec3 eye_dir = vec3(0,0,-1); - //eye_dir = (gi_norm_mat*vec4(eye_dir, 1.0)).xyz; - //eye_dir = normalize(eye_dir); - - //float round_x = gi_scale.x; - //float round_y = gi_scale.y; - - vec3 debug = texture2D(normalGIMap, gi_c.xy).rgb*0.5+0.5; - debug.xz = vec2(0.0,0.0); - //debug = fract(debug); - - float round_x = 1.0/64.0; - float round_y = 1.0/64.0; - - //gi_c.x = floor(gi_c.x/round_x+0.5)*round_x; - //gi_c.y = floor(gi_c.y/round_y+0.5)*round_y; - - float fda = 0.0; - vec3 fdiff = vec3(0,0,0); - - vec3 rcol = vec3(0,0,0); - - float fsa = 0.0; - - for (int i = -1; i < 2; i+=2 ) - { - for (int j = -1; j < 2; j+=2) - { - vec2 tc = vec2(i, j)*0.75; - vec3 nz = texture2D(noiseMap, vary_fragcoord.xy/128.0+tc*0.5).xyz; - //tc += gi_norm.xy*nz.z; - tc += nz.xy*2.0; - tc /= gi_samples; - tc += gi_c.xy; - - vec3 lnorm = -normalize(texture2D(normalGIMap, 
tc.xy).xyz*2.0-1.0); - vec3 lpos = getGIPosition(tc.xy).xyz; - - vec3 at = lpos-gi_pos.xyz; - float dist = dot(at,at); - float da = clamp(1.0/(gi_spec.x*dist), 0.0, 1.0); - - if (da > 0.0) - { - //add angular attenuation - vec3 ldir = at; - float ang_atten = clamp(dot(ldir, gi_norm), 0.0, 1.0); - - float ld = -dot(ldir, lnorm); - - if (ang_atten > 0.0 && ld < 0.0) - { - vec3 diff = texture2D(diffuseGIMap, tc.xy).xyz; - da = da*ang_atten; - fda += da; - fdiff += diff*da; - } - } - } - } - - fdiff /= max(gi_spec.y*fda, gi_quad.z); - fdiff = clamp(fdiff, vec3(0), vec3(1)); - - vec3 ret = fda*fdiff; - //ret = ret*ret*gi_quad.x+ret*gi_quad.y+gi_quad.z; - - //fda *= nz.z; - - //rcol.rgb *= gi_intensity; - //return rcol.rgb+vary_AmblitColor.rgb*0.25; - //return vec4(debug, 0.0); - //return vec4(fda*fdiff, 0.0); - return clamp(ret,vec3(0.0), vec3(1.0)); - //return debug.xyz; -} - -void main() -{ - vec2 pos_screen = vary_fragcoord.xy; - vec4 pos = getPosition(pos_screen); - vec3 norm = texture2DRect(normalMap, pos_screen).xyz; - norm = vec3((norm.xy-0.5)*2.0,norm.z); // unpack norm - - frag_color.xyz = giAmbient(pos, norm); -} diff --git a/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl b/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl index 3427d6db57..1149aec30b 100644 --- a/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/waterF.glsl @@ -53,13 +53,11 @@ uniform vec3 specular; uniform float lightExp; uniform float refScale; uniform float kd; -uniform vec2 screenRes; uniform vec3 normScale; uniform float fresnelScale; uniform float fresnelOffset; uniform float blurMultiplier; -uniform vec2 screen_res; -uniform mat4 norm_mat; //region space to screen space +uniform mat3 normal_matrix; //bigWave is (refCoord.w, view.w); VARYING vec4 refCoord; @@ -157,7 +155,7 @@ void main() //wavef.z *= 0.1f; //wavef = normalize(wavef); - vec3 screenspacewavef = (norm_mat*vec4(wavef, 
1.0)).xyz; + vec3 screenspacewavef = normal_matrix*wavef; frag_data[0] = vec4(color.rgb, 0.5); // diffuse frag_data[1] = vec4(0.5,0.5,0.5, 0.95); // speccolor*spec, spec diff --git a/indra/newview/app_settings/shaders/class1/environment/underWaterF.glsl b/indra/newview/app_settings/shaders/class1/environment/underWaterF.glsl index 0d8dab0a41..485e48537c 100644 --- a/indra/newview/app_settings/shaders/class1/environment/underWaterF.glsl +++ b/indra/newview/app_settings/shaders/class1/environment/underWaterF.glsl @@ -43,13 +43,11 @@ uniform vec2 fbScale; uniform float refScale; uniform float znear; uniform float zfar; -uniform float kd; uniform vec4 waterPlane; uniform vec3 eyeVec; uniform vec4 waterFogColor; uniform float waterFogDensity; uniform float waterFogKS; -uniform vec2 screenRes; //bigWave is (refCoord.w, view.w); VARYING vec4 refCoord; diff --git a/indra/newview/app_settings/shaders/class1/environment/waterF.glsl b/indra/newview/app_settings/shaders/class1/environment/waterF.glsl index 79bffab745..1fd7bdaa5c 100644 --- a/indra/newview/app_settings/shaders/class1/environment/waterF.glsl +++ b/indra/newview/app_settings/shaders/class1/environment/waterF.glsl @@ -42,8 +42,6 @@ uniform vec3 lightDir; uniform vec3 specular; uniform float lightExp; uniform float refScale; -uniform float kd; -uniform vec2 screenRes; uniform vec3 normScale; uniform float fresnelScale; uniform float fresnelOffset; diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 7331b93810..9bbaede68d 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -537,7 +537,7 @@ static void settings_to_globals() LLSurface::setTextureSize(gSavedSettings.getU32("RegionTextureSize")); LLRender::sGLCoreProfile = gSavedSettings.getBOOL("RenderGLCoreProfile"); - + LLVertexBuffer::sUseVAO = gSavedSettings.getBOOL("RenderUseVAO"); LLImageGL::sGlobalUseAnisotropic = gSavedSettings.getBOOL("RenderAnisotropic"); LLImageGL::sCompressTextures = 
gSavedSettings.getBOOL("RenderCompressTextures"); LLVOVolume::sLODFactor = gSavedSettings.getF32("RenderVolumeLODFactor"); diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp index d041baea90..d046b22133 100644 --- a/indra/newview/lldrawable.cpp +++ b/indra/newview/lldrawable.cpp @@ -254,7 +254,7 @@ S32 LLDrawable::findReferences(LLDrawable *drawablep) return count; } -static LLFastTimer::DeclareTimer FTM_ALLOCATE_FACE("Allocate Face", true); +static LLFastTimer::DeclareTimer FTM_ALLOCATE_FACE("Allocate Face"); LLFace* LLDrawable::addFace(LLFacePool *poolp, LLViewerTexture *texturep) { diff --git a/indra/newview/lldrawpool.cpp b/indra/newview/lldrawpool.cpp index 94dd927d26..d8f293cc62 100644 --- a/indra/newview/lldrawpool.cpp +++ b/indra/newview/lldrawpool.cpp @@ -472,6 +472,7 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba { params.mGroup->rebuildMesh(); } + params.mVertexBuffer->setBuffer(mask); params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset); gPipeline.addTrianglesDrawn(params.mCount, params.mDrawMode); diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp index 6d02ad2b96..c3cf744222 100644 --- a/indra/newview/lldrawpoolavatar.cpp +++ b/indra/newview/lldrawpoolavatar.cpp @@ -1505,7 +1505,7 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow) stop_glerror(); - LLDrawPoolAvatar::sVertexProgram->uniformMatrix4fv("matrixPalette", + LLDrawPoolAvatar::sVertexProgram->uniformMatrix4fv(LLViewerShaderMgr::AVATAR_MATRIX, skin->mJointNames.size(), FALSE, (GLfloat*) mat[0].mMatrix); @@ -1547,6 +1547,8 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow) buff->setBuffer(data_mask); buff->drawRange(LLRender::TRIANGLES, start, end, count, offset); } + + gPipeline.addTrianglesDrawn(count, LLRender::TRIANGLES); } } } diff --git a/indra/newview/lldrawpoolterrain.cpp 
b/indra/newview/lldrawpoolterrain.cpp index 9bc32fddbd..cac862a107 100644 --- a/indra/newview/lldrawpoolterrain.cpp +++ b/indra/newview/lldrawpoolterrain.cpp @@ -352,8 +352,8 @@ void LLDrawPoolTerrain::renderFullShader() LLGLSLShader* shader = LLGLSLShader::sCurBoundShaderPtr; llassert(shader); - shader->uniform4fv("object_plane_s", 1, tp0.mV); - shader->uniform4fv("object_plane_t", 1, tp1.mV); + shader->uniform4fv(LLShaderMgr::OBJECT_PLANE_S, 1, tp0.mV); + shader->uniform4fv(LLShaderMgr::OBJECT_PLANE_T, 1, tp1.mV); gGL.matrixMode(LLRender::MM_TEXTURE); gGL.loadIdentity(); @@ -862,8 +862,8 @@ void LLDrawPoolTerrain::renderSimple() if (LLGLSLShader::sNoFixedFunction) { - sShader->uniform4fv("object_plane_s", 1, tp0.mV); - sShader->uniform4fv("object_plane_t", 1, tp1.mV); + sShader->uniform4fv(LLShaderMgr::OBJECT_PLANE_S, 1, tp0.mV); + sShader->uniform4fv(LLShaderMgr::OBJECT_PLANE_T, 1, tp1.mV); } else { diff --git a/indra/newview/lldrawpoolwater.cpp b/indra/newview/lldrawpoolwater.cpp index 4f6eaa5a5b..b6a4b0194c 100644 --- a/indra/newview/lldrawpoolwater.cpp +++ b/indra/newview/lldrawpoolwater.cpp @@ -407,8 +407,8 @@ void LLDrawPoolWater::renderOpaqueLegacyWater() } else { - shader->uniform4fv("object_plane_s", 1, tp0); - shader->uniform4fv("object_plane_t", 1, tp1); + shader->uniform4fv(LLShaderMgr::OBJECT_PLANE_S, 1, tp0); + shader->uniform4fv(LLShaderMgr::OBJECT_PLANE_T, 1, tp1); } gGL.diffuseColor3f(1.f, 1.f, 1.f); @@ -546,7 +546,7 @@ void LLDrawPoolWater::shade() sTime = (F32)LLFrameTimer::getElapsedSeconds()*0.5f; - S32 reftex = shader->enableTexture(LLViewerShaderMgr::WATER_REFTEX); + S32 reftex = shader->enableTexture(LLShaderMgr::WATER_REFTEX); if (reftex > -1) { @@ -577,12 +577,12 @@ void LLDrawPoolWater::shade() mWaterNormp->setFilteringOption(LLTexUnit::TFO_POINT); } - S32 screentex = shader->enableTexture(LLViewerShaderMgr::WATER_SCREENTEX); + S32 screentex = shader->enableTexture(LLShaderMgr::WATER_SCREENTEX); if (screentex > -1) { - 
shader->uniform4fv(LLViewerShaderMgr::WATER_FOGCOLOR, 1, sWaterFogColor.mV); - shader->uniform1f(LLViewerShaderMgr::WATER_FOGDENSITY, + shader->uniform4fv(LLShaderMgr::WATER_FOGCOLOR, 1, sWaterFogColor.mV); + shader->uniform1f(LLShaderMgr::WATER_FOGDENSITY, param_mgr->getFogDensity()); gPipeline.mWaterDis.bindTexture(0, screentex); } @@ -594,15 +594,9 @@ void LLDrawPoolWater::shade() if (mVertexShaderLevel == 1) { sWaterFogColor.mV[3] = param_mgr->mDensitySliderValue; - shader->uniform4fv(LLViewerShaderMgr::WATER_FOGCOLOR, 1, sWaterFogColor.mV); + shader->uniform4fv(LLShaderMgr::WATER_FOGCOLOR, 1, sWaterFogColor.mV); } - F32 screenRes[] = - { - 1.f/gGLViewport[2], - 1.f/gGLViewport[3] - }; - shader->uniform2fv("screenRes", 1, screenRes); stop_glerror(); S32 diffTex = shader->enableTexture(LLViewerShaderMgr::DIFFUSE_MAP); @@ -614,26 +608,26 @@ void LLDrawPoolWater::shade() light_diffuse *= 6.f; //shader->uniformMatrix4fv("inverse_ref", 1, GL_FALSE, (GLfloat*) gGLObliqueProjectionInverse.mMatrix); - shader->uniform1f(LLViewerShaderMgr::WATER_WATERHEIGHT, eyedepth); - shader->uniform1f(LLViewerShaderMgr::WATER_TIME, sTime); - shader->uniform3fv(LLViewerShaderMgr::WATER_EYEVEC, 1, LLViewerCamera::getInstance()->getOrigin().mV); - shader->uniform3fv(LLViewerShaderMgr::WATER_SPECULAR, 1, light_diffuse.mV); - shader->uniform1f(LLViewerShaderMgr::WATER_SPECULAR_EXP, light_exp); - shader->uniform2fv(LLViewerShaderMgr::WATER_WAVE_DIR1, 1, param_mgr->getWave1Dir().mV); - shader->uniform2fv(LLViewerShaderMgr::WATER_WAVE_DIR2, 1, param_mgr->getWave2Dir().mV); - shader->uniform3fv(LLViewerShaderMgr::WATER_LIGHT_DIR, 1, light_dir.mV); - - shader->uniform3fv("normScale", 1, param_mgr->getNormalScale().mV); - shader->uniform1f("fresnelScale", param_mgr->getFresnelScale()); - shader->uniform1f("fresnelOffset", param_mgr->getFresnelOffset()); - shader->uniform1f("blurMultiplier", param_mgr->getBlurMultiplier()); + shader->uniform1f(LLShaderMgr::WATER_WATERHEIGHT, eyedepth); + 
shader->uniform1f(LLShaderMgr::WATER_TIME, sTime); + shader->uniform3fv(LLShaderMgr::WATER_EYEVEC, 1, LLViewerCamera::getInstance()->getOrigin().mV); + shader->uniform3fv(LLShaderMgr::WATER_SPECULAR, 1, light_diffuse.mV); + shader->uniform1f(LLShaderMgr::WATER_SPECULAR_EXP, light_exp); + shader->uniform2fv(LLShaderMgr::WATER_WAVE_DIR1, 1, param_mgr->getWave1Dir().mV); + shader->uniform2fv(LLShaderMgr::WATER_WAVE_DIR2, 1, param_mgr->getWave2Dir().mV); + shader->uniform3fv(LLShaderMgr::WATER_LIGHT_DIR, 1, light_dir.mV); + + shader->uniform3fv(LLShaderMgr::WATER_NORM_SCALE, 1, param_mgr->getNormalScale().mV); + shader->uniform1f(LLShaderMgr::WATER_FRESNEL_SCALE, param_mgr->getFresnelScale()); + shader->uniform1f(LLShaderMgr::WATER_FRESNEL_OFFSET, param_mgr->getFresnelOffset()); + shader->uniform1f(LLShaderMgr::WATER_BLUR_MULTIPLIER, param_mgr->getBlurMultiplier()); F32 sunAngle = llmax(0.f, light_dir.mV[2]); F32 scaledAngle = 1.f - sunAngle; - shader->uniform1f("sunAngle", sunAngle); - shader->uniform1f("scaledAngle", scaledAngle); - shader->uniform1f("sunAngle2", 0.1f + 0.2f*sunAngle); + shader->uniform1f(LLShaderMgr::WATER_SUN_ANGLE, sunAngle); + shader->uniform1f(LLShaderMgr::WATER_SCALED_ANGLE, scaledAngle); + shader->uniform1f(LLShaderMgr::WATER_SUN_ANGLE2, 0.1f + 0.2f*sunAngle); LLColor4 water_color; LLVector3 camera_up = LLViewerCamera::getInstance()->getUpAxis(); @@ -641,12 +635,12 @@ void LLDrawPoolWater::shade() if (LLViewerCamera::getInstance()->cameraUnderWater()) { water_color.setVec(1.f, 1.f, 1.f, 0.4f); - shader->uniform1f(LLViewerShaderMgr::WATER_REFSCALE, param_mgr->getScaleBelow()); + shader->uniform1f(LLShaderMgr::WATER_REFSCALE, param_mgr->getScaleBelow()); } else { water_color.setVec(1.f, 1.f, 1.f, 0.5f*(1.f + up_dot)); - shader->uniform1f(LLViewerShaderMgr::WATER_REFSCALE, param_mgr->getScaleAbove()); + shader->uniform1f(LLShaderMgr::WATER_REFSCALE, param_mgr->getScaleAbove()); } if (water_color.mV[3] > 0.9f) @@ -690,11 +684,11 @@ void 
LLDrawPoolWater::shade() } shader->disableTexture(LLViewerShaderMgr::ENVIRONMENT_MAP, LLTexUnit::TT_CUBE_MAP); - shader->disableTexture(LLViewerShaderMgr::WATER_SCREENTEX); + shader->disableTexture(LLShaderMgr::WATER_SCREENTEX); shader->disableTexture(LLViewerShaderMgr::BUMP_MAP); shader->disableTexture(LLViewerShaderMgr::DIFFUSE_MAP); - shader->disableTexture(LLViewerShaderMgr::WATER_REFTEX); - shader->disableTexture(LLViewerShaderMgr::WATER_SCREENDEPTH); + shader->disableTexture(LLShaderMgr::WATER_REFTEX); + shader->disableTexture(LLShaderMgr::WATER_SCREENDEPTH); if (deferred_render) { diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 6b3127decf..ef91a459e7 100755 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -1407,6 +1407,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, { //use transform feedback to pack vertex buffer //gGLDebugLoggingEnabled = TRUE; LLFastTimer t(FTM_FACE_GEOM_FEEDBACK); + LLGLEnable discard(GL_RASTERIZER_DISCARD); LLVertexBuffer* buff = (LLVertexBuffer*) vf.mVertexBuffer.get(); if (vf.mVertexBuffer.isNull() || buff->getNumVerts() != vf.mNumVertices) @@ -1955,21 +1956,31 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (rebuild_pos) { - LLFastTimer t(FTM_FACE_GEOM_POSITION); + LLVector4a* src = vf.mPositions; + + //_mm_prefetch((char*)src, _MM_HINT_T0); + + LLVector4a* end = src+num_vertices; + //LLVector4a* end_64 = end-4; + + //LLFastTimer t(FTM_FACE_GEOM_POSITION); llassert(num_vertices > 0); mVertexBuffer->getVertexStrider(vert, mGeomIndex, mGeomCount, map_range); - LLMatrix4a mat_vert; mat_vert.loadu(mat_vert_in); + + F32* dst = (F32*) vert.get(); + F32* end_f32 = dst+mGeomCount*4; - LLVector4a* src = vf.mPositions; - volatile F32* dst = (volatile F32*) vert.get(); - - volatile F32* end = dst+num_vertices*4; - LLVector4a res; + //_mm_prefetch((char*)dst, _MM_HINT_NTA); + //_mm_prefetch((char*)src, _MM_HINT_NTA); + + //_mm_prefetch((char*)dst, _MM_HINT_NTA); + LLVector4a res0; 
//,res1,res2,res3; + LLVector4a texIdx; S32 index = mTextureIndex < 255 ? mTextureIndex : 0; @@ -1986,29 +1997,53 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, texIdx.set(0,0,0,val); + LLVector4a tmp; + { - LLFastTimer t(FTM_FACE_POSITION_STORE); - LLVector4a tmp; + //LLFastTimer t2(FTM_FACE_POSITION_STORE); - do - { - mat_vert.affineTransform(*src++, res); - tmp.setSelectWithMask(mask, texIdx, res); + /*if (num_vertices > 4) + { //more than 64 bytes + while (src < end_64) + { + _mm_prefetch((char*)src + 64, _MM_HINT_T0); + _mm_prefetch((char*)dst + 64, _MM_HINT_T0); + + mat_vert.affineTransform(*src, res0); + tmp.setSelectWithMask(mask, texIdx, res0); + tmp.store4a((F32*) dst); + + mat_vert.affineTransform(*(src+1), res1); + tmp.setSelectWithMask(mask, texIdx, res1); + tmp.store4a((F32*) dst+4); + + mat_vert.affineTransform(*(src+2), res2); + tmp.setSelectWithMask(mask, texIdx, res2); + tmp.store4a((F32*) dst+8); + + mat_vert.affineTransform(*(src+3), res3); + tmp.setSelectWithMask(mask, texIdx, res3); + tmp.store4a((F32*) dst+12); + + dst += 16; + src += 4; + } + }*/ + + while (src < end) + { + mat_vert.affineTransform(*src++, res0); + tmp.setSelectWithMask(mask, texIdx, res0); tmp.store4a((F32*) dst); dst += 4; } - while(dst < end); } - + { - LLFastTimer t(FTM_FACE_POSITION_PAD); - S32 aligned_pad_vertices = mGeomCount - num_vertices; - res.set(res[0], res[1], res[2], 0.f); - - while (aligned_pad_vertices > 0) + //LLFastTimer t(FTM_FACE_POSITION_PAD); + while (dst < end_f32) { - --aligned_pad_vertices; - res.store4a((F32*) dst); + res0.store4a((F32*) dst); dst += 4; } } @@ -2022,15 +2057,17 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (rebuild_normal) { - LLFastTimer t(FTM_FACE_GEOM_NORMAL); + //LLFastTimer t(FTM_FACE_GEOM_NORMAL); mVertexBuffer->getNormalStrider(norm, mGeomIndex, mGeomCount, map_range); F32* normals = (F32*) norm.get(); - for (S32 i = 0; i < num_vertices; i++) - { + LLVector4a* src = vf.mNormals; + LLVector4a* end = 
src+num_vertices; + + while (src < end) + { LLVector4a normal; - mat_normal.rotate(vf.mNormals[i], normal); - normal.normalize3fast(); + mat_normal.rotate(*src++, normal); normal.store4a(normals); normals += 4; } @@ -2047,11 +2084,13 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, mVertexBuffer->getBinormalStrider(binorm, mGeomIndex, mGeomCount, map_range); F32* binormals = (F32*) binorm.get(); - for (S32 i = 0; i < num_vertices; i++) + LLVector4a* src = vf.mBinormals; + LLVector4a* end = vf.mBinormals+num_vertices; + + while (src < end) { LLVector4a binormal; - mat_normal.rotate(vf.mBinormals[i], binormal); - binormal.normalize3fast(); + mat_normal.rotate(*src++, binormal); binormal.store4a(binormals); binormals += 4; } diff --git a/indra/newview/llfasttimerview.cpp b/indra/newview/llfasttimerview.cpp index 4dfb93f1bc..e7a3f9b390 100644 --- a/indra/newview/llfasttimerview.cpp +++ b/indra/newview/llfasttimerview.cpp @@ -345,7 +345,7 @@ BOOL LLFastTimerView::handleScrollWheel(S32 x, S32 y, S32 clicks) return TRUE; } -static LLFastTimer::DeclareTimer FTM_RENDER_TIMER("Timers", true); +static LLFastTimer::DeclareTimer FTM_RENDER_TIMER("Timers"); static std::map sTimerColors; diff --git a/indra/newview/llviewerdisplay.cpp b/indra/newview/llviewerdisplay.cpp index ffeea2f4df..9ffc64312d 100644 --- a/indra/newview/llviewerdisplay.cpp +++ b/indra/newview/llviewerdisplay.cpp @@ -212,13 +212,13 @@ void display_stats() } static LLFastTimer::DeclareTimer FTM_PICK("Picking"); -static LLFastTimer::DeclareTimer FTM_RENDER("Render", true); +static LLFastTimer::DeclareTimer FTM_RENDER("Render"); static LLFastTimer::DeclareTimer FTM_UPDATE_SKY("Update Sky"); static LLFastTimer::DeclareTimer FTM_UPDATE_TEXTURES("Update Textures"); static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE("Update Images"); static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_CLASS("Class"); static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_BUMP("Image Update Bump"); -static LLFastTimer::DeclareTimer 
FTM_IMAGE_UPDATE_LIST("List"); +static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_LIST("List", true); static LLFastTimer::DeclareTimer FTM_IMAGE_UPDATE_DELETE("Delete"); static LLFastTimer::DeclareTimer FTM_RESIZE_WINDOW("Resize Window"); static LLFastTimer::DeclareTimer FTM_HUD_UPDATE("HUD Update"); diff --git a/indra/newview/llviewershadermgr.cpp b/indra/newview/llviewershadermgr.cpp index ba9818946c..c7677759af 100644 --- a/indra/newview/llviewershadermgr.cpp +++ b/indra/newview/llviewershadermgr.cpp @@ -304,47 +304,6 @@ void LLViewerShaderMgr::initAttribsAndUniforms(void) if (mReservedAttribs.empty()) { LLShaderMgr::initAttribsAndUniforms(); - - mAvatarUniforms.push_back("matrixPalette"); - mAvatarUniforms.push_back("gWindDir"); - mAvatarUniforms.push_back("gSinWaveParams"); - mAvatarUniforms.push_back("gGravity"); - - mWLUniforms.push_back("camPosLocal"); - - mTerrainUniforms.reserve(5); - mTerrainUniforms.push_back("detail_0"); - mTerrainUniforms.push_back("detail_1"); - mTerrainUniforms.push_back("detail_2"); - mTerrainUniforms.push_back("detail_3"); - mTerrainUniforms.push_back("alpha_ramp"); - - mGlowUniforms.push_back("glowDelta"); - mGlowUniforms.push_back("glowStrength"); - - mGlowExtractUniforms.push_back("minLuminance"); - mGlowExtractUniforms.push_back("maxExtractAlpha"); - mGlowExtractUniforms.push_back("lumWeights"); - mGlowExtractUniforms.push_back("warmthWeights"); - mGlowExtractUniforms.push_back("warmthAmount"); - - mShinyUniforms.push_back("origin"); - - mWaterUniforms.reserve(12); - mWaterUniforms.push_back("screenTex"); - mWaterUniforms.push_back("screenDepth"); - mWaterUniforms.push_back("refTex"); - mWaterUniforms.push_back("eyeVec"); - mWaterUniforms.push_back("time"); - mWaterUniforms.push_back("d1"); - mWaterUniforms.push_back("d2"); - mWaterUniforms.push_back("lightDir"); - mWaterUniforms.push_back("specular"); - mWaterUniforms.push_back("lightExp"); - mWaterUniforms.push_back("fogCol"); - mWaterUniforms.push_back("kd"); - 
mWaterUniforms.push_back("refScale"); - mWaterUniforms.push_back("waterHeight"); } } @@ -915,7 +874,7 @@ BOOL LLViewerShaderMgr::loadShadersEnvironment() gTerrainProgram.mShaderFiles.push_back(make_pair("environment/terrainV.glsl", GL_VERTEX_SHADER_ARB)); gTerrainProgram.mShaderFiles.push_back(make_pair("environment/terrainF.glsl", GL_FRAGMENT_SHADER_ARB)); gTerrainProgram.mShaderLevel = mVertexShaderLevel[SHADER_ENVIRONMENT]; - success = gTerrainProgram.createShader(NULL, &mTerrainUniforms); + success = gTerrainProgram.createShader(NULL, NULL); } if (!success) @@ -953,7 +912,7 @@ BOOL LLViewerShaderMgr::loadShadersWater() gWaterProgram.mShaderFiles.push_back(make_pair("environment/waterV.glsl", GL_VERTEX_SHADER_ARB)); gWaterProgram.mShaderFiles.push_back(make_pair("environment/waterF.glsl", GL_FRAGMENT_SHADER_ARB)); gWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_WATER]; - success = gWaterProgram.createShader(NULL, &mWaterUniforms); + success = gWaterProgram.createShader(NULL, NULL); } if (success) @@ -967,7 +926,7 @@ BOOL LLViewerShaderMgr::loadShadersWater() gUnderWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_WATER]; gUnderWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gUnderWaterProgram.createShader(NULL, &mWaterUniforms); + success = gUnderWaterProgram.createShader(NULL, NULL); } if (success) @@ -985,7 +944,7 @@ BOOL LLViewerShaderMgr::loadShadersWater() gTerrainWaterProgram.mShaderFiles.push_back(make_pair("environment/terrainWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gTerrainWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_ENVIRONMENT]; gTerrainWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - terrainWaterSuccess = gTerrainWaterProgram.createShader(NULL, &mTerrainUniforms); + terrainWaterSuccess = gTerrainWaterProgram.createShader(NULL, NULL); } /// Keep track of water shader levels @@ -1034,7 +993,7 @@ BOOL LLViewerShaderMgr::loadShadersEffects() gGlowProgram.mShaderFiles.push_back(make_pair("effects/glowV.glsl", 
GL_VERTEX_SHADER_ARB)); gGlowProgram.mShaderFiles.push_back(make_pair("effects/glowF.glsl", GL_FRAGMENT_SHADER_ARB)); gGlowProgram.mShaderLevel = mVertexShaderLevel[SHADER_EFFECT]; - success = gGlowProgram.createShader(NULL, &mGlowUniforms); + success = gGlowProgram.createShader(NULL, NULL); if (!success) { LLPipeline::sRenderGlow = FALSE; @@ -1048,7 +1007,7 @@ BOOL LLViewerShaderMgr::loadShadersEffects() gGlowExtractProgram.mShaderFiles.push_back(make_pair("effects/glowExtractV.glsl", GL_VERTEX_SHADER_ARB)); gGlowExtractProgram.mShaderFiles.push_back(make_pair("effects/glowExtractF.glsl", GL_FRAGMENT_SHADER_ARB)); gGlowExtractProgram.mShaderLevel = mVertexShaderLevel[SHADER_EFFECT]; - success = gGlowExtractProgram.createShader(NULL, &mGlowExtractUniforms); + success = gGlowExtractProgram.createShader(NULL, NULL); if (!success) { LLPipeline::sRenderGlow = FALSE; @@ -1408,7 +1367,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredWaterProgram.mShaderFiles.push_back(make_pair("deferred/waterV.glsl", GL_VERTEX_SHADER_ARB)); gDeferredWaterProgram.mShaderFiles.push_back(make_pair("deferred/waterF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredWaterProgram.createShader(NULL, &mWaterUniforms); + success = gDeferredWaterProgram.createShader(NULL, NULL); } if (success) @@ -1467,7 +1426,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredAvatarShadowProgram.mShaderFiles.push_back(make_pair("deferred/avatarShadowV.glsl", GL_VERTEX_SHADER_ARB)); gDeferredAvatarShadowProgram.mShaderFiles.push_back(make_pair("deferred/avatarShadowF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredAvatarShadowProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredAvatarShadowProgram.createShader(NULL, &mAvatarUniforms); + success = gDeferredAvatarShadowProgram.createShader(NULL, NULL); } if (success) @@ -1488,7 +1447,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() 
gDeferredTerrainProgram.mShaderFiles.push_back(make_pair("deferred/terrainV.glsl", GL_VERTEX_SHADER_ARB)); gDeferredTerrainProgram.mShaderFiles.push_back(make_pair("deferred/terrainF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredTerrainProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredTerrainProgram.createShader(NULL, &mTerrainUniforms); + success = gDeferredTerrainProgram.createShader(NULL, NULL); } if (success) @@ -1499,7 +1458,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredAvatarProgram.mShaderFiles.push_back(make_pair("deferred/avatarV.glsl", GL_VERTEX_SHADER_ARB)); gDeferredAvatarProgram.mShaderFiles.push_back(make_pair("deferred/avatarF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredAvatarProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredAvatarProgram.createShader(NULL, &mAvatarUniforms); + success = gDeferredAvatarProgram.createShader(NULL, NULL); } if (success) @@ -1519,7 +1478,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredAvatarAlphaProgram.mShaderFiles.push_back(make_pair("deferred/alphaNonIndexedNoColorF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredAvatarAlphaProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; - success = gDeferredAvatarAlphaProgram.createShader(NULL, &mAvatarUniforms); + success = gDeferredAvatarAlphaProgram.createShader(NULL, NULL); gDeferredAvatarAlphaProgram.mFeatures.calculatesLighting = true; gDeferredAvatarAlphaProgram.mFeatures.hasLighting = true; @@ -1584,7 +1543,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredWLSkyProgram.mShaderFiles.push_back(make_pair("deferred/skyF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredWLSkyProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; gDeferredWLSkyProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gDeferredWLSkyProgram.createShader(NULL, &mWLUniforms); + success = gDeferredWLSkyProgram.createShader(NULL, NULL); } if (success) @@ -1595,7 +1554,7 @@ BOOL 
LLViewerShaderMgr::loadShadersDeferred() gDeferredWLCloudProgram.mShaderFiles.push_back(make_pair("deferred/cloudsF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredWLCloudProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; gDeferredWLCloudProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gDeferredWLCloudProgram.createShader(NULL, &mWLUniforms); + success = gDeferredWLCloudProgram.createShader(NULL, NULL); } if (success) @@ -1606,7 +1565,7 @@ BOOL LLViewerShaderMgr::loadShadersDeferred() gDeferredStarProgram.mShaderFiles.push_back(make_pair("deferred/starsF.glsl", GL_FRAGMENT_SHADER_ARB)); gDeferredStarProgram.mShaderLevel = mVertexShaderLevel[SHADER_DEFERRED]; gDeferredStarProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gDeferredStarProgram.createShader(NULL, &mWLUniforms); + success = gDeferredStarProgram.createShader(NULL, NULL); } if (success) @@ -1957,7 +1916,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectShinyNonIndexedProgram.mShaderFiles.push_back(make_pair("objects/shinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectShinyNonIndexedProgram.mShaderFiles.push_back(make_pair("objects/shinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectShinyNonIndexedProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gObjectShinyNonIndexedProgram.createShader(NULL, &mShinyUniforms); + success = gObjectShinyNonIndexedProgram.createShader(NULL, NULL); } if (success) @@ -1974,7 +1933,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectShinyNonIndexedWaterProgram.mShaderFiles.push_back(make_pair("objects/shinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectShinyNonIndexedWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; gObjectShinyNonIndexedWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gObjectShinyNonIndexedWaterProgram.createShader(NULL, &mShinyUniforms); + success = gObjectShinyNonIndexedWaterProgram.createShader(NULL, NULL); } if (success) @@ -1990,7 +1949,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() 
gObjectFullbrightShinyNonIndexedProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectFullbrightShinyNonIndexedProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectFullbrightShinyNonIndexedProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gObjectFullbrightShinyNonIndexedProgram.createShader(NULL, &mShinyUniforms); + success = gObjectFullbrightShinyNonIndexedProgram.createShader(NULL, NULL); } if (success) @@ -2008,7 +1967,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectFullbrightShinyNonIndexedWaterProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectFullbrightShinyNonIndexedWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; gObjectFullbrightShinyNonIndexedWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gObjectFullbrightShinyNonIndexedWaterProgram.createShader(NULL, &mShinyUniforms); + success = gObjectFullbrightShinyNonIndexedWaterProgram.createShader(NULL, NULL); } if (success) @@ -2087,7 +2046,6 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectBumpProgram.mShaderFiles.push_back(make_pair("objects/bumpF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectBumpProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; success = gObjectBumpProgram.createShader(NULL, NULL); - if (success) { //lldrawpoolbump assumes "texture0" has channel 0 and "texture1" has channel 1 gObjectBumpProgram.bind(); @@ -2241,7 +2199,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectShinyProgram.mShaderFiles.push_back(make_pair("objects/shinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectShinyProgram.mShaderFiles.push_back(make_pair("objects/shinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectShinyProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gObjectShinyProgram.createShader(NULL, &mShinyUniforms); + success = gObjectShinyProgram.createShader(NULL, NULL); } if 
(success) @@ -2258,7 +2216,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectShinyWaterProgram.mShaderFiles.push_back(make_pair("objects/shinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectShinyWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; gObjectShinyWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gObjectShinyWaterProgram.createShader(NULL, &mShinyUniforms); + success = gObjectShinyWaterProgram.createShader(NULL, NULL); } if (success) @@ -2274,7 +2232,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectFullbrightShinyProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyV.glsl", GL_VERTEX_SHADER_ARB)); gObjectFullbrightShinyProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectFullbrightShinyProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gObjectFullbrightShinyProgram.createShader(NULL, &mShinyUniforms); + success = gObjectFullbrightShinyProgram.createShader(NULL, NULL); } if (success) @@ -2292,7 +2250,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gObjectFullbrightShinyWaterProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gObjectFullbrightShinyWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; gObjectFullbrightShinyWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gObjectFullbrightShinyWaterProgram.createShader(NULL, &mShinyUniforms); + success = gObjectFullbrightShinyWaterProgram.createShader(NULL, NULL); } if (mVertexShaderLevel[SHADER_AVATAR] > 0) @@ -2377,7 +2335,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gSkinnedObjectFullbrightShinyProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinySkinnedV.glsl", GL_VERTEX_SHADER_ARB)); gSkinnedObjectFullbrightShinyProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gSkinnedObjectFullbrightShinyProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - 
success = gSkinnedObjectFullbrightShinyProgram.createShader(NULL, &mShinyUniforms); + success = gSkinnedObjectFullbrightShinyProgram.createShader(NULL, NULL); } if (success) @@ -2394,7 +2352,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gSkinnedObjectShinySimpleProgram.mShaderFiles.push_back(make_pair("objects/shinySimpleSkinnedV.glsl", GL_VERTEX_SHADER_ARB)); gSkinnedObjectShinySimpleProgram.mShaderFiles.push_back(make_pair("objects/shinyF.glsl", GL_FRAGMENT_SHADER_ARB)); gSkinnedObjectShinySimpleProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gSkinnedObjectShinySimpleProgram.createShader(NULL, &mShinyUniforms); + success = gSkinnedObjectShinySimpleProgram.createShader(NULL, NULL); } if (success) @@ -2451,7 +2409,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gSkinnedObjectFullbrightShinyWaterProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinySkinnedV.glsl", GL_VERTEX_SHADER_ARB)); gSkinnedObjectFullbrightShinyWaterProgram.mShaderFiles.push_back(make_pair("objects/fullbrightShinyWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gSkinnedObjectFullbrightShinyWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gSkinnedObjectFullbrightShinyWaterProgram.createShader(NULL, &mShinyUniforms); + success = gSkinnedObjectFullbrightShinyWaterProgram.createShader(NULL, NULL); } if (success) @@ -2470,7 +2428,7 @@ BOOL LLViewerShaderMgr::loadShadersObject() gSkinnedObjectShinySimpleWaterProgram.mShaderFiles.push_back(make_pair("objects/shinySimpleSkinnedV.glsl", GL_VERTEX_SHADER_ARB)); gSkinnedObjectShinySimpleWaterProgram.mShaderFiles.push_back(make_pair("objects/shinyWaterF.glsl", GL_FRAGMENT_SHADER_ARB)); gSkinnedObjectShinySimpleWaterProgram.mShaderLevel = mVertexShaderLevel[SHADER_OBJECT]; - success = gSkinnedObjectShinySimpleWaterProgram.createShader(NULL, &mShinyUniforms); + success = gSkinnedObjectShinySimpleWaterProgram.createShader(NULL, NULL); } } @@ -2511,7 +2469,7 @@ BOOL LLViewerShaderMgr::loadShadersAvatar() 
gAvatarProgram.mShaderFiles.push_back(make_pair("avatar/avatarV.glsl", GL_VERTEX_SHADER_ARB)); gAvatarProgram.mShaderFiles.push_back(make_pair("avatar/avatarF.glsl", GL_FRAGMENT_SHADER_ARB)); gAvatarProgram.mShaderLevel = mVertexShaderLevel[SHADER_AVATAR]; - success = gAvatarProgram.createShader(NULL, &mAvatarUniforms); + success = gAvatarProgram.createShader(NULL, NULL); if (success) { @@ -2530,7 +2488,7 @@ BOOL LLViewerShaderMgr::loadShadersAvatar() // Note: no cloth under water: gAvatarWaterProgram.mShaderLevel = llmin(mVertexShaderLevel[SHADER_AVATAR], 1); gAvatarWaterProgram.mShaderGroup = LLGLSLShader::SG_WATER; - success = gAvatarWaterProgram.createShader(NULL, &mAvatarUniforms); + success = gAvatarWaterProgram.createShader(NULL, NULL); } /// Keep track of avatar levels @@ -2549,7 +2507,7 @@ BOOL LLViewerShaderMgr::loadShadersAvatar() gAvatarPickProgram.mShaderFiles.push_back(make_pair("avatar/pickAvatarV.glsl", GL_VERTEX_SHADER_ARB)); gAvatarPickProgram.mShaderFiles.push_back(make_pair("avatar/pickAvatarF.glsl", GL_FRAGMENT_SHADER_ARB)); gAvatarPickProgram.mShaderLevel = mVertexShaderLevel[SHADER_AVATAR]; - success = gAvatarPickProgram.createShader(NULL, &mAvatarUniforms); + success = gAvatarPickProgram.createShader(NULL, NULL); } if (success) @@ -2817,7 +2775,7 @@ BOOL LLViewerShaderMgr::loadShadersWindLight() gWLSkyProgram.mShaderFiles.push_back(make_pair("windlight/skyF.glsl", GL_FRAGMENT_SHADER_ARB)); gWLSkyProgram.mShaderLevel = mVertexShaderLevel[SHADER_WINDLIGHT]; gWLSkyProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = gWLSkyProgram.createShader(NULL, &mWLUniforms); + success = gWLSkyProgram.createShader(NULL, NULL); } if (success) @@ -2829,7 +2787,7 @@ BOOL LLViewerShaderMgr::loadShadersWindLight() gWLCloudProgram.mShaderFiles.push_back(make_pair("windlight/cloudsF.glsl", GL_FRAGMENT_SHADER_ARB)); gWLCloudProgram.mShaderLevel = mVertexShaderLevel[SHADER_WINDLIGHT]; gWLCloudProgram.mShaderGroup = LLGLSLShader::SG_SKY; - success = 
gWLCloudProgram.createShader(NULL, &mWLUniforms); + success = gWLCloudProgram.createShader(NULL, NULL); } return success; diff --git a/indra/newview/llviewershadermgr.h b/indra/newview/llviewershadermgr.h index d6dd645e8c..b8552d2d95 100644 --- a/indra/newview/llviewershadermgr.h +++ b/indra/newview/llviewershadermgr.h @@ -74,56 +74,7 @@ public: SHADER_COUNT }; - typedef enum - { - SHINY_ORIGIN = END_RESERVED_UNIFORMS - } eShinyUniforms; - - typedef enum - { - WATER_SCREENTEX = END_RESERVED_UNIFORMS, - WATER_SCREENDEPTH, - WATER_REFTEX, - WATER_EYEVEC, - WATER_TIME, - WATER_WAVE_DIR1, - WATER_WAVE_DIR2, - WATER_LIGHT_DIR, - WATER_SPECULAR, - WATER_SPECULAR_EXP, - WATER_FOGCOLOR, - WATER_FOGDENSITY, - WATER_REFSCALE, - WATER_WATERHEIGHT, - } eWaterUniforms; - - typedef enum - { - WL_CAMPOSLOCAL = END_RESERVED_UNIFORMS, - WL_WATERHEIGHT - } eWLUniforms; - - typedef enum - { - TERRAIN_DETAIL0 = END_RESERVED_UNIFORMS, - TERRAIN_DETAIL1, - TERRAIN_DETAIL2, - TERRAIN_DETAIL3, - TERRAIN_ALPHARAMP - } eTerrainUniforms; - - typedef enum - { - GLOW_DELTA = END_RESERVED_UNIFORMS - } eGlowUniforms; - - typedef enum - { - AVATAR_MATRIX = END_RESERVED_UNIFORMS, - AVATAR_WIND, - AVATAR_SINWAVE, - AVATAR_GRAVITY, - } eAvatarUniforms; + // simple model of forward iterator // http://www.sgi.com/tech/stl/ForwardIterator.html @@ -176,24 +127,6 @@ public: /* virtual */ void updateShaderUniforms(LLGLSLShader * shader); private: - - std::vector mShinyUniforms; - - //water parameters - std::vector mWaterUniforms; - - std::vector mWLUniforms; - - //terrain parameters - std::vector mTerrainUniforms; - - //glow parameters - std::vector mGlowUniforms; - - std::vector mGlowExtractUniforms; - - std::vector mAvatarUniforms; - // the list of shaders we need to propagate parameters to. 
std::vector mShaderList; diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 6a18534484..b0f23fca42 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -4654,7 +4654,7 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) bump_mask |= LLVertexBuffer::MAP_BINORMAL; genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, FALSE, TRUE); genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, FALSE, TRUE); - genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, TRUE); + genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, FALSE); genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, TRUE, TRUE); } else diff --git a/indra/newview/llwaterparammanager.cpp b/indra/newview/llwaterparammanager.cpp index 4f52ff9778..548890b5b5 100644 --- a/indra/newview/llwaterparammanager.cpp +++ b/indra/newview/llwaterparammanager.cpp @@ -188,13 +188,11 @@ void LLWaterParamManager::updateShaderUniforms(LLGLSLShader * shader) if (shader->mShaderGroup == LLGLSLShader::SG_WATER) { shader->uniform4fv(LLViewerShaderMgr::LIGHTNORM, 1, LLWLParamManager::getInstance()->getRotatedLightDir().mV); - shader->uniform3fv("camPosLocal", 1, LLViewerCamera::getInstance()->getOrigin().mV); - shader->uniform4fv("waterFogColor", 1, LLDrawPoolWater::sWaterFogColor.mV); - shader->uniform1f("waterFogEnd", LLDrawPoolWater::sWaterFogEnd); - shader->uniform4fv("waterPlane", 1, mWaterPlane.mV); - shader->uniform1f("waterFogDensity", getFogDensity()); - shader->uniform1f("waterFogKS", mWaterFogKS); - shader->uniform1f("distance_multiplier", 0); + shader->uniform4fv(LLShaderMgr::WATER_FOGCOLOR, 1, LLDrawPoolWater::sWaterFogColor.mV); + shader->uniform4fv(LLShaderMgr::WATER_WATERPLANE, 1, mWaterPlane.mV); + shader->uniform1f(LLShaderMgr::WATER_FOGDENSITY, getFogDensity()); + 
shader->uniform1f(LLShaderMgr::WATER_FOGKS, mWaterFogKS); + shader->uniform1f(LLViewerShaderMgr::DISTANCE_MULTIPLIER, 0); } } diff --git a/indra/newview/llwlparammanager.cpp b/indra/newview/llwlparammanager.cpp index 6077208799..04d41a2512 100644 --- a/indra/newview/llwlparammanager.cpp +++ b/indra/newview/llwlparammanager.cpp @@ -352,7 +352,7 @@ void LLWLParamManager::updateShaderUniforms(LLGLSLShader * shader) if (shader->mShaderGroup == LLGLSLShader::SG_DEFAULT) { shader->uniform4fv(LLViewerShaderMgr::LIGHTNORM, 1, mRotatedLightDir.mV); - shader->uniform3fv("camPosLocal", 1, LLViewerCamera::getInstance()->getOrigin().mV); + shader->uniform3fv(LLShaderMgr::WL_CAMPOSLOCAL, 1, LLViewerCamera::getInstance()->getOrigin().mV); } else if (shader->mShaderGroup == LLGLSLShader::SG_SKY) @@ -360,7 +360,7 @@ void LLWLParamManager::updateShaderUniforms(LLGLSLShader * shader) shader->uniform4fv(LLViewerShaderMgr::LIGHTNORM, 1, mClampedLightDir.mV); } - shader->uniform1f("scene_light_strength", mSceneLightStrength); + shader->uniform1f(LLShaderMgr::SCENE_LIGHT_STRENGTH, mSceneLightStrength); } diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index 45d6d23b51..d9771af254 100644 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -7857,13 +7857,6 @@ void LLPipeline::bindDeferredShader(LLGLSLShader& shader, U32 light_index, U32 n shader.uniform2f(LLShaderMgr::DEFERRED_PROJ_SHADOW_RES, mShadow[4].getWidth(), mShadow[4].getHeight()); shader.uniform1f(LLShaderMgr::DEFERRED_DEPTH_CUTOFF, RenderEdgeDepthCutoff); shader.uniform1f(LLShaderMgr::DEFERRED_NORM_CUTOFF, RenderEdgeNormCutoff); - - - if (shader.getUniformLocation("norm_mat") >= 0) - { - glh::matrix4f norm_mat = glh_get_current_modelview().inverse().transpose(); - shader.uniformMatrix4fv("norm_mat", 1, FALSE, norm_mat.m); - } } static LLFastTimer::DeclareTimer FTM_GI_TRACE("Trace"); @@ -7973,8 +7966,7 @@ void LLPipeline::renderDeferredLighting() } gDeferredSunProgram.uniform3fv("offset", 
slice, offset); - gDeferredSunProgram.uniform2f("screenRes", mDeferredLight.getWidth(), mDeferredLight.getHeight()); - + { LLGLDisable blend(GL_BLEND); LLGLDepthTest depth(GL_TRUE, GL_FALSE, GL_ALWAYS); -- cgit v1.3 From 1816582b929737f92ee68a1422e3be4e7c02f542 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Wed, 6 Mar 2013 09:09:07 -0800 Subject: Fix crashes from using single alloc for pos/norm/tc in volume face data fighting with old free call in model loading code --- indra/llmath/llvolume.cpp | 11 ++++++----- indra/llprimitive/llmodel.cpp | 31 ++++++++++++++++++------------- 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f989e8ed17..f503eea107 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5307,7 +5307,7 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately @@ -5492,10 +5492,11 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + if (new_face.mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index 5ed05e2201..8f0120b064 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -194,6 +194,9 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa cv.setPosition(LLVector4a(v[idx[i+pos_offset]*3+0], v[idx[i+pos_offset]*3+1], v[idx[i+pos_offset]*3+2])); + + if (!cv.getPosition().isFinite3()) + return LLModel::BAD_ELEMENT; } if (tc_source) @@ -207,6 +210,8 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa 
cv.setNormal(LLVector4a(n[idx[i+norm_offset]*3+0], n[idx[i+norm_offset]*3+1], n[idx[i+norm_offset]*3+2])); + if (!cv.getNormal().isFinite3()) + return LLModel::BAD_ELEMENT; } BOOL found = FALSE; @@ -261,13 +266,13 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } @@ -292,13 +297,13 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -480,13 +485,13 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } @@ -514,13 +519,13 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!norm_source) { - ll_aligned_free_16(new_face.mNormals); + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!tc_source) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -730,13 +735,13 @@ LLModel::EModelStatus load_face_from_dom_polygons(std::vector& fac LLVolumeFace& new_face = *face_list.rbegin(); if (!n) { - ll_aligned_free_16(new_face.mNormals); + 
//ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!t) { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } } @@ -1036,7 +1041,7 @@ void LLModel::setVolumeFaceData( } else { - ll_aligned_free_16(face.mNormals); + //ll_aligned_free_16(face.mNormals); face.mNormals = NULL; } @@ -1047,7 +1052,7 @@ void LLModel::setVolumeFaceData( } else { - ll_aligned_free_16(face.mTexCoords); + //ll_aligned_free_16(face.mTexCoords); face.mTexCoords = NULL; } @@ -1246,7 +1251,7 @@ void LLModel::generateNormals(F32 angle_cutoff) } else { - ll_aligned_free_16(new_face.mTexCoords); + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } -- cgit v1.3 From f8e059deee28500b88c8c172eaa8c4d7ca657748 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 8 Mar 2013 17:11:30 -0600 Subject: MAINT-2371 Last round of optimizations. Reviewed by Graham --- indra/llmath/llmatrix4a.h | 11 +- indra/llmath/llvector4a.inl | 13 +- indra/llmath/llvolume.cpp | 1791 +++++++++++++----------------------- indra/llmath/llvolume.h | 79 +- indra/newview/llflexibleobject.cpp | 18 +- indra/newview/llspatialpartition.h | 2 +- indra/newview/llvovolume.cpp | 157 ++-- 7 files changed, 824 insertions(+), 1247 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h index c4cefdb4fa..d141298f69 100644 --- a/indra/llmath/llmatrix4a.h +++ b/indra/llmath/llmatrix4a.h @@ -107,15 +107,14 @@ public: inline void rotate(const LLVector4a& v, LLVector4a& res) { + LLVector4a y,z; + res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); - res.mul(mMatrix[0]); - - LLVector4a y; y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); - y.mul(mMatrix[1]); - - LLVector4a z; z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); + + res.mul(mMatrix[0]); + y.mul(mMatrix[1]); z.mul(mMatrix[2]); res.add(y); diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 
7c52ffef21..35a67204ec 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -460,16 +460,13 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs) mQ = _mm_max_ps(lhs.mQ, rhs.mQ); } -// Set this to (c * lhs) + rhs * ( 1 - c) +// Set this to lhs + (rhs-lhs)*c inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c) { - LLVector4a a = lhs; - a.mul(c); - - LLVector4a b = rhs; - b.mul(1.f-c); - - setAdd(a, b); + LLVector4a t; + t.setSub(rhs,lhs); + t.mul(c); + setAdd(lhs, t); } inline LLBool32 LLVector4a::isFinite3() const diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index f989e8ed17..9fc72fd801 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -94,6 +94,95 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; +bool less_than_max_mag(const LLVector4a& vec); + +template +LLAlignedArray::LLAlignedArray() +{ + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +LLAlignedArray::~LLAlignedArray() +{ + ll_aligned_free(mArray); + mArray = NULL; + mElementCount = 0; + mCapacity = 0; +} + +template +void LLAlignedArray::push_back(const T& elem) +{ + T* old_buf = NULL; + if (mCapacity <= mElementCount) + { + mCapacity++; + mCapacity *= 2; + T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); + if (mArray) + { + LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); + } + old_buf = mArray; + mArray = new_buf; + } + + mArray[mElementCount++] = elem; + + //delete old array here to prevent error on a.push_back(a[0]) + ll_aligned_free(old_buf); +} + +template +void LLAlignedArray::resize(U32 size) +{ + if (mCapacity < size) + { + mCapacity = size+mCapacity*2; + T* new_buf = mCapacity > 0 ? 
(T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; + if (mArray) + { + LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); + ll_aligned_free(mArray); + } + + /*for (U32 i = mElementCount; i < mCapacity; ++i) + { + new(new_buf+i) T(); + }*/ + mArray = new_buf; + } + + mElementCount = size; +} + + +template +T& LLAlignedArray::operator[](int idx) +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +const T& LLAlignedArray::operator[](int idx) const +{ + llassert(idx < mElementCount); + return mArray[idx]; +} + +template +T* LLAlignedArray::append(S32 N) +{ + U32 sz = size(); + resize(sz+N); + return &((*this)[sz]); +} + + BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -474,7 +563,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f }; F32 scale = 0.5f; F32 t, t_step, t_first, t_fraction, ang, ang_step; - LLVector3 pt1,pt2; + LLVector4a pt1,pt2; F32 begin = params.getBegin(); F32 end = params.getEnd(); @@ -497,20 +586,21 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // Starting t and ang values for the first face t = t_first; ang = 2.0f*F_PI*(t*ang_scale + offset); - pt1.setVec(cos(ang)*scale,sin(ang)*scale, t); + pt1.set(cos(ang)*scale,sin(ang)*scale, t); // Increment to the next point. // pt2 is the end point on the fractional face t += t_step; ang += ang_step; - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); t_fraction = (begin - t_first)*sides; // Only use if it's not almost exactly on an edge. 
if (t_fraction < 0.9999f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); mProfile.push_back(new_pt); } @@ -518,12 +608,17 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 while (t < end) { // Iterate through all the integer steps of t. - pt1.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt1.set(cos(ang)*scale,sin(ang)*scale,t); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(pt1-p) * 1.0f/(float)(split+1) * (float)(i+1)); + LLVector4a new_pt; + new_pt.setSub(pt1, p); + new_pt.mul(1.0f/(float)(split+1) * (float)(i+1)); + new_pt.add(p); + mProfile.push_back(new_pt); } } mProfile.push_back(pt1); @@ -536,18 +631,25 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 // pt1 is the first point on the fractional face // pt2 is the end point on the fractional face - pt2.setVec(cos(ang)*scale,sin(ang)*scale,t); + pt2.set(cos(ang)*scale,sin(ang)*scale,t); // Find the fraction that we need to add to the end point. 
t_fraction = (end - (t - t_step))*sides; if (t_fraction > 0.0001f) { - LLVector3 new_pt = lerp(pt1, pt2, t_fraction); + LLVector4a new_pt; + new_pt.setLerp(pt1, pt2, t_fraction); if (mProfile.size() > 0) { - LLVector3 p = mProfile[mProfile.size()-1]; + LLVector4a p = mProfile[mProfile.size()-1]; for (S32 i = 0; i < split && mProfile.size() > 0; i++) { - mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + //mProfile.push_back(p+(new_pt-p) * 1.0f/(float)(split+1) * (float)(i+1)); + + LLVector4a pt1; + pt1.setSub(new_pt, p); + pt1.mul(1.0f/(float)(split+1) * (float)(i+1)); + pt1.add(p); + mProfile.push_back(pt1); } } mProfile.push_back(new_pt); @@ -568,7 +670,7 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 if (params.getHollow() <= 0) { // put center point if not hollow. - mProfile.push_back(LLVector3(0,0,0)); + mProfile.push_back(LLVector4a(0,0,0)); } } else @@ -581,103 +683,6 @@ void LLProfile::genNGon(const LLProfileParams& params, S32 sides, F32 offset, F3 mTotal = mProfile.size(); } -void LLProfile::genNormals(const LLProfileParams& params) -{ - S32 count = mProfile.size(); - - S32 outer_count; - if (mTotalOut) - { - outer_count = mTotalOut; - } - else - { - outer_count = mTotal / 2; - } - - mEdgeNormals.resize(count * 2); - mEdgeCenters.resize(count * 2); - mNormals.resize(count); - - LLVector2 pt0,pt1; - - BOOL hollow = (params.getHollow() > 0); - - S32 i0, i1, i2, i3, i4; - - // Parametrically generate normal - for (i2 = 0; i2 < count; i2++) - { - mNormals[i2].mV[0] = mProfile[i2].mV[0]; - mNormals[i2].mV[1] = mProfile[i2].mV[1]; - if (hollow && (i2 >= outer_count)) - { - mNormals[i2] *= -1.f; - } - if (mNormals[i2].magVec() < 0.001) - { - // Special case for point at center, get adjacent points. - i1 = (i2 - 1) >= 0 ? i2 - 1 : count - 1; - i0 = (i1 - 1) >= 0 ? i1 - 1 : count - 1; - i3 = (i2 + 1) < count ? i2 + 1 : 0; - i4 = (i3 + 1) < count ? 
i3 + 1 : 0; - - pt0.setVec(mProfile[i1].mV[VX] + mProfile[i1].mV[VX] - mProfile[i0].mV[VX], - mProfile[i1].mV[VY] + mProfile[i1].mV[VY] - mProfile[i0].mV[VY]); - pt1.setVec(mProfile[i3].mV[VX] + mProfile[i3].mV[VX] - mProfile[i4].mV[VX], - mProfile[i3].mV[VY] + mProfile[i3].mV[VY] - mProfile[i4].mV[VY]); - - mNormals[i2] = pt0 + pt1; - mNormals[i2] *= 0.5f; - } - mNormals[i2].normVec(); - } - - S32 num_normal_sets = isConcave() ? 2 : 1; - for (S32 normal_set = 0; normal_set < num_normal_sets; normal_set++) - { - S32 point_num; - for (point_num = 0; point_num < mTotal; point_num++) - { - LLVector3 point_1 = mProfile[point_num]; - point_1.mV[VZ] = 0.f; - - LLVector3 point_2; - - if (isConcave() && normal_set == 0 && point_num == (mTotal - 1) / 2) - { - point_2 = mProfile[mTotal - 1]; - } - else if (isConcave() && normal_set == 1 && point_num == mTotal - 1) - { - point_2 = mProfile[(mTotal - 1) / 2]; - } - else - { - LLVector3 delta_pos; - S32 neighbor_point = (point_num + 1) % mTotal; - while(delta_pos.magVecSquared() < 0.01f * 0.01f) - { - point_2 = mProfile[neighbor_point]; - delta_pos = point_2 - point_1; - neighbor_point = (neighbor_point + 1) % mTotal; - if (neighbor_point == point_num) - { - break; - } - } - } - - point_2.mV[VZ] = 0.f; - LLVector3 face_normal = (point_2 - point_1) % LLVector3::z_axis; - face_normal.normVec(); - mEdgeNormals[normal_set * count + point_num] = face_normal; - mEdgeCenters[normal_set * count + point_num] = lerp(point_1, point_2, 0.5f); - } - } -} - - // Hollow is percent of the original bounding box, not of this particular // profile's geometry. Thus, a swept triangle needs lower hollow values than // a swept square. 
@@ -693,12 +698,13 @@ LLProfile::Face* LLProfile::addHole(const LLProfileParams& params, BOOL flat, F3 Face *face = addFace(mTotalOut, mTotal-mTotalOut,0,LL_FACE_INNER_SIDE, flat); - std::vector pt; + static LLAlignedArray pt; pt.resize(mTotal) ; for (S32 i=mTotalOut;imPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; + + LLMatrix3 rot(twist * qang); + + pt->mRot.loadu(rot); t+=step; @@ -1408,51 +1419,55 @@ void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 en // Run through the non-cut dependent points. 
while (t < params.getEnd()) { - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; - + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); + t+=step; } // Make one final pass for the end cut. t = params.getEnd(); - pt = vector_append(mPath, 1); + pt = mPath.append(1); ang = 2.0f*F_PI*revolutions * t; c = cos(ang)*lerp(radius_start, radius_end, t); s = sin(ang)*lerp(radius_start, radius_end, t); - pt->mPos.setVec(0 + lerp(0,params.getShear().mV[0],s) + pt->mPos.set(0 + lerp(0,params.getShear().mV[0],s) + lerp(-skew ,skew, t) * 0.5f, c + lerp(0,params.getShear().mV[1],s), s); - pt->mScale.mV[VX] = hole_x * lerp(taper_x_begin, taper_x_end, t); - pt->mScale.mV[VY] = hole_y * lerp(taper_y_begin, taper_y_end, t); + pt->mScale.set(hole_x * lerp(taper_x_begin, taper_x_end, t), + hole_y * lerp(taper_y_begin, taper_y_end, t), + 0,1); pt->mTexT = t; // Twist rotates the path along the x,y plane (I think) - DJS 04/05/02 twist.setQuat (lerp(twist_begin,twist_end,t) * 2.f * F_PI - F_PI,0,0,1); // Rotate the point around the circle's center. 
qang.setQuat (ang,path_axis); - pt->mRot = twist * qang; - + LLMatrix3 tmp(twist*qang); + pt->mRot.loadu(tmp); + mTotal = mPath.size(); } @@ -1549,7 +1564,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, mDirty = FALSE; S32 np = 2; // hardcode for line - mPath.clear(); + mPath.resize(0); mOpen = TRUE; // Is this 0xf0 mask really necessary? DK 03/02/05 @@ -1575,12 +1590,16 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, for (S32 i=0;imPath.size() * mProfilep->mProfile.size()) > (1u << 20)) - { - llinfos << "sizeS: " << mPathp->mPath.size() << " sizeT: " << mProfilep->mProfile.size() << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" 
<< llendl ; - } - //******************************************************************** - BOOL regenPath = mPathp->generate(mParams.getPathParams(), path_detail, split); BOOL regenProf = mProfilep->generate(mParams.getProfileParams(), mPathp->isOpen(),profile_detail, split); @@ -2163,21 +2176,6 @@ BOOL LLVolume::generate() S32 sizeS = mPathp->mPath.size(); S32 sizeT = mProfilep->mProfile.size(); - //******************************************************************** - //debug info, to be removed - if((U32)(sizeS * sizeT) > (1u << 20)) - { - llinfos << "regenPath: " << (S32)regenPath << " regenProf: " << (S32)regenProf << llendl ; - llinfos << "sizeS: " << sizeS << " sizeT: " << sizeT << llendl ; - llinfos << "path_detail : " << path_detail << " split: " << split << " profile_detail: " << profile_detail << llendl ; - llinfos << mParams << llendl ; - llinfos << "more info to check if mProfilep is deleted or not." << llendl ; - llinfos << mProfilep->mNormals.size() << " : " << mProfilep->mFaces.size() << " : " << mProfilep->mEdgeNormals.size() << " : " << mProfilep->mEdgeCenters.size() << llendl ; - - llerrs << "LLVolume corrupted!" << llendl ; - } - //******************************************************************** - sNumMeshPoints -= mMesh.size(); mMesh.resize(sizeT * sizeS); sNumMeshPoints += mMesh.size(); @@ -2185,22 +2183,39 @@ BOOL LLVolume::generate() //generate vertex positions // Run along the path. 
+ LLVector4a* dst = mMesh.mArray; + for (S32 s = 0; s < sizeS; ++s) { - LLVector2 scale = mPathp->mPath[s].mScale; - LLQuaternion rot = mPathp->mPath[s].mRot; + F32* scale = mPathp->mPath[s].mScale.getF32ptr(); + + F32 sc [] = + { scale[0], 0, 0, 0, + 0, scale[1], 0, 0, + 0, 0, scale[2], 0, + 0, 0, 0, 1 }; + + LLMatrix4 rot((F32*) mPathp->mPath[s].mRot.mMatrix); + LLMatrix4 scale_mat(sc); + + scale_mat *= rot; + + LLMatrix4a rot_mat; + rot_mat.loadu(scale_mat); + + LLVector4a* profile = mProfilep->mProfile.mArray; + LLVector4a* end_profile = profile+sizeT; + LLVector4a offset = mPathp->mPath[s].mPos; + + LLVector4a tmp; // Run along the profile. - for (S32 t = 0; t < sizeT; ++t) + while (profile < end_profile) { - S32 m = s*sizeT + t; - Point& pt = mMesh[m]; - - pt.mPos.mV[0] = mProfilep->mProfile[t].mV[0] * scale.mV[0]; - pt.mPos.mV[1] = mProfilep->mProfile[t].mV[1] * scale.mV[1]; - pt.mPos.mV[2] = 0.0f; - pt.mPos = pt.mPos * rot; - pt.mPos += mPathp->mPath[s].mPos; + rot_mat.rotate(*profile++, tmp); + dst->setAdd(tmp,offset); + llassert(less_than_max_mag(*dst)); + ++dst; } } @@ -2210,9 +2225,11 @@ BOOL LLVolume::generate() LLFaceID id = iter->mFaceID; mFaceMask |= id; } - + LL_CHECK_MEMORY return TRUE; } + + LL_CHECK_MEMORY return FALSE; } @@ -2790,14 +2807,16 @@ void LLVolume::createVolumeFaces() } -inline LLVector3 sculpt_rgb_to_vector(U8 r, U8 g, U8 b) +inline LLVector4a sculpt_rgb_to_vector(U8 r, U8 g, U8 b) { // maps RGB values to vector values [0..255] -> [-0.5..0.5] - LLVector3 value; - value.mV[VX] = r / 255.f - 0.5f; - value.mV[VY] = g / 255.f - 0.5f; - value.mV[VZ] = b / 255.f - 0.5f; + LLVector4a value; + LLVector4a sub(0.5f, 0.5f, 0.5f); + value.set(r,g,b); + value.mul(1.f/255.f); + value.sub(sub); + return value; } @@ -2817,21 +2836,21 @@ inline U32 sculpt_st_to_index(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_w } -inline LLVector3 sculpt_index_to_vector(U32 index, const U8* sculpt_data) +inline LLVector4a sculpt_index_to_vector(U32 index, 
const U8* sculpt_data) { - LLVector3 v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); + LLVector4a v = sculpt_rgb_to_vector(sculpt_data[index], sculpt_data[index+1], sculpt_data[index+2]); return v; } -inline LLVector3 sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_st_to_vector(S32 s, S32 t, S32 size_s, S32 size_t, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_st_to_index(s, t, size_s, size_t, sculpt_width, sculpt_height, sculpt_components); return sculpt_index_to_vector(index, sculpt_data); } -inline LLVector3 sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) +inline LLVector4a sculpt_xy_to_vector(U32 x, U32 y, U16 sculpt_width, U16 sculpt_height, S8 sculpt_components, const U8* sculpt_data) { U32 index = sculpt_xy_to_index(x, y, sculpt_width, sculpt_height, sculpt_components); @@ -2853,15 +2872,26 @@ F32 LLVolume::sculptGetSurfaceArea() for (S32 t = 0; t < sizeT-1; t++) { // get four corners of quad - LLVector3 p1 = mMesh[(s )*sizeT + (t )].mPos; - LLVector3 p2 = mMesh[(s+1)*sizeT + (t )].mPos; - LLVector3 p3 = mMesh[(s )*sizeT + (t+1)].mPos; - LLVector3 p4 = mMesh[(s+1)*sizeT + (t+1)].mPos; + LLVector4a& p1 = mMesh[(s )*sizeT + (t )]; + LLVector4a& p2 = mMesh[(s+1)*sizeT + (t )]; + LLVector4a& p3 = mMesh[(s )*sizeT + (t+1)]; + LLVector4a& p4 = mMesh[(s+1)*sizeT + (t+1)]; // compute the area of the quad by taking the length of the cross product of the two triangles - LLVector3 cross1 = (p1 - p2) % (p1 - p3); - LLVector3 cross2 = (p4 - p2) % (p4 - p3); - area += (cross1.magVec() + cross2.magVec()) / 2.f; + LLVector4a v0,v1,v2,v3; + v0.setSub(p1,p2); + v1.setSub(p1,p3); + v2.setSub(p4,p2); + v3.setSub(p4,p3); + + LLVector4a cross1, cross2; + cross1.setCross3(v0,v1); + 
cross2.setCross3(v2,v3); + + //LLVector3 cross1 = (p1 - p2) % (p1 - p3); + //LLVector3 cross2 = (p4 - p2) % (p4 - p3); + + area += (cross1.getLength3() + cross2.getLength3()).getF32() / 2.f; } } @@ -2882,17 +2912,19 @@ void LLVolume::sculptGeneratePlaceholder() for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; F32 u = (F32)s/(sizeS-1); F32 v = (F32)t/(sizeT-1); const F32 RADIUS = (F32) 0.3; - - pt.mPos.mV[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); - pt.mPos.mV[2] = (F32)(cos(F_PI * v) * RADIUS); + + F32* p = pt.getF32ptr(); + + p[0] = (F32)(sin(F_PI * v) * cos(2.0 * F_PI * u) * RADIUS); + p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); + p[2] = (F32)(cos(F_PI * v) * RADIUS); } line += sizeT; @@ -2917,7 +2949,7 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 for (S32 t = 0; t < sizeT; t++) { S32 i = t + line; - Point& pt = mMesh[i]; + LLVector4a& pt = mMesh[i]; S32 reversed_t = t; @@ -2974,11 +3006,12 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 } } - pt.mPos = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); + pt = sculpt_xy_to_vector(x, y, sculpt_width, sculpt_height, sculpt_components, sculpt_data); if (sculpt_mirror) { - pt.mPos.mV[VX] *= -1.f; + LLVector4a scale(-1.f,1,1,1); + pt.mul(scale); } } @@ -3560,803 +3593,125 @@ bool LLVolumeParams::validate(U8 prof_curve, F32 prof_begin, F32 prof_end, F32 h return true; } -S32 *LLVolume::getTriangleIndices(U32 &num_indices) const +void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) +{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the + //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost + F32 detail[] = {1.f, 1.5f, 2.5f, 
4.f}; + for (S32 i = 0; i < 4; i++) + { + S32 count = 0; + S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); + S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); + + count = (profile_points-1)*2*(path_points-1); + count += profile_points*2; + + counts[i] = count; + } +} + + +S32 LLVolume::getNumTriangles(S32* vcount) const { - S32 expected_num_triangle_indices = getNumTriangleIndices(); - if (expected_num_triangle_indices > MAX_VOLUME_TRIANGLE_INDICES) + U32 triangle_count = 0; + U32 vertex_count = 0; + + for (S32 i = 0; i < getNumVolumeFaces(); ++i) { - // we don't allow LLVolumes with this many vertices - llwarns << "Couldn't allocate triangle indices" << llendl; - num_indices = 0; - return NULL; + const LLVolumeFace& face = getVolumeFace(i); + triangle_count += face.mNumIndices/3; + + vertex_count += face.mNumVertices; } - S32* index = new S32[expected_num_triangle_indices]; - S32 count = 0; - // Let's do this totally diffently, as we don't care about faces... - // Counter-clockwise triangles are forward facing... 
+ if (vcount) + { + *vcount = vertex_count; + } + + return triangle_count; +} + - BOOL open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - S32 size_s, size_s_out, size_t; - S32 s, t, i; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); +//----------------------------------------------------------------------------- +// generateSilhouetteVertices() +//----------------------------------------------------------------------------- +void LLVolume::generateSilhouetteVertices(std::vector &vertices, + std::vector &normals, + const LLVector3& obj_cam_vec_in, + const LLMatrix4& mat_in, + const LLMatrix3& norm_mat_in, + S32 face_mask) +{ + LLMatrix4a mat; + mat.loadu(mat_in); + + LLMatrix4a norm_mat; + norm_mat.loadu(norm_mat_in); + + LLVector4a obj_cam_vec; + obj_cam_vec.load3(obj_cam_vec_in.mV); - // NOTE -- if the construction of the triangles below ever changes - // then getNumTriangleIndices() method may also have to be updated. 
+ vertices.clear(); + normals.clear(); - if (open) /* Flawfinder: ignore */ + if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) { - if (hollow) + return; + } + + S32 cur_index = 0; + //for each face + for (face_list_t::iterator iter = mVolumeFaces.begin(); + iter != mVolumeFaces.end(); ++iter) + { + LLVolumeFace& face = *iter; + + if (!(face_mask & (0x1 << cur_index++)) || + face.mNumIndices == 0 || face.mEdge.empty()) { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 + continue; + } - for (t = 0; t < size_t - 1; t++) - { - // The outer face, first cut, and inner face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 + if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } + } + else { - // The other cut face - index[count++] = s + t*size_s; // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = s + (t+1)*size_s; // x,y+1 - - index[count++] = s + (t+1)*size_s; // x,y+1 - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } + //============================================== + //DEBUG draw edge map instead of silhouette edge + //============================================== - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; +#if DEBUG_SILHOUETTE_EDGE_MAP - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. 
- LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } + //for each triangle + U32 count = face.mNumIndices; + for (U32 j = 0; j < count/3; j++) { + //get vertices + S32 v1 = face.mIndices[j*3+0]; + S32 v2 = face.mIndices[j*3+1]; + S32 v3 = face.mIndices[j*3+2]; - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + //get current face center + LLVector3 cCenter = (face.mVertices[v1].getPosition() + + face.mVertices[v2].getPosition() + + face.mVertices[v3].getPosition()) / 3.0f; - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } + //for each edge + for (S32 k = 0; k < 3; k++) { + S32 nIndex = 
face.mEdge[j*3+k]; + if (nIndex <= -1) { + continue; } - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; + if (nIndex >= (S32) count/3) { + continue; } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. - LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < 
d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Open solid - - for (t = 0; t < size_t - 1; t++) - { - // Outer face + 1 cut face - for (s = 0; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - - // The other cut face - index[count++] = (size_s - 1) + (t*size_s); // x,y - index[count++] = 0 + t*size_s; // x+1,y - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - - index[count++] = (size_s - 1) + (t+1)*size_s; // x,y+1 - index[count++] = 0 + (t*size_s); // x+1,y - index[count++] = 0 + (t+1)*size_s; // x+1,y+1 - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - for (s = 0; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = size_s - 1; - } - - // We've got a top cap - S32 offset = (size_t - 1)*size_s; - for (s = 0; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. 
- index[count++] = offset + size_s - 1; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - } - else if (hollow) - { - // Closed hollow - // Outer face - - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s_out - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Inner face - // Invert facing from outer face - for (t = 0; t < size_t - 1; t++) - { - for (s = size_s_out; s < size_s - 1; s++) - { - i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + 1 + size_s; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // Top cap - S32 pt1 = 0; - S32 pt2 = size_s-1; - S32 i = (size_t - 1)*size_s; - - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. 
- LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1 + i; - index[count++] = pt1 + 1 + i; - index[count++] = pt2 + i; - pt1++; - } - else - { - index[count++] = pt1 + i; - index[count++] = pt2 - 1 + i; - index[count++] = pt2 + i; - pt2--; - } - } - - // Bottom cap - pt1 = 0; - pt2 = size_s-1; - while (pt2 - pt1 > 1) - { - // Use the profile points instead of the mesh, since you want - // the un-transformed profile distances. 
- LLVector3 p1 = getProfile().mProfile[pt1]; - LLVector3 p2 = getProfile().mProfile[pt2]; - LLVector3 pa = getProfile().mProfile[pt1+1]; - LLVector3 pb = getProfile().mProfile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - - // Use area of triangle to determine backfacing - F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); - - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); - - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); - - BOOL use_tri1a2 = TRUE; - BOOL tri_1a2 = TRUE; - BOOL tri_21b = TRUE; - - if (area_1a2 < 0) - { - tri_1a2 = FALSE; - } - if (area_2ab < 0) - { - // Can't use, because it contains point b - tri_1a2 = FALSE; - } - if (area_21b < 0) - { - tri_21b = FALSE; - } - if (area_1ba < 0) - { - // Can't use, because it contains point b - tri_21b = FALSE; - } - - if (!tri_1a2) - { - use_tri1a2 = FALSE; - } - else if (!tri_21b) - { - use_tri1a2 = TRUE; - } - else - { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; - - if (d1.magVecSquared() < d2.magVecSquared()) - { - use_tri1a2 = TRUE; - } - else - { - use_tri1a2 = FALSE; - } - } - - if (use_tri1a2) - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt1 + 1; - pt1++; - } - else - { - index[count++] = pt1; - index[count++] = pt2; - index[count++] = pt2 - 1; - pt2--; - } - } - } - } - else - { - // Closed solid. Easy case. - for (t = 0; t < size_t - 1; t++) - { - for (s = 0; s < size_s - 1; s++) - { - // Should wrap properly, but for now... 
- i = s + t*size_s; - - index[count++] = i; // x,y - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s; // x,y+1 - - index[count++] = i + size_s; // x,y+1 - index[count++] = i + 1; // x+1,y - index[count++] = i + size_s + 1; // x+1,y+1 - } - } - - // Do the top and bottom caps, if necessary - if (path_open) - { - // bottom cap - for (s = 1; s < size_s - 2; s++) - { - index[count++] = s+1; - index[count++] = s; - index[count++] = 0; - } - - // top cap - S32 offset = (size_t - 1)*size_s; - for (s = 1; s < size_s - 2; s++) - { - // Inverted ordering from bottom cap. - index[count++] = offset; - index[count++] = offset + s; - index[count++] = offset + s + 1; - } - } - } - -#ifdef LL_DEBUG - // assert that we computed the correct number of indices - if (count != expected_num_triangle_indices ) - { - llerrs << "bad index count prediciton:" - << " expected=" << expected_num_triangle_indices - << " actual=" << count << llendl; - } -#endif - -#if 0 - // verify that each index does not point beyond the size of the mesh - S32 num_vertices = mMesh.size(); - for (i = 0; i < count; i+=3) - { - llinfos << index[i] << ":" << index[i+1] << ":" << index[i+2] << llendl; - llassert(index[i] < num_vertices); - llassert(index[i+1] < num_vertices); - llassert(index[i+2] < num_vertices); - } -#endif - - num_indices = count; - return index; -} - -void LLVolume::getLoDTriangleCounts(const LLVolumeParams& params, S32* counts) -{ //attempt to approximate the number of triangles that will result from generating a volume LoD set for the - //supplied LLVolumeParams -- inaccurate, but a close enough approximation for determining streaming cost - F32 detail[] = {1.f, 1.5f, 2.5f, 4.f}; - for (S32 i = 0; i < 4; i++) - { - S32 count = 0; - S32 path_points = LLPath::getNumPoints(params.getPathParams(), detail[i]); - S32 profile_points = LLProfile::getNumPoints(params.getProfileParams(), false, detail[i]); - - count = (profile_points-1)*2*(path_points-1); - count += profile_points*2; - - 
counts[i] = count; - } -} - -S32 LLVolume::getNumTriangleIndices() const -{ - BOOL profile_open = getProfile().isOpen(); - BOOL hollow = (mParams.getProfileParams().getHollow() > 0); - BOOL path_open = getPath().isOpen(); - - S32 size_s, size_s_out, size_t; - size_s = getProfile().getTotal(); - size_s_out = getProfile().getTotalOut(); - size_t = getPath().mPath.size(); - - S32 count = 0; - if (profile_open) /* Flawfinder: ignore */ - { - if (hollow) - { - // Open hollow -- much like the closed solid, except we - // we need to stitch up the gap between s=0 and s=size_s-1 - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - else - { - count = (size_t - 1) * (((size_s -1) * 6) + 6); - } - } - else if (hollow) - { - // Closed hollow - // Outer face - count = (size_t - 1) * (size_s_out - 1) * 6; - - // Inner face - count += (size_t - 1) * ((size_s - 1) - size_s_out) * 6; - } - else - { - // Closed solid. Easy case. - count = (size_t - 1) * (size_s - 1) * 6; - } - - if (path_open) - { - S32 cap_triangle_count = size_s - 3; - if ( profile_open - || hollow ) - { - cap_triangle_count = size_s - 2; - } - if ( cap_triangle_count > 0 ) - { - // top and bottom caps - count += cap_triangle_count * 2 * 3; - } - } - return count; -} - - -S32 LLVolume::getNumTriangles(S32* vcount) const -{ - U32 triangle_count = 0; - U32 vertex_count = 0; - - for (S32 i = 0; i < getNumVolumeFaces(); ++i) - { - const LLVolumeFace& face = getVolumeFace(i); - triangle_count += face.mNumIndices/3; - - vertex_count += face.mNumVertices; - } - - - if (vcount) - { - *vcount = vertex_count; - } - - return triangle_count; -} - - -//----------------------------------------------------------------------------- -// generateSilhouetteVertices() -//----------------------------------------------------------------------------- -void LLVolume::generateSilhouetteVertices(std::vector &vertices, - std::vector &normals, - const LLVector3& obj_cam_vec_in, - const LLMatrix4& mat_in, - const LLMatrix3& norm_mat_in, - 
S32 face_mask) -{ - LLMatrix4a mat; - mat.loadu(mat_in); - - LLMatrix4a norm_mat; - norm_mat.loadu(norm_mat_in); - - LLVector4a obj_cam_vec; - obj_cam_vec.load3(obj_cam_vec_in.mV); - - vertices.clear(); - normals.clear(); - - if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) - { - return; - } - - S32 cur_index = 0; - //for each face - for (face_list_t::iterator iter = mVolumeFaces.begin(); - iter != mVolumeFaces.end(); ++iter) - { - LLVolumeFace& face = *iter; - - if (!(face_mask & (0x1 << cur_index++)) || - face.mNumIndices == 0 || face.mEdge.empty()) - { - continue; - } - - if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { - - } - else { - - //============================================== - //DEBUG draw edge map instead of silhouette edge - //============================================== - -#if DEBUG_SILHOUETTE_EDGE_MAP - - //for each triangle - U32 count = face.mNumIndices; - for (U32 j = 0; j < count/3; j++) { - //get vertices - S32 v1 = face.mIndices[j*3+0]; - S32 v2 = face.mIndices[j*3+1]; - S32 v3 = face.mIndices[j*3+2]; - - //get current face center - LLVector3 cCenter = (face.mVertices[v1].getPosition() + - face.mVertices[v2].getPosition() + - face.mVertices[v3].getPosition()) / 3.0f; - - //for each edge - for (S32 k = 0; k < 3; k++) { - S32 nIndex = face.mEdge[j*3+k]; - if (nIndex <= -1) { - continue; - } - - if (nIndex >= (S32) count/3) { - continue; - } - //get neighbor vertices - v1 = face.mIndices[nIndex*3+0]; - v2 = face.mIndices[nIndex*3+1]; - v3 = face.mIndices[nIndex*3+2]; + //get neighbor vertices + v1 = face.mIndices[nIndex*3+0]; + v2 = face.mIndices[nIndex*3+1]; + v3 = face.mIndices[nIndex*3+2]; //get neighbor face center LLVector3 nCenter = (face.mVertices[v1].getPosition() + @@ -5243,8 +4598,6 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) freeData(); - LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a)); - resizeVertices(src.mNumVertices); 
resizeIndices(src.mNumIndices); @@ -5307,7 +4660,7 @@ LLVolumeFace::~LLVolumeFace() void LLVolumeFace::freeData() { - ll_aligned_free_16(mPositions); + ll_aligned_free(mPositions); mPositions = NULL; //normals and texture coordinates are part of the same buffer as mPositions, do not free them separately @@ -5331,52 +4684,23 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) delete mOctree; mOctree = NULL; + LL_CHECK_MEMORY BOOL ret = FALSE ; if (mTypeMask & CAP_MASK) { ret = createCap(volume, partial_build); + LL_CHECK_MEMORY } else if ((mTypeMask & END_MASK) || (mTypeMask & SIDE_MASK)) { ret = createSide(volume, partial_build); + LL_CHECK_MEMORY } else { llerrs << "Unknown/uninitialized face type!" << llendl; } - //update the range of the texture coordinates - if(ret) - { - mTexCoordExtents[0].setVec(1.f, 1.f) ; - mTexCoordExtents[1].setVec(0.f, 0.f) ; - - for(U32 i = 0 ; i < mNumVertices ; i++) - { - if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0]) - { - mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ; - } - if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0]) - { - mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ; - } - - if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1]) - { - mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ; - } - if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1]) - { - mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ; - } - } - mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ; - mTexCoordExtents[0].mV[1] = llmax(0.f, mTexCoordExtents[0].mV[1]) ; - mTexCoordExtents[1].mV[0] = llmin(1.f, mTexCoordExtents[1].mV[0]) ; - mTexCoordExtents[1].mV[1] = llmin(1.f, mTexCoordExtents[1].mV[1]) ; - } - return ret ; } @@ -6068,8 +5392,10 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + LL_CHECK_MEMORY + + const LLAlignedArray& 
mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6099,9 +5425,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) VertexData baseVert; for(S32 t = 0; t < 4; t++) { - corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); - corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; - corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; + corners[t].getPosition().load4a(mesh[offset + (grid_size*t)].getF32ptr()); + corners[t].mTexCoord.mV[0] = profile[grid_size*t][0]+0.5f; + corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t][1]; } { @@ -6182,6 +5508,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) mCenter->mul(0.5f); } + llassert(less_than_max_mag(mExtents[0])); + llassert(less_than_max_mag(mExtents[1])); + if (!partial_build) { resizeIndices(grid_size*grid_size*6); @@ -6212,6 +5541,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY return TRUE; } @@ -6230,8 +5560,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) S32 num_vertices = 0, num_indices = 0; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; // All types of caps have the same number of vertices and indices num_vertices = profile.size(); @@ -6251,13 +5581,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { resizeVertices(num_vertices); allocateBinormals(num_vertices); - if (!partial_build) { resizeIndices(num_indices); } } + LL_CHECK_MEMORY; + S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); @@ -6288,35 +5619,68 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL 
partial_build) LLVector4a* binorm = (LLVector4a*) mBinormals; // Copy the vertices into the array - for (S32 i = 0; i < num_vertices; i++) + + const LLVector4a* src = mesh.mArray+offset; + const LLVector4a* end = src+num_vertices; + + min = *src; + max = min; + + + const LLVector4a* p = profile.mArray; + + if (mTypeMask & TOP_MASK) { - if (mTypeMask & TOP_MASK) - { - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = profile[i].mV[1]+0.5f; - } - else + min_uv.set((*p)[0]+0.5f, + (*p)[1]+0.5f); + + max_uv = min_uv; + + while(src < end) { - // Mirror for underside. - tc[i].mV[0] = profile[i].mV[0]+0.5f; - tc[i].mV[1] = 0.5f - profile[i].mV[1]; - } + tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = (*p)[1]+0.5f; - pos[i].load3(mesh[i + offset].mPos.mV); + llassert(less_than_max_mag(*src)); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); - if (i == 0) - { - max = pos[i]; - min = max; - min_uv = max_uv = tc[i]; + *pos = *src; + + ++p; + ++tc; + ++src; + ++pos; } - else + } + else + { + + min_uv.set((*p)[0]+0.5f, + 0.5f - (*p)[1]); + max_uv = min_uv; + + while(src < end) { - update_min_max(min,max,pos[i]); - update_min_max(min_uv, max_uv, tc[i]); + // Mirror for underside. 
+ tc->mV[0] = (*p)[0]+0.5f; + tc->mV[1] = 0.5f - (*p)[1]; + + llassert(less_than_max_mag(*src)); + update_min_max(min,max,*src); + update_min_max(min_uv, max_uv, *tc); + + *pos = *src; + + ++p; + ++tc; + ++src; + ++pos; } } + LL_CHECK_MEMORY + mCenter->setAdd(min, max); mCenter->mul(0.5f); @@ -6353,15 +5717,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { - pos[num_vertices] = *mCenter; - tc[num_vertices] = cuv; + *pos++ = *mCenter; + *tc++ = cuv; num_vertices++; } - - for (S32 i = 0; i < num_vertices; i++) + + LL_CHECK_MEMORY + + F32* dst_binorm = (F32*) binorm; + F32* end_binorm = (F32*) (binorm+num_vertices); + + F32* dst_norm = (F32*) norm; + + while (dst_binorm < end_binorm) { - binorm[i].load4a(binormal.getF32ptr()); - norm[i].load4a(normal.getF32ptr()); + binormal.store4a(dst_binorm); + normal.store4a(dst_norm); + + dst_binorm += 4; + dst_norm += 4; } if (partial_build) @@ -6382,33 +5756,38 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. 
- LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; + //p1.mV[VZ] = 0.f; + //p2.mV[VZ] = 0.f; + //pa.mV[VZ] = 0.f; + //pb.mV[VZ] = 0.f; // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6443,10 +5822,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1, pa); + + LLVector4a d2; + d2.setSub(p2, pb); - 
if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6485,33 +5867,33 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) { // Use the profile points instead of the mesh, since you want // the un-transformed profile distances. - LLVector3 p1 = profile[pt1]; - LLVector3 p2 = profile[pt2]; - LLVector3 pa = profile[pt1+1]; - LLVector3 pb = profile[pt2-1]; - - p1.mV[VZ] = 0.f; - p2.mV[VZ] = 0.f; - pa.mV[VZ] = 0.f; - pb.mV[VZ] = 0.f; - + const LLVector4a& p1 = profile[pt1]; + const LLVector4a& p2 = profile[pt2]; + const LLVector4a& pa = profile[pt1+1]; + const LLVector4a& pb = profile[pt2-1]; + + const F32* p1V = p1.getF32ptr(); + const F32* p2V = p2.getF32ptr(); + const F32* paV = pa.getF32ptr(); + const F32* pbV = pb.getF32ptr(); + // Use area of triangle to determine backfacing F32 area_1a2, area_1ba, area_21b, area_2ab; - area_1a2 = (p1.mV[0]*pa.mV[1] - pa.mV[0]*p1.mV[1]) + - (pa.mV[0]*p2.mV[1] - p2.mV[0]*pa.mV[1]) + - (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]); + area_1a2 = (p1V[0]*paV[1] - paV[0]*p1V[1]) + + (paV[0]*p2V[1] - p2V[0]*paV[1]) + + (p2V[0]*p1V[1] - p1V[0]*p2V[1]); - area_1ba = (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*pa.mV[1] - pa.mV[0]*pb.mV[1]) + - (pa.mV[0]*p1.mV[1] - p1.mV[0]*pa.mV[1]); + area_1ba = (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*paV[1] - paV[0]*pbV[1]) + + (paV[0]*p1V[1] - p1V[0]*paV[1]); - area_21b = (p2.mV[0]*p1.mV[1] - p1.mV[0]*p2.mV[1]) + - (p1.mV[0]*pb.mV[1] - pb.mV[0]*p1.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_21b = (p2V[0]*p1V[1] - p1V[0]*p2V[1]) + + (p1V[0]*pbV[1] - pbV[0]*p1V[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); - area_2ab = (p2.mV[0]*pa.mV[1] - pa.mV[0]*p2.mV[1]) + - (pa.mV[0]*pb.mV[1] - pb.mV[0]*pa.mV[1]) + - (pb.mV[0]*p2.mV[1] - p2.mV[0]*pb.mV[1]); + area_2ab = (p2V[0]*paV[1] - paV[0]*p2V[1]) + + (paV[0]*pbV[1] - pbV[0]*paV[1]) + + (pbV[0]*p2V[1] - p2V[0]*pbV[1]); BOOL use_tri1a2 = TRUE; BOOL tri_1a2 = TRUE; @@ -6546,10 
+5928,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } else { - LLVector3 d1 = p1 - pa; - LLVector3 d2 = p2 - pb; + LLVector4a d1; + d1.setSub(p1,pa); + LLVector4a d2; + d2.setSub(p2,pb); - if (d1.magVecSquared() < d2.magVecSquared()) + if (d1.dot3(d1) < d2.dot3(d2)) { use_tri1a2 = TRUE; } @@ -6598,6 +5982,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } + + LL_CHECK_MEMORY return TRUE; } @@ -6900,6 +6286,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { + LL_CHECK_MEMORY BOOL flat = mTypeMask & FLAT_MASK; U8 sculpt_type = volume->getParams().getSculptType(); @@ -6910,9 +6297,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) S32 num_vertices, num_indices; - const std::vector& mesh = volume->getMesh(); - const std::vector& profile = volume->getProfile().mProfile; - const std::vector& path_data = volume->getPath().mPath; + const LLAlignedArray& mesh = volume->getMesh(); + const LLAlignedArray& profile = volume->getProfile().mProfile; + const LLAlignedArray& path_data = volume->getPath().mPath; S32 max_s = volume->getProfile().getTotal(); @@ -6933,10 +6320,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + LLVector4a* pos = (LLVector4a*) mPositions; - LLVector4a* norm = (LLVector4a*) mNormals; LLVector2* tc = (LLVector2*) mTexCoords; - F32 begin_stex = floorf(profile[mBeginS].mV[2]); + F32 begin_stex = floorf(profile[mBeginS][2]); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; S32 cur_vertex = 0; @@ -6965,11 +6353,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // Get s value for tex-coord. 
if (!flat) { - ss = profile[mBeginS + s].mV[2]; + ss = profile[mBeginS + s][2]; } else { - ss = profile[mBeginS + s].mV[2] - begin_stex; + ss = profile[mBeginS + s][2] - begin_stex; } } @@ -6989,19 +6377,17 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) i = mBeginS + s + max_s*t; } - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); - norm[cur_vertex].clear(); cur_vertex++; if (test && s > 0) { - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - - norm[cur_vertex].clear(); - + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); cur_vertex++; } } @@ -7018,28 +6404,66 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } i = mBeginS + s + max_s*t; - ss = profile[mBeginS + s].mV[2] - begin_stex; - pos[cur_vertex].load3(mesh[i].mPos.mV); - tc[cur_vertex] = LLVector2(ss,tt); - norm[cur_vertex].clear(); - + ss = profile[mBeginS + s][2] - begin_stex; + + llassert(less_than_max_mag(mesh[i])); + mesh[i].store4a((F32*)(pos+cur_vertex)); + tc[cur_vertex].set(ss,tt); + cur_vertex++; } } + LL_CHECK_MEMORY - //get bounding box for this side - LLVector4a& face_min = mExtents[0]; - LLVector4a& face_max = mExtents[1]; + mCenter->clear(); - face_min = face_max = pos[0]; + LLVector4a* cur_pos = pos; + LLVector4a* end_pos = pos + mNumVertices; + + //get bounding box for this side + LLVector4a face_min; + LLVector4a face_max; + + face_min = face_max = *cur_pos++; + + while (cur_pos < end_pos) + { + update_min_max(face_min, face_max, *cur_pos++); + } + + mExtents[0] = face_min; + mExtents[1] = face_max; + + U32 tc_count = mNumVertices; + if (tc_count%2 == 1) + { //odd number of texture coordinates, duplicate last entry to padded end of array + tc_count++; + mTexCoords[mNumVertices] = mTexCoords[mNumVertices-1]; + } + + 
LLVector4a* cur_tc = (LLVector4a*) mTexCoords; + LLVector4a* end_tc = (LLVector4a*) (mTexCoords+tc_count); + + LLVector4a tc_min; + LLVector4a tc_max; + + tc_min = tc_max = *cur_tc++; - for (U32 i = 1; i < mNumVertices; ++i) + while (cur_tc < end_tc) { - update_min_max(face_min, face_max, pos[i]); + update_min_max(tc_min, tc_max, *cur_tc++); } + F32* minp = tc_min.getF32ptr(); + F32* maxp = tc_max.getF32ptr(); + + mTexCoordExtents[0].mV[0] = llmin(minp[0], minp[2]); + mTexCoordExtents[0].mV[1] = llmin(minp[1], minp[3]); + mTexCoordExtents[1].mV[0] = llmax(maxp[0], maxp[2]); + mTexCoordExtents[1].mV[1] = llmax(maxp[1], maxp[3]); + mCenter->setAdd(face_min, face_max); mCenter->mul(0.5f); @@ -7104,33 +6528,94 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } } + LL_CHECK_MEMORY + //clear normals - for (U32 i = 0; i < mNumVertices; i++) + F32* dst = (F32*) mNormals; + F32* end = (F32*) (mNormals+mNumVertices); + LLVector4a zero = LLVector4a::getZero(); + + while (dst < end) { - mNormals[i].clear(); + zero.store4a(dst); + dst += 4; } + LL_CHECK_MEMORY + //generate normals U32 count = mNumIndices/3; - for (U32 i = 0; i < count; i++) //for each triangle + LLVector4a* norm = mNormals; + + static LLAlignedArray triangle_normals; + triangle_normals.resize(count); + LLVector4a* output = triangle_normals.mArray; + LLVector4a* end_output = output+count; + + U16* idx = mIndices; + + while (output < end_output) { - const U16* idx = &(mIndices[i*3]); - - LLVector4a& v0 = *(pos+idx[0]); - LLVector4a& v1 = *(pos+idx[1]); - LLVector4a& v2 = *(pos+idx[2]); - - LLVector4a& n0 = *(norm+idx[0]); - LLVector4a& n1 = *(norm+idx[1]); - LLVector4a& n2 = *(norm+idx[2]); + LLVector4a b,v1,v2; + b.load4a((F32*) (pos+idx[0])); + v1.load4a((F32*) (pos+idx[1])); + v2.load4a((F32*) (pos+idx[2])); //calculate triangle normal - LLVector4a a, b, c; + LLVector4a a; - a.setSub(v0, v1); - b.setSub(v0, v2); - c.setCross3(a,b); + a.setSub(b, v1); + b.sub(v2); + + + LLQuad& vector1 = 
*((LLQuad*) &v1); + LLQuad& vector2 = *((LLQuad*) &v2); + + LLQuad& amQ = *((LLQuad*) &a); + LLQuad& bmQ = *((LLQuad*) &b); + + //v1.setCross3(t,v0); + //setCross3(const LLVector4a& a, const LLVector4a& b) + // Vectors are stored in memory in w, z, y, x order from high to low + // Set vector1 = { a[W], a[X], a[Z], a[Y] } + vector1 = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // Set vector2 = { b[W], b[Y], b[X], b[Z] } + vector2 = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] } + vector2 = _mm_mul_ps( vector1, vector2 ); + // vector3 = { a[W], a[Y], a[X], a[Z] } + amQ = _mm_shuffle_ps( amQ, amQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // vector4 = { b[W], b[X], b[Z], b[Y] } + bmQ = _mm_shuffle_ps( bmQ, bmQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } + vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ )); + + v1.store4a((F32*) output); + + output++; + idx += 3; + } + + idx = mIndices; + + LLVector4a* src = triangle_normals.mArray; + + for (U32 i = 0; i < count; i++) //for each triangle + { + LLVector4a c; + c.load4a((F32*) (src++)); + + LLVector4a* n0p = norm+idx[0]; + LLVector4a* n1p = norm+idx[1]; + LLVector4a* n2p = norm+idx[2]; + + idx += 3; + + LLVector4a n0,n1,n2; + n0.load4a((F32*) n0p); + n1.load4a((F32*) n1p); + n2.load4a((F32*) n2p); n0.add(c); n1.add(c); @@ -7143,8 +6628,14 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) case 1: n1.add(c); break; case 2: n2.add(c); break; }; + + n0.store4a((F32*) n0p); + n1.store4a((F32*) n1p); + n2.store4a((F32*) n2p); } + LL_CHECK_MEMORY + // adjust normals based on wrapping and stitching LLVector4a top; @@ -7276,6 +6767,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } + LL_CHECK_MEMORY + return TRUE; } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 1d3b0fe52f..5e43af92ec 100644 --- a/indra/llmath/llvolume.h +++ 
b/indra/llmath/llvolume.h @@ -37,7 +37,6 @@ class LLPath; template class LLOctreeNode; -class LLVector4a; class LLVolumeFace; class LLVolume; class LLVolumeTriangle; @@ -50,6 +49,8 @@ class LLVolumeTriangle; #include "v3math.h" #include "v3dmath.h" #include "v4math.h" +#include "llvector4a.h" +#include "llmatrix4a.h" #include "llquaternion.h" #include "llstrider.h" #include "v4coloru.h" @@ -194,6 +195,26 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; +template +class LLAlignedArray +{ +public: + T* mArray; + U32 mElementCount; + U32 mCapacity; + + LLAlignedArray(); + ~LLAlignedArray(); + + void push_back(const T& elem); + U32 size() const { return mElementCount; } + void resize(U32 size); + T* append(S32 N); + T& operator[](int idx); + const T& operator[](int idx) const; +}; + + class LLProfileParams { public: @@ -708,16 +729,16 @@ public: LLFaceID mFaceID; }; - std::vector mProfile; - std::vector mNormals; + LLAlignedArray mProfile; + //LLAlignedArray mNormals; std::vector mFaces; - std::vector mEdgeNormals; - std::vector mEdgeCenters; + + //LLAlignedArray mEdgeNormals; + //LLAlignedArray mEdgeCenters; friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile); protected: - void genNormals(const LLProfileParams& params); static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0); @@ -741,13 +762,29 @@ protected: class LLPath { public: - struct PathPt + class PathPt { - LLVector3 mPos; - LLVector2 mScale; - LLQuaternion mRot; + public: + LLMatrix4a mRot; + LLVector4a mPos; + + LLVector4a mScale; F32 mTexT; - PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); } + F32 pad[3]; //for alignment + PathPt() + { + mPos.clear(); + mTexT = 0; + mScale.clear(); + 
mRot.setRows(LLVector4a(1,0,0,0), + LLVector4a(0,1,0,0), + LLVector4a(0,0,1,0)); + + //distinguished data in the pad for debugging + pad[0] = 3.14159f; + pad[1] = -3.14159f; + pad[2] = 0.585f; + } }; public: @@ -779,7 +816,7 @@ public: friend std::ostream& operator<<(std::ostream &s, const LLPath &path); public: - std::vector mPath; + LLAlignedArray mPath; protected: BOOL mOpen; @@ -951,11 +988,7 @@ protected: ~LLVolume(); // use unref public: - struct Point - { - LLVector3 mPos; - }; - + struct FaceParams { LLFaceID mFaceID; @@ -978,8 +1011,8 @@ public: const LLProfile& getProfile() const { return *mProfilep; } LLPath& getPath() const { return *mPathp; } void resizePath(S32 length); - const std::vector& getMesh() const { return mMesh; } - const LLVector3& getMeshPt(const U32 i) const { return mMesh[i].mPos; } + const LLAlignedArray& getMesh() const { return mMesh; } + const LLVector4a& getMeshPt(const U32 i) const { return mMesh[i]; } void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); } @@ -994,10 +1027,7 @@ public: S32 getSculptLevel() const { return mSculptLevel; } void setSculptLevel(S32 level) { mSculptLevel = level; } - S32 *getTriangleIndices(U32 &num_indices) const; - - // returns number of triangle indeces required for path/profile mesh - S32 getNumTriangleIndices() const; + static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts); S32 getNumTriangles(S32* vcount = NULL) const; @@ -1070,7 +1100,8 @@ public: LLVolumeParams mParams; LLPath *mPathp; LLProfile *mProfilep; - std::vector mMesh; + LLAlignedArray mMesh; + BOOL mGenerateSingleFace; typedef std::vector face_list_t; diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp index 77a0cdffce..cd4718381b 100644 --- a/indra/newview/llflexibleobject.cpp +++ b/indra/newview/llflexibleobject.cpp @@ -683,30 +683,36 @@ void LLVolumeImplFlexible::doFlexibleUpdate() LLVector4(z_axis, 0.f), LLVector4(delta_pos, 1.f)); + LL_CHECK_MEMORY for (i=0; 
i<=num_render_sections; ++i) { new_point = &path->mPath[i]; LLVector3 pos = newSection[i].mPosition * rel_xform; LLQuaternion rot = mSection[i].mAxisRotation * newSection[i].mRotation * delta_rot; - - if (!mUpdated || (new_point->mPos-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f) + + LLVector3 np(new_point->mPos.getF32ptr()); + + if (!mUpdated || (np-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f) { - new_point->mPos = newSection[i].mPosition * rel_xform; + new_point->mPos.load3((newSection[i].mPosition * rel_xform).mV); mUpdated = FALSE; } - new_point->mRot = rot; - new_point->mScale = newSection[i].mScale; + new_point->mRot.loadu(LLMatrix3(rot)); + new_point->mScale.set(newSection[i].mScale.mV[0], newSection[i].mScale.mV[1], 0,1); new_point->mTexT = ((F32)i)/(num_render_sections); } - + LL_CHECK_MEMORY mLastSegmentRotation = parentSegmentRotation; } +static LLFastTimer::DeclareTimer FTM_FLEXI_PREBUILD("Flexi Prebuild"); + void LLVolumeImplFlexible::preRebuild() { if (!mUpdated) { + LLFastTimer t(FTM_FLEXI_PREBUILD); doFlexibleRebuild(); } } diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index b1706d9d35..b5543c4a37 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -739,7 +739,7 @@ class LLVolumeGeometryManager: public LLGeometryManager virtual void rebuildGeom(LLSpatialGroup* group); virtual void rebuildMesh(LLSpatialGroup* group); virtual void getGeometry(LLSpatialGroup* group); - void genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector& faces, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE); + void genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE); void registerFace(LLSpatialGroup* group, LLFace* facep, U32 type); }; diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 7adf18b6d0..597fb03526 100644 --- a/indra/newview/llvovolume.cpp +++ 
b/indra/newview/llvovolume.cpp @@ -1051,8 +1051,7 @@ BOOL LLVOVolume::setVolume(const LLVolumeParams &params_in, const S32 detail, bo } } } - - + static LLCachedControl use_transform_feedback(gSavedSettings, "RenderUseTransformFeedback"); bool cache_in_vram = use_transform_feedback && gTransformPositionProgram.mProgramObject && @@ -4242,11 +4241,20 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) mFaceList.clear(); - std::vector fullbright_faces; - std::vector bump_faces; - std::vector simple_faces; + const U32 MAX_FACE_COUNT = 4096; + + static LLFace** fullbright_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** bump_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** simple_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + static LLFace** alpha_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64); + + U32 fullbright_count = 0; + U32 bump_count = 0; + U32 simple_count = 0; + U32 alpha_count = 0; + + - std::vector alpha_faces; + U32 useage = group->mSpatialPartition->mBufferUsage; U32 max_vertices = (gSavedSettings.getS32("RenderMaxVBOSize")*1024)/LLVertexBuffer::calcVertexSize(group->mSpatialPartition->mVertexDataMask); @@ -4257,6 +4265,8 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) bool emissive = false; + + { LLFastTimer t(FTM_REBUILD_VOLUME_FACE_LIST); @@ -4558,7 +4568,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) { if (facep->canRenderAsMask()) { //can be treated as alpha mask - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { @@ -4566,7 +4579,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) { //only treat as alpha in the pipeline if < 100% transparent drawablep->setState(LLDrawable::HAS_ALPHA); } - alpha_faces.push_back(facep); + if (alpha_count < MAX_FACE_COUNT) + { + 
alpha_faces[alpha_count++] = facep; + } } } else @@ -4581,33 +4597,51 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) { if (te->getBumpmap()) { //needs normal + binormal - bump_faces.push_back(facep); + if (bump_count < MAX_FACE_COUNT) + { + bump_faces[bump_count++] = facep; + } } else if (te->getShiny() || !te->getFullbright()) { //needs normal - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { //doesn't need normal facep->setState(LLFace::FULLBRIGHT); - fullbright_faces.push_back(facep); + if (fullbright_count < MAX_FACE_COUNT) + { + fullbright_faces[fullbright_count++] = facep; + } } } else { if (te->getBumpmap() && LLPipeline::sRenderBump) { //needs normal + binormal - bump_faces.push_back(facep); + if (bump_count < MAX_FACE_COUNT) + { + bump_faces[bump_count++] = facep; + } } else if ((te->getShiny() && LLPipeline::sRenderBump) || !(te->getFullbright() || bake_sunlight)) { //needs normal - simple_faces.push_back(facep); + if (simple_count < MAX_FACE_COUNT) + { + simple_faces[simple_count++] = facep; + } } else { //doesn't need normal facep->setState(LLFace::FULLBRIGHT); - fullbright_faces.push_back(facep); + if (fullbright_count < MAX_FACE_COUNT) + { + fullbright_faces[fullbright_count++] = facep; + } } } } @@ -4657,17 +4691,17 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) if (batch_textures) { bump_mask |= LLVertexBuffer::MAP_BINORMAL; - genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, FALSE, TRUE); - genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, FALSE, TRUE); - genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, FALSE); - genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, TRUE, TRUE); + genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, simple_count, FALSE, TRUE); + 
genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, fullbright_count, FALSE, TRUE); + genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, bump_count, FALSE, FALSE); + genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, alpha_count, TRUE, TRUE); } else { - genDrawInfo(group, simple_mask, simple_faces); - genDrawInfo(group, fullbright_mask, fullbright_faces); - genDrawInfo(group, bump_mask, bump_faces, FALSE, TRUE); - genDrawInfo(group, alpha_mask, alpha_faces, TRUE); + genDrawInfo(group, simple_mask, simple_faces, simple_count); + genDrawInfo(group, fullbright_mask, fullbright_faces, fullbright_count); + genDrawInfo(group, bump_mask, bump_faces, bump_count, FALSE, FALSE); + genDrawInfo(group, alpha_mask, alpha_faces, alpha_count, TRUE); } @@ -4699,6 +4733,7 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group) } } +static LLFastTimer::DeclareTimer FTM_REBUILD_MESH_FLUSH("Flush Mesh"); void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) { @@ -4708,11 +4743,14 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) LLFastTimer ftm(FTM_REBUILD_VOLUME_VB); LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); //make sure getgeometryvolume shows up in the right place in timers - S32 num_mapped_veretx_buffer = LLVertexBuffer::sMappedCount ; - group->mBuilt = 1.f; - std::set mapped_buffers; + S32 num_mapped_vertex_buffer = LLVertexBuffer::sMappedCount ; + + const U32 MAX_BUFFER_COUNT = 4096; + LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT]; + + U32 buffer_count = 0; for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter) { @@ -4722,7 +4760,7 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) { LLVOVolume* vobj = drawablep->getVOVolume(); vobj->preRebuild(); - + if (drawablep->isState(LLDrawable::ANIMATED_CHILD)) { vobj->updateRelativeXform(true); @@ -4747,9 
+4785,9 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) } - if (buff->isLocked()) + if (buff->isLocked() && buffer_count < MAX_BUFFER_COUNT) { - mapped_buffers.insert(buff); + locked_buffer[buffer_count++] = buff; } } } @@ -4765,21 +4803,24 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group) } } - for (std::set::iterator iter = mapped_buffers.begin(); iter != mapped_buffers.end(); ++iter) { - (*iter)->flush(); - } - - // don't forget alpha - if(group != NULL && - !group->mVertexBuffer.isNull() && - group->mVertexBuffer->isLocked()) - { - group->mVertexBuffer->flush(); + LLFastTimer t(FTM_REBUILD_MESH_FLUSH); + for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter) + { + (*iter)->flush(); + } + + // don't forget alpha + if(group != NULL && + !group->mVertexBuffer.isNull() && + group->mVertexBuffer->isLocked()) + { + group->mVertexBuffer->flush(); + } } //if not all buffers are unmapped - if(num_mapped_veretx_buffer != LLVertexBuffer::sMappedCount) + if(num_mapped_vertex_buffer != LLVertexBuffer::sMappedCount) { llwarns << "Not all mapped vertex buffers are unmapped!" 
<< llendl ; for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter) @@ -4839,7 +4880,7 @@ static LLFastTimer::DeclareTimer FTM_GEN_DRAW_INFO_RESIZE_VB("Resize VB"); -void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector& faces, BOOL distance_sort, BOOL batch_textures) +void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort, BOOL batch_textures) { LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); @@ -4875,17 +4916,18 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (!distance_sort) { //sort faces by things that break batches - std::sort(faces.begin(), faces.end(), CompareBatchBreakerModified()); + std::sort(faces, faces+face_count, CompareBatchBreakerModified()); } else { //sort faces by distance - std::sort(faces.begin(), faces.end(), LLFace::CompareDistanceGreater()); + std::sort(faces, faces+face_count, LLFace::CompareDistanceGreater()); } } bool hud_group = group->isHUDGroup() ; - std::vector::iterator face_iter = faces.begin(); + LLFace** face_iter = faces; + LLFace** end_faces = faces+face_count; LLSpatialGroup::buffer_map_t buffer_map; @@ -4916,7 +4958,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: bool flexi = false; - while (face_iter != faces.end()) + while (face_iter != end_faces) { //pull off next face LLFace* facep = *face_iter; @@ -4945,10 +4987,13 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: flexi = flexi || facep->getViewerObject()->getVolume()->isUnique(); //sum up vertices needed for this render batch - std::vector::iterator i = face_iter; + LLFace** i = face_iter; ++i; - std::vector texture_list; + const U32 MAX_TEXTURE_COUNT = 32; + LLViewerTexture* texture_list[MAX_TEXTURE_COUNT]; + + U32 texture_count = 0; { LLFastTimer t(FTM_GEN_DRAW_INFO_FACE_SIZE); @@ 
-4956,12 +5001,15 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: { U8 cur_tex = 0; facep->setTextureIndex(cur_tex); - texture_list.push_back(tex); - + if (texture_count < MAX_TEXTURE_COUNT) + { + texture_list[texture_count++] = tex; + } + if (can_batch_texture(facep)) { //populate texture_list with any textures that can be batched //move i to the next unbatchable face - while (i != faces.end()) + while (i != end_faces) { facep = *i; @@ -4976,7 +5024,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (distance_sort) { //textures might be out of order, see if texture exists in current batch bool found = false; - for (U32 tex_idx = 0; tex_idx < texture_list.size(); ++tex_idx) + for (U32 tex_idx = 0; tex_idx < texture_count; ++tex_idx) { if (facep->getTexture() == texture_list[tex_idx]) { @@ -4988,7 +5036,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: if (!found) { - cur_tex = texture_list.size(); + cur_tex = texture_count; } } else @@ -5003,7 +5051,10 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: tex = facep->getTexture(); - texture_list.push_back(tex); + if (texture_count < MAX_TEXTURE_COUNT) + { + texture_list[texture_count++] = tex; + } } if (geom_count + facep->getGeomCount() > max_vertices) @@ -5026,7 +5077,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std:: } else { - while (i != faces.end() && + while (i != end_faces && (LLPipeline::sTextureBindTest || (distance_sort || (*i)->getTexture() == tex))) { facep = *i; -- cgit v1.3 From c04f4f66c813181eb378b00045aec969dc2c4aae Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 12:30:16 -0700 Subject: Moved LLAlignedArray from llmath to llcommon and put template func impls in header to work around Mac 4.3.3 link issue. 
--- indra/llcommon/CMakeLists.txt | 1 + indra/llmath/llvolume.cpp | 87 ------------------------------------------- indra/llmath/llvolume.h | 21 +---------- 3 files changed, 2 insertions(+), 107 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index e019c17280..0c2ceebb52 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -121,6 +121,7 @@ set(llcommon_HEADER_FILES linden_common.h linked_lists.h llaccountingcost.h + llalignedarray.h llallocator.h llallocator_heap_profile.h llagentconstants.h diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 9fc72fd801..4f3e753276 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -96,93 +96,6 @@ extern BOOL gDebugGL; bool less_than_max_mag(const LLVector4a& vec); -template -LLAlignedArray::LLAlignedArray() -{ - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template -LLAlignedArray::~LLAlignedArray() -{ - ll_aligned_free(mArray); - mArray = NULL; - mElementCount = 0; - mCapacity = 0; -} - -template -void LLAlignedArray::push_back(const T& elem) -{ - T* old_buf = NULL; - if (mCapacity <= mElementCount) - { - mCapacity++; - mCapacity *= 2; - T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - } - old_buf = mArray; - mArray = new_buf; - } - - mArray[mElementCount++] = elem; - - //delete old array here to prevent error on a.push_back(a[0]) - ll_aligned_free(old_buf); -} - -template -void LLAlignedArray::resize(U32 size) -{ - if (mCapacity < size) - { - mCapacity = size+mCapacity*2; - T* new_buf = mCapacity > 0 ? 
(T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; - if (mArray) - { - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); - ll_aligned_free(mArray); - } - - /*for (U32 i = mElementCount; i < mCapacity; ++i) - { - new(new_buf+i) T(); - }*/ - mArray = new_buf; - } - - mElementCount = size; -} - - -template -T& LLAlignedArray::operator[](int idx) -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template -const T& LLAlignedArray::operator[](int idx) const -{ - llassert(idx < mElementCount); - return mArray[idx]; -} - -template -T* LLAlignedArray::append(S32 N) -{ - U32 sz = size(); - resize(sz+N); - return &((*this)[sz]); -} - - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 5e43af92ec..1ff53590cf 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -41,6 +41,7 @@ class LLVolumeFace; class LLVolume; class LLVolumeTriangle; +#include "llalignedarray.h" #include "lldarray.h" #include "lluuid.h" #include "v4color.h" @@ -195,26 +196,6 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128; const S32 LL_SCULPT_MESH_MAX_FACES = 8; -template -class LLAlignedArray -{ -public: - T* mArray; - U32 mElementCount; - U32 mCapacity; - - LLAlignedArray(); - ~LLAlignedArray(); - - void push_back(const T& elem); - U32 size() const { return mElementCount; } - void resize(U32 size); - T* append(S32 N); - T& operator[](int idx); - const T& operator[](int idx) const; -}; - - class LLProfileParams { public: -- cgit v1.3 From 356d10e6a6cfea279228f371aed88aba73ed70dd Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 13:12:03 -0700 Subject: Work around 'interesting' forced include order in vmath stuff on Windows --- indra/llmath/llvolume.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llmath') diff --git 
a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index 1ff53590cf..6b599a4126 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -41,7 +41,6 @@ class LLVolumeFace; class LLVolume; class LLVolumeTriangle; -#include "llalignedarray.h" #include "lldarray.h" #include "lluuid.h" #include "v4color.h" @@ -58,6 +57,7 @@ class LLVolumeTriangle; #include "llrefcount.h" #include "llpointer.h" #include "llfile.h" +#include "llalignedarray.h" //============================================================================ -- cgit v1.3 From 9a0f913e7089c9b2120856936bf47dcb8de9c6b1 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham)" Date: Mon, 11 Mar 2013 13:55:40 -0700 Subject: Work around bogus gcc 4.3.4 warning from alignment assert --- indra/llmath/llvector4a.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'indra/llmath') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 0526793d3a..1a478bc8de 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -93,7 +93,11 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary +// This assert is causing spurious referenced before set warnings on GCC 4.3.4 +// +#if !LL_LINUX ll_assert_aligned(this,16); +#endif } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) -- cgit v1.3 From 6ac6736994240d9789a81bf585468bef50805fd8 Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Mon, 11 Mar 2013 16:00:25 -0700 Subject: Move 16b aligned memcpy and alignment utilities to llmem in llcommon for easier use elsewhere --- indra/llcommon/llalignedarray.h | 16 +------ indra/llcommon/llmemory.h | 102 +++++++++++++++++++++++++++++++++++++--- indra/llmath/llsimdmath.h | 28 ----------- indra/llmath/llvector4a.cpp | 50 +------------------- 4 files changed, 99 insertions(+), 97 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llcommon/llalignedarray.h b/indra/llcommon/llalignedarray.h index 5e04e8050f..ed8fd31205 100644 --- 
a/indra/llcommon/llalignedarray.h +++ b/indra/llcommon/llalignedarray.h @@ -29,10 +29,6 @@ #include "llmemory.h" -#if LL_WINDOWS -#include "llvector4a.h" // for 16b fast copy -#endif - template class LLAlignedArray { @@ -81,11 +77,7 @@ void LLAlignedArray::push_back(const T& elem) T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); if (mArray) { -#if LL_WINDOWS - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#else - memcpy((F32*)new_buf, (F32*)mArray, sizeof(T)*mElementCount); -#endif + ll_memcpy_nonaliased_aligned_16((char*)new_buf, (char*)mArray, sizeof(T)*mElementCount); } old_buf = mArray; mArray = new_buf; @@ -106,11 +98,7 @@ void LLAlignedArray::resize(U32 size) T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; if (mArray) { -#if LL_WINDOWS - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#else - memcpy((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#endif + ll_memcpy_nonaliased_aligned_16((char*) new_buf, (char*) mArray, sizeof(T)*mElementCount); ll_aligned_free(mArray); } diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 46cabfadcd..4938775e2b 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -36,6 +36,44 @@ class LLMutex ; #define LL_CHECK_MEMORY #endif +LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); + +#ifdef SHOW_ASSERT +#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) +#else +#define ll_assert_aligned(ptr,alignment) +#endif + +#include + +template T* LL_NEXT_ALIGNED_ADDRESS(T* address) +{ + return reinterpret_cast( + (reinterpret_cast(address) + 0xF) & ~0xF); +} + +template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) +{ + return reinterpret_cast( + (reinterpret_cast(address) + 0x3F) & ~0x3F); +} + +#if LL_LINUX || LL_DARWIN + +#define LL_ALIGN_PREFIX(x) +#define 
LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) + +#elif LL_WINDOWS + +#define LL_ALIGN_PREFIX(x) __declspec(align(x)) +#define LL_ALIGN_POSTFIX(x) + +#else +#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" +#endif + +#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) + inline void* ll_aligned_malloc( size_t size, int align ) { #if defined(LL_WINDOWS) @@ -144,6 +182,64 @@ inline void ll_aligned_free_32(void *p) #endif } + +// Copy words 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP. +// Source and dest must be 16-byte aligned and size must be multiple of 16. +// +inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes) +{ + assert(src != NULL); + assert(dst != NULL); + assert(bytes > 0); + assert((bytes % sizeof(F32))== 0); + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src)); + assert(bytes%16==0); + + char* end = dst + bytes; + + if (bytes > 64) + { + void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); + + //at least 64 bytes before the end of the destination, switch to 16 byte copies + void* end_64 = end-64; + + _mm_prefetch((char*)begin_64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); + + while (dst < begin_64) + { + + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + dst += 4; + src += 4; + } + + while (dst < end_64) + { + _mm_prefetch((char*)src + 512, _MM_HINT_NTA); + _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16))); + _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32))); + _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48))); + dst += 64; + src += 64; + } + } + + while (dst < end) + { + _mm_store_ps((F32*)dst, 
_mm_load_ps((F32*)src)); + dst += 16; + src += 16; + } +} + #ifndef __DEBUG_PRIVATE_MEM__ #define __DEBUG_PRIVATE_MEM__ 0 #endif @@ -552,13 +648,7 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr) // LLSingleton moved to llsingleton.h -LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); -#ifdef SHOW_ASSERT -#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) -#else -#define ll_assert_aligned(ptr,alignment) -#endif #endif diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..cebd2ace7d 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include #endif -template T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0xF) & ~0xF); -} - -template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include #include diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..570fa41a43 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = 
LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - _mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) -- cgit v1.3 From e8b8a12b7365c17cf0326be365b78bcb1da1bfac Mon Sep 17 00:00:00 2001 From: Graham Madarasz Date: Tue, 12 Mar 2013 10:48:05 -0700 Subject: Mods to make compile on gcc 4.6.3 work mo betta --- indra/cmake/00-Common.cmake | 4 ++++ indra/llmath/llvolume.cpp | 5 ----- indra/llmessage/lliosocket.cpp | 1 + indra/llui/llconsole.cpp | 2 -- indra/llui/llkeywords.cpp | 2 +- indra/llui/lllayoutstack.cpp | 2 +- indra/llui/lltextbase.cpp | 3 ++- indra/llui/lltexteditor.cpp | 1 - indra/llui/lltoolbar.cpp | 4 +--- indra/lscript/lscript_execute/lscript_readlso.cpp | 6 +++--- indra/test/io.cpp | 1 + indra/test/llstreamtools_tut.cpp | 8 +++++++- indra/test/lltemplatemessagebuilder_tut.cpp | 2 ++ 13 files changed, 23 insertions(+), 18 deletions(-) (limited to 'indra/llmath') diff --git a/indra/cmake/00-Common.cmake b/indra/cmake/00-Common.cmake index 6bfd8c14d6..aa1d50b993 100644 --- a/indra/cmake/00-Common.cmake +++ b/indra/cmake/00-Common.cmake @@ -135,6 +135,10 @@ if (LINUX) set(CMAKE_CXX_FLAGS "-Wno-deprecated -Wno-uninitialized -Wno-unused-variable -Wno-unused-function 
${CMAKE_CXX_FLAGS}") endif (${CXX_VERSION_NUMBER} LESS 420) + if(${CXX_VERSION_NUMBER} GREATER 459) + set(CMAKE_CXX_FLAGS "-Wno-deprecated -Wno-unused-but-set-variable -Wno-unused-variable ${CMAKE_CXX_FLAGS}") + endif (${CXX_VERSION_NUMBER} GREATER 459) + # gcc 4.3 and above don't like the LL boost and also # cause warnings due to our use of deprecated headers if(${CXX_VERSION_NUMBER} GREATER 429) diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index eff224743b..cb5633c1bb 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5314,12 +5314,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) S32 max_t = volume->getPath().mPath.size(); // S32 i; - S32 num_vertices = 0, num_indices = 0; S32 grid_size = (profile.size()-1)/4; - S32 quad_count = (grid_size * grid_size); - - num_vertices = (grid_size+1)*(grid_size+1); - num_indices = quad_count * 4; LLVector4a& min = mExtents[0]; LLVector4a& max = mExtents[1]; diff --git a/indra/llmessage/lliosocket.cpp b/indra/llmessage/lliosocket.cpp index 0287026659..7713e553ef 100644 --- a/indra/llmessage/lliosocket.cpp +++ b/indra/llmessage/lliosocket.cpp @@ -592,6 +592,7 @@ LLIOPipe::EStatus LLIOServerSocket::process_impl( PUMP_DEBUG; apr_pool_t* new_pool = NULL; apr_status_t status = apr_pool_create(&new_pool, mPool); + (void)status; apr_socket_t* socket = NULL; status = apr_socket_accept( &socket, diff --git a/indra/llui/llconsole.cpp b/indra/llui/llconsole.cpp index 161496b1f5..fdfaf284de 100644 --- a/indra/llui/llconsole.cpp +++ b/indra/llui/llconsole.cpp @@ -243,8 +243,6 @@ void LLConsole::draw() void LLConsole::Paragraph::makeParagraphColorSegments (const LLColor4 &color) { LLSD paragraph_color_segments; - LLColor4 lcolor=color; - paragraph_color_segments[0]["text"] =wstring_to_utf8str(mParagraphText); LLSD color_sd = color.getValue(); paragraph_color_segments[0]["color"]=color_sd; diff --git a/indra/llui/llkeywords.cpp b/indra/llui/llkeywords.cpp index 
c1cd04186b..537cc82302 100644 --- a/indra/llui/llkeywords.cpp +++ b/indra/llui/llkeywords.cpp @@ -368,7 +368,7 @@ void LLKeywords::findSegments(std::vector* seg_list, const LLW const llwchar* base = wtext.c_str(); const llwchar* cur = base; const llwchar* line = NULL; - + (void)line; while( *cur ) { if( *cur == '\n' || cur == base ) diff --git a/indra/llui/lllayoutstack.cpp b/indra/llui/lllayoutstack.cpp index c93f538570..32383b1f1d 100644 --- a/indra/llui/lllayoutstack.cpp +++ b/indra/llui/lllayoutstack.cpp @@ -481,7 +481,7 @@ void LLLayoutStack::createResizeBar(LLLayoutPanel* panelp) { LLResizeBar::Side side = (mOrientation == HORIZONTAL) ? LLResizeBar::RIGHT : LLResizeBar::BOTTOM; LLRect resize_bar_rect = getRect(); - + (void)resize_bar_rect; LLResizeBar::Params resize_params; resize_params.name("resize"); resize_params.resizing_view(lp); diff --git a/indra/llui/lltextbase.cpp b/indra/llui/lltextbase.cpp index 3815eec447..ec66b6df56 100644 --- a/indra/llui/lltextbase.cpp +++ b/indra/llui/lltextbase.cpp @@ -351,7 +351,6 @@ void LLTextBase::drawSelectionBackground() S32 selection_left = llmin( mSelectionStart, mSelectionEnd ); S32 selection_right = llmax( mSelectionStart, mSelectionEnd ); - LLRect selection_rect = mVisibleTextRect; // Skip through the lines we aren't drawing. LLRect content_display_rect = getVisibleDocumentRect(); @@ -2241,6 +2240,8 @@ S32 LLTextBase::getDocIndexFromLocalCoord( S32 local_x, S32 local_y, BOOL round, // Figure out which line we're nearest to. 
LLRect visible_region = getVisibleDocumentRect(); LLRect doc_rect = mDocumentView->getRect(); + (void)visible_region; + (void)doc_rect; S32 doc_y = local_y - doc_rect.mBottom; diff --git a/indra/llui/lltexteditor.cpp b/indra/llui/lltexteditor.cpp index 46fbd1e6a0..e4bd51c8ce 100644 --- a/indra/llui/lltexteditor.cpp +++ b/indra/llui/lltexteditor.cpp @@ -2490,7 +2490,6 @@ void LLTextEditor::updateSegments() mKeywords.findSegments(&segment_list, getWText(), mDefaultColor.get(), *this); clearSegments(); - segment_set_t::iterator insert_it = mSegments.begin(); for (segment_vec_t::iterator list_it = segment_list.begin(); list_it != segment_list.end(); ++list_it) { insertSegment(*list_it); diff --git a/indra/llui/lltoolbar.cpp b/indra/llui/lltoolbar.cpp index 63b7e452d2..62b6a0cd2f 100644 --- a/indra/llui/lltoolbar.cpp +++ b/indra/llui/lltoolbar.cpp @@ -653,7 +653,6 @@ void LLToolBar::updateLayoutAsNeeded() S32 max_row_length = 0; S32 max_length; - S32 max_total_girth; S32 cur_start; S32 cur_row ; S32 row_pad_start; @@ -664,7 +663,6 @@ void LLToolBar::updateLayoutAsNeeded() if (orientation == LLLayoutStack::HORIZONTAL) { max_length = getRect().getWidth() - mPadLeft - mPadRight; - max_total_girth = getRect().getHeight() - mPadTop - mPadBottom; row_pad_start = mPadLeft; row_pad_end = mPadRight; cur_row = mPadTop; @@ -673,7 +671,6 @@ void LLToolBar::updateLayoutAsNeeded() else // VERTICAL { max_length = getRect().getHeight() - mPadTop - mPadBottom; - max_total_girth = getRect().getWidth() - mPadLeft - mPadRight; row_pad_start = mPadTop; row_pad_end = mPadBottom; cur_row = mPadLeft; @@ -842,6 +839,7 @@ void LLToolBar::draw() { LLRect caret_rect = caret->getRect(); LLRect toolbar_rect = getRect(); + (void)toolbar_rect; if (getOrientation(mSideType) == LLLayoutStack::HORIZONTAL) { caret->setRect(LLRect(mDragx-caret_rect.getWidth()/2+1, diff --git a/indra/lscript/lscript_execute/lscript_readlso.cpp b/indra/lscript/lscript_execute/lscript_readlso.cpp index 35caa41ae1..3cdb41ac17 
100644 --- a/indra/lscript/lscript_execute/lscript_readlso.cpp +++ b/indra/lscript/lscript_execute/lscript_readlso.cpp @@ -145,7 +145,7 @@ void LLScriptLSOParse::printGlobals(LLFILE *fp) // get offset to skip past name varoffset = global_v_offset; offset = bytestream2integer(mRawData, global_v_offset); - + (void)offset; //hush little compiler // get typeexport type = *(mRawData + global_v_offset++); @@ -262,8 +262,6 @@ void LLScriptLSOParse::printGlobalFunctions(LLFILE *fp) fprintf(fp, "[Function #%d] [0x%X] %s\n", function_number, orig_function_offset, name); fprintf(fp, "\tReturn Type: %s\n", LSCRIPTTypeNames[type]); type = *(mRawData + function_offset++); - S32 params; - params = 0; S32 pcount = 0; while (type) { @@ -350,6 +348,7 @@ void LLScriptLSOParse::printStates(LLFILE *fp) S32 dummy; opcode_end = worst_case_opcode_end; + (void)opcode_end; for (k = LSTT_STATE_BEGIN; k < LSTT_STATE_END; k++) { @@ -357,6 +356,7 @@ void LLScriptLSOParse::printStates(LLFILE *fp) { temp_end = bytestream2integer(mRawData, read_ahead); dummy = bytestream2integer(mRawData, read_ahead); + (void)dummy; if ( (temp_end < opcode_end) &&(temp_end > event_offset)) { diff --git a/indra/test/io.cpp b/indra/test/io.cpp index ce747f667d..7f26ac6724 100644 --- a/indra/test/io.cpp +++ b/indra/test/io.cpp @@ -1141,6 +1141,7 @@ namespace tut ensure("Connected to server", connected); lldebugs << "connected" << llendl; F32 elapsed = pump_loop(mPump,0.1f); + (void)elapsed; count = mPump->runningChains(); ensure_equals("server chain onboard", count, 2); lldebugs << "** Client is connected." 
<< llendl; diff --git a/indra/test/llstreamtools_tut.cpp b/indra/test/llstreamtools_tut.cpp index a93f2e8f65..68e56b5ee2 100644 --- a/indra/test/llstreamtools_tut.cpp +++ b/indra/test/llstreamtools_tut.cpp @@ -386,15 +386,17 @@ namespace tut std::string actual_result; std::istringstream is; bool ret; - is.clear(); is.str(str = " First Second \t \r \n Third Fourth-ShouldThisBePartOfFourth Fifth\n"); actual_result = ""; ret = get_word(actual_result, is); // First + (void)ret; actual_result = ""; ret = get_word(actual_result, is); // Second + (void)ret; actual_result = ""; ret = get_word(actual_result, is); // Third + (void)ret; // the current implementation of get_word seems inconsistent with // skip_to_next_word. skip_to_next_word treats any character other @@ -486,6 +488,7 @@ namespace tut is.str(str = "First Second \t \r\n Third Fourth-ShouldThisBePartOfFourth IsThisFifth\n"); actual_result = ""; ret = get_line(actual_result, is); + (void)ret; expected_result = "First Second \t \r\n"; ensure_equals("get_line: 1", actual_result, expected_result); @@ -551,6 +554,7 @@ namespace tut is.str(str = "Should not skip lone \r.\r\n"); actual_result = ""; ret = get_line(actual_result, is); + (void)ret; expected_result = "Should not skip lone \r.\r\n"; ensure_equals("get_line: carriage return skipped even though not followed by newline", actual_result, expected_result); } @@ -569,6 +573,7 @@ namespace tut is.str(str = "\n"); actual_result = ""; ret = get_line(actual_result, is); + (void)ret; expected_result = "\n"; ensure_equals("get_line: Just newline", actual_result, expected_result); } @@ -588,6 +593,7 @@ namespace tut is.str(str = "First Line.\nSecond Line.\n"); actual_result = ""; ret = get_line(actual_result, is, 255); + (void)ret; expected_result = "First Line.\n"; ensure_equals("get_line: Basic Operation", actual_result, expected_result); diff --git a/indra/test/lltemplatemessagebuilder_tut.cpp b/indra/test/lltemplatemessagebuilder_tut.cpp index 6e1c82bb24..0aad3cbc15 
100644 --- a/indra/test/lltemplatemessagebuilder_tut.cpp +++ b/indra/test/lltemplatemessagebuilder_tut.cpp @@ -958,11 +958,13 @@ namespace tut reader->validateMessage(buffer, builtSize, LLHost()); reader->readMessage(buffer, LLHost()); reader->getU32(_PREHASH_Test0, _PREHASH_Test0, outValue); + (void)outValue; char outBuffer[bufferSize]; memset(buffer, 0xcc, bufferSize); reader->getString(_PREHASH_Test1, _PREHASH_Test0, bufferSize, outBuffer); outValue2 = reader->getNumberOfBlocks(_PREHASH_Test1); + (void)outValue2; ensure_equals("Ensure present value ", outValue, inValue); ensure_equals("Ensure unchanged buffer ", strlen(outBuffer), 0); delete reader; -- cgit v1.3 From 5d2fea6262d91eb8d3c06d97a160ca9373b96889 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham Linden)" Date: Wed, 13 Mar 2013 10:42:40 -0700 Subject: Move fast memcpy to llcommon and use it in llalignedarray pushback on all platforms. Code Review: DaveP --- indra/llcommon/llalignedarray.h | 16 +----- indra/llcommon/llmemory.h | 116 +++++++++++++++++++++++++++++++++++++--- indra/llmath/llsimdmath.h | 28 ---------- indra/llmath/llvector4a.cpp | 50 +---------------- indra/llmath/llvector4a.h | 4 -- indra/llmath/llvolume.cpp | 11 ++-- 6 files changed, 120 insertions(+), 105 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llcommon/llalignedarray.h b/indra/llcommon/llalignedarray.h index 5e04e8050f..ed8fd31205 100644 --- a/indra/llcommon/llalignedarray.h +++ b/indra/llcommon/llalignedarray.h @@ -29,10 +29,6 @@ #include "llmemory.h" -#if LL_WINDOWS -#include "llvector4a.h" // for 16b fast copy -#endif - template class LLAlignedArray { @@ -81,11 +77,7 @@ void LLAlignedArray::push_back(const T& elem) T* new_buf = (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment); if (mArray) { -#if LL_WINDOWS - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#else - memcpy((F32*)new_buf, (F32*)mArray, sizeof(T)*mElementCount); -#endif + 
ll_memcpy_nonaliased_aligned_16((char*)new_buf, (char*)mArray, sizeof(T)*mElementCount); } old_buf = mArray; mArray = new_buf; @@ -106,11 +98,7 @@ void LLAlignedArray::resize(U32 size) T* new_buf = mCapacity > 0 ? (T*) ll_aligned_malloc(mCapacity*sizeof(T), alignment) : NULL; if (mArray) { -#if LL_WINDOWS - LLVector4a::memcpyNonAliased16((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#else - memcpy((F32*) new_buf, (F32*) mArray, sizeof(T)*mElementCount); -#endif + ll_memcpy_nonaliased_aligned_16((char*) new_buf, (char*) mArray, sizeof(T)*mElementCount); ll_aligned_free(mArray); } diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 46cabfadcd..61e30f11cc 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -36,6 +36,44 @@ class LLMutex ; #define LL_CHECK_MEMORY #endif +LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); + +#ifdef SHOW_ASSERT +#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) +#else +#define ll_assert_aligned(ptr,alignment) +#endif + +#include + +template T* LL_NEXT_ALIGNED_ADDRESS(T* address) +{ + return reinterpret_cast( + (reinterpret_cast(address) + 0xF) & ~0xF); +} + +template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) +{ + return reinterpret_cast( + (reinterpret_cast(address) + 0x3F) & ~0x3F); +} + +#if LL_LINUX || LL_DARWIN + +#define LL_ALIGN_PREFIX(x) +#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) + +#elif LL_WINDOWS + +#define LL_ALIGN_PREFIX(x) __declspec(align(x)) +#define LL_ALIGN_POSTFIX(x) + +#else +#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" +#endif + +#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) + inline void* ll_aligned_malloc( size_t size, int align ) { #if defined(LL_WINDOWS) @@ -144,6 +182,78 @@ inline void ll_aligned_free_32(void *p) #endif } + +// Copy words 16-byte blocks from src to dst. Source and destination MUST NOT OVERLAP. 
+// Source and dest must be 16-byte aligned and size must be multiple of 16. +// +inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes) +{ + assert(src != NULL); + assert(dst != NULL); + assert(bytes > 0); + assert((bytes % sizeof(F32))== 0); + ll_assert_aligned(src,16); + ll_assert_aligned(dst,16); + assert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src)); + assert(bytes%16==0); + + char* end = dst + bytes; + + if (bytes > 64) + { + + // Find start of 64b aligned area within block + // + void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); + + //at least 64 bytes before the end of the destination, switch to 16 byte copies + void* end_64 = end-64; + + // Prefetch the head of the 64b area now + // + _mm_prefetch((char*)begin_64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); + + // Copy 16b chunks until we're 64b aligned + // + while (dst < begin_64) + { + + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + dst += 16; + src += 16; + } + + // Copy 64b chunks up to your tail + // + // might be good to shmoo the 512b prefetch offset + // (characterize performance for various values) + // + while (dst < end_64) + { + _mm_prefetch((char*)src + 512, _MM_HINT_NTA); + _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16))); + _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32))); + _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48))); + dst += 64; + src += 64; + } + } + + // Copy remainder 16b tail chunks (or ALL 16b chunks for sub-64b copies) + // + while (dst < end) + { + _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); + dst += 16; + src += 16; + } +} + #ifndef __DEBUG_PRIVATE_MEM__ #define __DEBUG_PRIVATE_MEM__ 0 #endif @@ -552,13 +662,7 @@ void 
LLPrivateMemoryPoolTester::operator delete[](void* addr) // LLSingleton moved to llsingleton.h -LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment); -#ifdef SHOW_ASSERT -#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast(ptr),((U32)alignment)) -#else -#define ll_assert_aligned(ptr,alignment) -#endif #endif diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h index 01458521ec..cebd2ace7d 100644 --- a/indra/llmath/llsimdmath.h +++ b/indra/llmath/llsimdmath.h @@ -39,34 +39,6 @@ #include #endif -template T* LL_NEXT_ALIGNED_ADDRESS(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0xF) & ~0xF); -} - -template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) -{ - return reinterpret_cast( - (reinterpret_cast(address) + 0x3F) & ~0x3F); -} - -#if LL_LINUX || LL_DARWIN - -#define LL_ALIGN_PREFIX(x) -#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) - -#elif LL_WINDOWS - -#define LL_ALIGN_PREFIX(x) __declspec(align(x)) -#define LL_ALIGN_POSTFIX(x) - -#else -#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" -#endif - -#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) - #include #include diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index 6edeb0fefe..570fa41a43 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -41,55 +41,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F /*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); - ll_assert_aligned(src,16); - ll_assert_aligned(dst,16); - assert(bytes%16==0); - - F32* end = dst + (bytes / sizeof(F32) ); - - if (bytes > 64) - { - F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); - - //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies - F32* end_64 = end-16; - - 
_mm_prefetch((char*)begin_64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); - _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); - - while (dst < begin_64) - { - copy4a(dst, src); - dst += 4; - src += 4; - } - - while (dst < end_64) - { - _mm_prefetch((char*)src + 512, _MM_HINT_NTA); - _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); - copy4a(dst, src); - copy4a(dst+4, src+4); - copy4a(dst+8, src+8); - copy4a(dst+12, src+12); - - dst += 16; - src += 16; - } - } - - while (dst < end) - { - copy4a(dst, src); - dst += 4; - src += 4; - } + ll_memcpy_nonaliased_aligned_16((char*)dst, (char*)src, bytes); } void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 1a478bc8de..0526793d3a 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -93,11 +93,7 @@ public: LLVector4a() { //DO NOT INITIALIZE -- The overhead is completely unnecessary -// This assert is causing spurious referenced before set warnings on GCC 4.3.4 -// -#if !LL_LINUX ll_assert_aligned(this,16); -#endif } LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 4f3e753276..e7a0bc7df7 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4729,10 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - llassert(new_face.mNumIndices == mNumIndices); - llassert(new_face.mNumVertices <= mNumVertices); - - swapData(new_face); + // Only swap data if we've actually optimized the mesh + // + if (new_face.mNumVertices < mNumVertices) + { + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; -- cgit v1.3 From bba84a3fa9a1af87f6a8080f9093f9277feb1292 Mon Sep 17 00:00:00 2001 From: "Graham Madarasz (Graham Linden)" Date: Wed, 13 Mar 2013 13:38:30 -0700 Subject: Cleanup per code review of prev change 
with DaveP --- indra/llcommon/llmemory.h | 12 ++++++------ indra/llmath/llvolume.cpp | 11 +++++++---- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 61e30f11cc..d0e4bc9e25 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -188,14 +188,14 @@ inline void ll_aligned_free_32(void *p) // inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes) { - assert(src != NULL); - assert(dst != NULL); - assert(bytes > 0); - assert((bytes % sizeof(F32))== 0); + llassert(src != NULL); + llassert(dst != NULL); + llassert(bytes >= 16); + llassert((bytes % sizeof(F32))== 0); + llassert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src)); + llassert(bytes%16==0); ll_assert_aligned(src,16); ll_assert_aligned(dst,16); - assert((src < dst) ? ((src + bytes) < dst) : ((dst + bytes) < src)); - assert(bytes%16==0); char* end = dst + bytes; diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index cb5633c1bb..edd16b5688 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4729,11 +4729,13 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } } - if (new_face.mNumVertices) + // disallow data amplification + // + if (new_face.mNumVertices <= mNumVertices) { - llassert(new_face.mNumIndices == mNumIndices); - swapData(new_face); - } + llassert(new_face.mNumIndices == mNumIndices); + swapData(new_face); + } } class LLVCacheTriangleData; @@ -6731,3 +6733,4 @@ void calc_binormal_from_triangle(LLVector4a& binormal, binormal.set( 0, 1 , 0 ); } } + -- cgit v1.3 From dbfcd6c9c5709b74365c2538ba312685b09d22bf Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Tue, 7 May 2013 17:20:33 -0500 Subject: Optimization -- don't draw glow in alpha pool unless needed --- indra/llmath/llvolume.cpp | 6 +++- indra/llprimitive/llmodel.cpp | 37 +++++++++++++++++++ indra/llprimitive/llmodel.h | 4 ++- 
indra/newview/lldrawpoolalpha.cpp | 3 +- indra/newview/llspatialpartition.cpp | 3 +- indra/newview/llspatialpartition.h | 5 +-- indra/newview/llviewerpartsim.cpp | 2 +- indra/newview/llviewerpartsource.cpp | 2 +- indra/newview/llvopartgroup.cpp | 70 ++++++++++++++++++++++++------------ 9 files changed, 101 insertions(+), 31 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 602f2c29e5..7751ef87ee 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4520,7 +4520,11 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size); - LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + + if (src.mNormals) + { + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + } if(src.mTexCoords) { diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index ef6eb75a6b..e236f98fe6 100644 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -1122,6 +1122,43 @@ void LLModel::getNormalizedScaleTranslation(LLVector3& scale_out, LLVector3& tra translation_out = mNormalizedTranslation; } +LLVector3 LLModel::getTransformedCenter(const LLMatrix4& mat) +{ + LLVector3 ret; + + if (!mVolumeFaces.empty()) + { + LLMatrix4a m; + m.loadu(mat); + + LLVector4a minv,maxv; + + LLVector4a t; + m.affineTransform(mVolumeFaces[0].mPositions[0], t); + minv = maxv = t; + + for (S32 i = 0; i < mVolumeFaces.size(); ++i) + { + LLVolumeFace& face = mVolumeFaces[i]; + + for (U32 j = 0; j < face.mNumVertices; ++j) + { + m.affineTransform(face.mPositions[j],t); + update_min_max(minv, maxv, t); + } + } + + minv.add(maxv); + minv.mul(0.5f); + + ret.set(minv.getF32ptr()); + } + + return ret; +} + + + void LLModel::setNumVolumeFaces(S32 count) { mVolumeFaces.resize(count); diff --git 
a/indra/llprimitive/llmodel.h b/indra/llprimitive/llmodel.h index 1cf528fd9f..aaafc55258 100644 --- a/indra/llprimitive/llmodel.h +++ b/indra/llprimitive/llmodel.h @@ -173,13 +173,15 @@ public: void optimizeVolumeFaces(); void offsetMesh( const LLVector3& pivotPoint ); void getNormalizedScaleTranslation(LLVector3& scale_out, LLVector3& translation_out); - + LLVector3 getTransformedCenter(const LLMatrix4& mat); + //reorder face list based on mMaterialList in this and reference so //order matches that of reference (material ordering touchup) bool matchMaterialOrder(LLModel* ref, int& refFaceCnt, int& modelFaceCnt ); bool isMaterialListSubset( LLModel* ref ); bool needToAddFaces( LLModel* ref, int& refFaceCnt, int& modelFaceCnt ); + std::vector mMaterialList; //data used for skin weights diff --git a/indra/newview/lldrawpoolalpha.cpp b/indra/newview/lldrawpoolalpha.cpp index 6fa16825df..331744acb7 100644 --- a/indra/newview/lldrawpoolalpha.cpp +++ b/indra/newview/lldrawpoolalpha.cpp @@ -515,7 +515,8 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask) // If this alpha mesh has glow, then draw it a second time to add the destination-alpha (=glow). Interleaving these state-changing calls could be expensive, but glow must be drawn Z-sorted with alpha. 
if (current_shader && draw_glow_for_this_partition && - params.mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_EMISSIVE)) + params.mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_EMISSIVE) && + (!params.mParticle || params.mHasGlow)) { static LLFastTimer::DeclareTimer FTM_RENDER_ALPHA_GLOW("Alpha Glow"); LLFastTimer t(FTM_RENDER_ALPHA_GLOW); diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 1ec56eb5f8..2a1d0d223c 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -4656,7 +4656,8 @@ LLDrawInfo::LLDrawInfo(U16 start, U16 end, U32 count, U32 offset, mDistance(0.f), mDrawMode(LLRender::TRIANGLES), mBlendFuncSrc(LLRender::BF_SOURCE_ALPHA), - mBlendFuncDst(LLRender::BF_ONE_MINUS_SOURCE_ALPHA) + mBlendFuncDst(LLRender::BF_ONE_MINUS_SOURCE_ALPHA), + mHasGlow(FALSE) { mVertexBuffer->validateRange(mStart, mEnd, mCount, mOffset); diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index 08e77855c4..e9be93ce98 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -70,12 +70,12 @@ protected: public: void* operator new(size_t size) { - return ll_aligned_malloc_16(size); + return ll_aligned_malloc(size,64); } void operator delete(void* ptr) { - ll_aligned_free_16(ptr); + ll_aligned_free(ptr); } @@ -121,6 +121,7 @@ public: U32 mDrawMode; U32 mBlendFuncSrc; U32 mBlendFuncDst; + BOOL mHasGlow; struct CompareTexture { diff --git a/indra/newview/llviewerpartsim.cpp b/indra/newview/llviewerpartsim.cpp index 21f1d2619c..96cd43a8ab 100644 --- a/indra/newview/llviewerpartsim.cpp +++ b/indra/newview/llviewerpartsim.cpp @@ -387,7 +387,7 @@ void LLViewerPartGroup::updateParticles(const F32 lastdt) } // Do glow interpolation - part->mGlow.mV[3] = (U8) (lerp(part->mStartGlow, part->mEndGlow, frac)*255.f); + part->mGlow.mV[3] = (U8) llround(lerp(part->mStartGlow, part->mEndGlow, frac)*255.f); // Set the last update time to now. 
part->mLastUpdateTime = cur_time; diff --git a/indra/newview/llviewerpartsource.cpp b/indra/newview/llviewerpartsource.cpp index 8c49ce646d..b6bbd6140d 100644 --- a/indra/newview/llviewerpartsource.cpp +++ b/indra/newview/llviewerpartsource.cpp @@ -313,7 +313,7 @@ void LLViewerPartSourceScript::update(const F32 dt) part->mStartGlow = mPartSysData.mPartData.mStartGlow; part->mEndGlow = mPartSysData.mPartData.mEndGlow; - part->mGlow = LLColor4U(0, 0, 0, (U8) (part->mStartGlow*255.f)); + part->mGlow = LLColor4U(0, 0, 0, (U8) llround(part->mStartGlow*255.f)); if (mPartSysData.mPattern & LLPartSysData::LL_PART_SRC_PATTERN_DROP) { diff --git a/indra/newview/llvopartgroup.cpp b/indra/newview/llvopartgroup.cpp index 53d67347d1..e5e627c1ea 100644 --- a/indra/newview/llvopartgroup.cpp +++ b/indra/newview/llvopartgroup.cpp @@ -273,6 +273,10 @@ void LLVOPartGroup::getBlendFunc(S32 idx, U32& src, U32& dst) src = part->mBlendFuncSource; dst = part->mBlendFuncDest; } + else + { + llerrs << "WTF?" << llendl; + } } LLVector3 LLVOPartGroup::getCameraPosition() const @@ -670,7 +674,7 @@ void LLVOPartGroup::getGeometry(S32 idx, } else { - pglow = LLColor4U(0, 0, 0, (U8) (255.f*part.mStartGlow)); + pglow = LLColor4U(0, 0, 0, (U8) llround(255.f*part.mStartGlow)); pcolor = part.mStartColor; } } @@ -685,10 +689,13 @@ void LLVOPartGroup::getGeometry(S32 idx, *colorsp++ = color; *colorsp++ = color; - *emissivep++ = pglow; - *emissivep++ = pglow; - *emissivep++ = part.mGlow; - *emissivep++ = part.mGlow; + //if (pglow.mV[3] || part.mGlow.mV[3]) + { //only write glow if it is not zero + *emissivep++ = pglow; + *emissivep++ = pglow; + *emissivep++ = part.mGlow; + *emissivep++ = part.mGlow; + } if (!(part.mFlags & LLPartData::LL_PART_EMISSIVE_MASK)) @@ -873,8 +880,17 @@ void LLParticlePartition::getGeometry(LLSpatialGroup* group) LLStrider cur_col = colorsp + geom_idx; LLStrider cur_glow = emissivep + geom_idx; + LLColor4U* start_glow = cur_glow.get(); + object->getGeometry(facep->getTEOffset(), 
cur_vert, cur_norm, cur_tc, cur_col, cur_glow, cur_idx); + BOOL has_glow = FALSE; + + if (cur_glow.get() != start_glow) + { + has_glow = TRUE; + } + llassert(facep->getGeomCount() == 4); llassert(facep->getIndicesCount() == 6); @@ -894,26 +910,32 @@ void LLParticlePartition::getGeometry(LLSpatialGroup* group) object->getBlendFunc(facep->getTEOffset(), bf_src, bf_dst); - if (idx >= 0 && - draw_vec[idx]->mTexture == facep->getTexture() && - draw_vec[idx]->mFullbright == fullbright && - draw_vec[idx]->mBlendFuncDst == bf_dst && - draw_vec[idx]->mBlendFuncSrc == bf_src) + + if (idx >= 0) { - if (draw_vec[idx]->mEnd == facep->getGeomIndex()-1) - { - batched = true; - draw_vec[idx]->mCount += facep->getIndicesCount(); - draw_vec[idx]->mEnd += facep->getGeomCount(); - draw_vec[idx]->mVSize = llmax(draw_vec[idx]->mVSize, vsize); - } - else if (draw_vec[idx]->mStart == facep->getGeomIndex()+facep->getGeomCount()+1) + LLDrawInfo* info = draw_vec[idx]; + + if (info->mTexture == facep->getTexture() && + info->mHasGlow == has_glow && + info->mFullbright == fullbright && + info->mBlendFuncDst == bf_dst && + info->mBlendFuncSrc == bf_src) { - batched = true; - draw_vec[idx]->mCount += facep->getIndicesCount(); - draw_vec[idx]->mStart -= facep->getGeomCount(); - draw_vec[idx]->mOffset = facep->getIndicesStart(); - draw_vec[idx]->mVSize = llmax(draw_vec[idx]->mVSize, vsize); + if (draw_vec[idx]->mEnd == facep->getGeomIndex()-1) + { + batched = true; + info->mCount += facep->getIndicesCount(); + info->mEnd += facep->getGeomCount(); + info->mVSize = llmax(draw_vec[idx]->mVSize, vsize); + } + else if (draw_vec[idx]->mStart == facep->getGeomIndex()+facep->getGeomCount()+1) + { + batched = true; + info->mCount += facep->getIndicesCount(); + info->mStart -= facep->getGeomCount(); + info->mOffset = facep->getIndicesStart(); + info->mVSize = llmax(draw_vec[idx]->mVSize, vsize); + } } } @@ -932,6 +954,8 @@ void LLParticlePartition::getGeometry(LLSpatialGroup* group) info->mVSize = vsize; 
info->mBlendFuncDst = bf_dst; info->mBlendFuncSrc = bf_src; + info->mHasGlow = has_glow; + info->mParticle = TRUE; draw_vec.push_back(info); //for alpha sorting facep->setDrawInfo(info); -- cgit v1.3 From ec5bd94d59247e600f8f8702a543f99d40930d20 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 15 May 2013 11:00:00 -0700 Subject: BUG-2581 : [simon-ll-viewer-cat] Path cut, hollow and slice break planar texture mapping on prims. Credits to DaveP --- indra/llmath/llvolume.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 7751ef87ee..317d15f84e 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -5604,16 +5604,16 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector4a binormal; calc_binormal_from_triangle(binormal, *mCenter, cuv, - pos[0], tc[0], - pos[1], tc[1]); + mPositions[0], mTexCoords[0], + mPositions[1], mTexCoords[1]); binormal.normalize3fast(); LLVector4a normal; LLVector4a d0, d1; - d0.setSub(*mCenter, pos[0]); - d1.setSub(*mCenter, pos[1]); + d0.setSub(*mCenter, mPositions[0]); + d1.setSub(*mCenter, mPositions[1]); if (mTypeMask & TOP_MASK) { -- cgit v1.3 From 705a0ab6770459d89474b3495b05214315a49c94 Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Tue, 25 Jun 2013 11:16:27 -0700 Subject: NORSPEC-288 WIP fixes the issue with normal gen, but may be a bigger perf hit than we want --- indra/llmath/llvector4a.inl | 20 +++++++++++ indra/llmath/llvolume.cpp | 83 ++++++++++++++++++++++++--------------------- 2 files changed, 64 insertions(+), 39 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 7c52ffef21..558fe09323 100755 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -409,6 +409,26 @@ inline void LLVector4a::normalize3fast() mQ = _mm_mul_ps( mQ, approxRsqrt ); } +inline void 
LLVector4a::normalize3fast_checked(LLVector4a* d) +{ + if (!isFinite3()) + { + *this = d ? *d : LLVector4a(0,1,0,1); + return; + } + + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); + + if (lenSqrd.getF32ptr()[0] <= FLT_EPSILON) + { + *this = d ? *d : LLVector4a(0,1,0,1); + return; + } + + const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + mQ = _mm_mul_ps( mQ, approxRsqrt ); +} + // Return true if this vector is normalized with respect to x,y,z up to tolerance inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const { diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 14cebfe5aa..58cac57e7f 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4472,6 +4472,9 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, continue; //skip degenerate face } + LLVector4a default_norm; + default_norm.set(0,1,0,1); + //for each edge for (S32 k = 0; k < 3; k++) { S32 index = face.mEdge[j*3+k]; @@ -4493,14 +4496,14 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, norm_mat.rotate(n[v1], t); - t.normalize3fast(); + t.normalize3fast_checked(&default_norm); normals.push_back(LLVector3(t[0], t[1], t[2])); mat.affineTransform(v[v2], t); vertices.push_back(LLVector3(t[0], t[1], t[2])); norm_mat.rotate(n[v2], t); - t.normalize3fast(); + t.normalize3fast_checked(&default_norm); normals.push_back(LLVector3(t[0], t[1], t[2])); } } @@ -6096,6 +6099,9 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { VertexData corners[4]; VertexData baseVert; + LLVector4a default_norm; + default_norm.set(0,1,0,1); + for(S32 t = 0; t < 4; t++) { corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); @@ -6108,8 +6114,8 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) lhs.setSub(corners[1].getPosition(), corners[0].getPosition()); LLVector4a rhs; rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); - 
baseVert.getNormal().setCross3(lhs, rhs); - baseVert.getNormal().normalize3fast(); + baseVert.getNormal().setCross3(lhs, rhs); + baseVert.getNormal().normalize3fast_checked(&default_norm); } if(!(mTypeMask & TOP_MASK)) @@ -6559,17 +6565,12 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); LLVector4a normal; - normal.setCross3(d0,d1); - - if (normal.dot3(normal).getF32() > F_APPROXIMATELY_ZERO) - { - normal.normalize3fast(); - } - else - { //degenerate, make up a value - normal.set(0,0,1); - } + LLVector4a default_norm; + default_norm.set(0,1,0,1); + normal.setCross3(d0,d1); + normal.normalize3fast_checked(&default_norm); + llassert(llfinite(normal.getF32ptr()[0])); llassert(llfinite(normal.getF32ptr()[1])); llassert(llfinite(normal.getF32ptr()[2])); @@ -6611,11 +6612,13 @@ void LLVolumeFace::createTangents() CalculateTangentArray(mNumVertices, mPositions, mNormals, mTexCoords, mNumIndices/3, mIndices, mTangents); //normalize tangents + LLVector4a default_norm; + default_norm.set(0,1,0,1); for (U32 i = 0; i < mNumVertices; i++) { //binorm[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized - mNormals[i].normalize3fast(); + mNormals[i].normalize3fast_checked(&default_norm); } } } @@ -6793,6 +6796,9 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat mat.loadu(mat_in); norm_mat.loadu(norm_mat_in); + LLVector4a default_norm; + default_norm.set(0,1,0,1); + for (U32 i = 0; i < face.mNumVertices; ++i) { //transform appended face position and store @@ -6800,7 +6806,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat //transform appended face normal and store norm_mat.rotate(src_norm[i], dst_norm[i]); - dst_norm[i].normalize3fast(); + dst_norm[i].normalize3fast_checked(&default_norm); //copy appended face texture coordinate dst_tc[i] = src_tc[i]; @@ -7213,42 +7219,41 @@ BOOL 
LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) { - //LLVector4a *tan1 = new LLVector4a[vertexCount * 2]; LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a)); - LLVector4a* tan2 = tan1 + vertexCount; + LLVector4a* tan2 = tan1 + vertexCount; memset(tan1, 0, vertexCount*2*sizeof(LLVector4a)); - for (U32 a = 0; a < triangleCount; a++) - { - U32 i1 = *index_array++; - U32 i2 = *index_array++; - U32 i3 = *index_array++; + for (U32 a = 0; a < triangleCount; a++) + { + U32 i1 = *index_array++; + U32 i2 = *index_array++; + U32 i3 = *index_array++; - const LLVector4a& v1 = vertex[i1]; - const LLVector4a& v2 = vertex[i2]; - const LLVector4a& v3 = vertex[i3]; + const LLVector4a& v1 = vertex[i1]; + const LLVector4a& v2 = vertex[i2]; + const LLVector4a& v3 = vertex[i3]; - const LLVector2& w1 = texcoord[i1]; - const LLVector2& w2 = texcoord[i2]; - const LLVector2& w3 = texcoord[i3]; + const LLVector2& w1 = texcoord[i1]; + const LLVector2& w2 = texcoord[i2]; + const LLVector2& w3 = texcoord[i3]; const F32* v1ptr = v1.getF32ptr(); const F32* v2ptr = v2.getF32ptr(); const F32* v3ptr = v3.getF32ptr(); - float x1 = v2ptr[0] - v1ptr[0]; - float x2 = v3ptr[0] - v1ptr[0]; - float y1 = v2ptr[1] - v1ptr[1]; - float y2 = v3ptr[1] - v1ptr[1]; - float z1 = v2ptr[2] - v1ptr[2]; - float z2 = v3ptr[2] - v1ptr[2]; + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; - float s1 = w2.mV[0] - w1.mV[0]; - float s2 = w3.mV[0] - w1.mV[0]; - float t1 = w2.mV[1] - w1.mV[1]; - float t2 = w3.mV[1] - w1.mV[1]; + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = 
w3.mV[1] - w1.mV[1]; F32 rd = s1*t2-s2*t1; -- cgit v1.3 From c5fc5d9b46121bb0d44d3b35402d604514e2dd4e Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Sun, 28 Jul 2013 15:43:10 -0700 Subject: NORSPEC-323 fix norm/tangent gen for prim caps --- indra/llmath/llvolume.cpp | 253 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 202 insertions(+), 51 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 58cac57e7f..b90830ddd8 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -94,6 +94,16 @@ const S32 SCULPT_MIN_AREA_DETAIL = 1; extern BOOL gDebugGL; +void calc_tangent_from_triangle( + LLVector4a& normal, + LLVector4a& tangent_out, + const LLVector4a& v1, + const LLVector2& w1, + const LLVector4a& v2, + const LLVector2& w2, + const LLVector4a& v3, + const LLVector2& w3); + BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -1594,7 +1604,7 @@ BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split, S32 sides = (S32)llfloor(llfloor((MIN_DETAIL_FACES * detail + twist_mag * 3.5f * (detail-0.5f))) * params.getRevolutions()); if (is_sculpted) - sides = llmax(sculpt_size, 1); + sides = llmax(sculpt_size,1); genNGon(params, sides); } @@ -2062,7 +2072,7 @@ LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL ge generate(); - if (mParams.getSculptID().isNull() && mParams.getSculptType() == LL_SCULPT_TYPE_NONE || mParams.getSculptType() == LL_SCULPT_TYPE_MESH) + if (mParams.getSculptID().isNull() && ((mParams.getSculptType() == LL_SCULPT_TYPE_NONE) || (mParams.getSculptType() == LL_SCULPT_TYPE_MESH))) { createVolumeFaces(); } @@ -4472,9 +4482,6 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, continue; //skip degenerate face } - LLVector4a default_norm; - default_norm.set(0,1,0,1); - //for each edge for (S32 k = 0; k < 3; 
k++) { S32 index = face.mEdge[j*3+k]; @@ -4496,14 +4503,14 @@ void LLVolume::generateSilhouetteVertices(std::vector &vertices, norm_mat.rotate(n[v1], t); - t.normalize3fast_checked(&default_norm); + t.normalize3fast(); normals.push_back(LLVector3(t[0], t[1], t[2])); mat.affineTransform(v[v2], t); vertices.push_back(LLVector3(t[0], t[1], t[2])); norm_mat.rotate(n[v2], t); - t.normalize3fast_checked(&default_norm); + t.normalize3fast(); normals.push_back(LLVector3(t[0], t[1], t[2])); } } @@ -5282,6 +5289,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) if (src.mTangents) { allocateTangents(src.mNumVertices); + llassert(mTangents); LLVector4a::memcpyNonAliased16((F32*) mTangents, (F32*) src.mTangents, vert_size); } else @@ -5293,6 +5301,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) if (src.mWeights) { allocateWeights(src.mNumVertices); + llassert(mWeights); LLVector4a::memcpyNonAliased16((F32*) mWeights, (F32*) src.mWeights, vert_size); } else @@ -5308,14 +5317,14 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size); } - + //delete return *this; } LLVolumeFace::~LLVolumeFace() { - ll_aligned_free_16(mExtents); + ll_aligned_free_16(mExtents); mExtents = NULL; freeData(); @@ -5325,7 +5334,7 @@ void LLVolumeFace::freeData() { ll_aligned_free_16(mPositions); mPositions = NULL; - ll_aligned_free_16( mNormals); + ll_aligned_free_16(mNormals); mNormals = NULL; ll_aligned_free_16(mTexCoords); mTexCoords = NULL; @@ -5911,10 +5920,10 @@ void LLVolumeFace::cacheOptimize() wght = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } - LLVector4a* binorm = NULL; + LLVector4a* tangent = NULL; if (mTangents) { - binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + tangent = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } //allocate mapping of old indices to new indices @@ -5939,7 +5948,7 @@ void 
LLVolumeFace::cacheOptimize() } if (mTangents) { - binorm[cur_idx] = mTangents[idx]; + tangent[cur_idx] = mTangents[idx]; } cur_idx++; @@ -5961,7 +5970,7 @@ void LLVolumeFace::cacheOptimize() mNormals = norm; mTexCoords = tc; mWeights = wght; - mTangents = binorm; + mTangents = tangent; //std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks); //llinfos << result << llendl; @@ -6099,9 +6108,6 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { VertexData corners[4]; VertexData baseVert; - LLVector4a default_norm; - default_norm.set(0,1,0,1); - for(S32 t = 0; t < 4; t++) { corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); @@ -6115,7 +6121,7 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) LLVector4a rhs; rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); baseVert.getNormal().setCross3(lhs, rhs); - baseVert.getNormal().normalize3fast_checked(&default_norm); + baseVert.getNormal().normalize3fast(); } if(!(mTypeMask & TOP_MASK)) @@ -6312,6 +6318,43 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) cuv = (min_uv + max_uv)*0.5f; + + LLVector4a tangent; + calc_tangent_from_triangle( + *norm, + tangent, + *mCenter, cuv, + pos[0], tc[0], + pos[1], tc[1]); + + if (tangent.getLength3() < 0.01) + { + tangent.set(1,0,0,1); + } + else + { + LLVector4a default_tangent; + default_tangent.set(1,0,0,1); + tangent.normalize3fast_checked(&default_tangent); + } + + LLVector4a normal; + LLVector4a d0, d1; + + d0.setSub(*mCenter, pos[0]); + d1.setSub(*mCenter, pos[1]); + + if (mTypeMask & TOP_MASK) + { + normal.setCross3(d0, d1); + } + else + { + normal.setCross3(d1, d0); + } + + normal.normalize3fast_checked(); + VertexData vd; vd.setPosition(*mCenter); vd.mTexCoord = cuv; @@ -6323,7 +6366,15 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) num_vertices++; } - + + 
allocateTangents(num_vertices); + + for (S32 i = 0; i < num_vertices; i++) + { + mTangents[i].load4a(tangent.getF32ptr()); + norm[i].load4a(normal.getF32ptr()); + } + if (partial_build) { return TRUE; @@ -6559,31 +6610,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) } - LLVector4a d0,d1; - - d0.setSub(mPositions[mIndices[1]], mPositions[mIndices[0]]); - d1.setSub(mPositions[mIndices[2]], mPositions[mIndices[0]]); - - LLVector4a normal; - LLVector4a default_norm; - default_norm.set(0,1,0,1); - - normal.setCross3(d0,d1); - normal.normalize3fast_checked(&default_norm); - - llassert(llfinite(normal.getF32ptr()[0])); - llassert(llfinite(normal.getF32ptr()[1])); - llassert(llfinite(normal.getF32ptr()[2])); - - llassert(!llisnan(normal.getF32ptr()[0])); - llassert(!llisnan(normal.getF32ptr()[1])); - llassert(!llisnan(normal.getF32ptr()[2])); - - for (S32 i = 0; i < num_vertices; i++) - { - norm[i].load4a(normal.getF32ptr()); - } - return TRUE; } @@ -6616,9 +6642,9 @@ void LLVolumeFace::createTangents() default_norm.set(0,1,0,1); for (U32 i = 0; i < mNumVertices; i++) { - //binorm[i].normalize3fast(); + //tangent[i].normalize3fast(); //bump map/planar projection code requires normals to be normalized - mNormals[i].normalize3fast_checked(&default_norm); + mNormals[i].normalize3fast_checked(); } } } @@ -6693,7 +6719,7 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con void LLVolumeFace::allocateTangents(S32 num_verts) { - ll_aligned_free_16(mTangents); + ll_aligned_free_16(mTangents); mTangents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } @@ -6796,9 +6822,6 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat mat.loadu(mat_in); norm_mat.loadu(norm_mat_in); - LLVector4a default_norm; - default_norm.set(0,1,0,1); - for (U32 i = 0; i < face.mNumVertices; ++i) { //transform appended face position and store @@ -6806,7 +6829,7 @@ void LLVolumeFace::appendFace(const 
LLVolumeFace& face, LLMatrix4& mat_in, LLMat //transform appended face normal and store norm_mat.rotate(src_norm[i], dst_norm[i]); - dst_norm[i].normalize3fast_checked(&default_norm); + dst_norm[i].normalize3fast_checked(); //copy appended face texture coordinate dst_tc[i] = src_tc[i]; @@ -7215,6 +7238,134 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) return TRUE; } +// Finds binormal based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. +void calc_binormal_from_triangle(LLVector4a& binormal, + + const LLVector4a& pos0, + const LLVector2& tex0, + const LLVector4a& pos1, + const LLVector2& tex1, + const LLVector4a& pos2, + const LLVector2& tex2) +{ + LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] ); + + LLVector4a lhs, rhs; + + LLVector4a r0; + lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2); + r0.setCross3(lhs, rhs); + + LLVector4a r1; + lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2); + r1.setCross3(lhs, rhs); + + LLVector4a r2; + lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2); + r2.setCross3(lhs, rhs); + + if( r0[VX] && r1[VX] && r2[VX] ) + { + binormal.set( + -r0[VZ] / r0[VX], + -r1[VZ] / r1[VX], + -r2[VZ] / r2[VX]); + // binormal.normVec(); + } + else + { + binormal.set( 0, 1 , 0 ); + } +} + +// Finds binormal based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. 
+void calc_tangent_from_triangle( + LLVector4a& normal, + LLVector4a& tangent_out, + const LLVector4a& v1, + const LLVector2& w1, + const LLVector4a& v2, + const LLVector2& w2, + const LLVector4a& v3, + const LLVector2& w3) +{ + const F32* v1ptr = v1.getF32ptr(); + const F32* v2ptr = v2.getF32ptr(); + const F32* v3ptr = v3.getF32ptr(); + + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; + + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; + + F32 rd = s1*t2-s2*t1; + + float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero + + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir( + (t2 * x1 - t1 * x2) * r, + (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + + LLVector4a tdir( + (s1 * x2 - s2 * x1) * r, + (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); + + LLVector4a n = normal; + LLVector4a t = sdir; + + LLVector4a ncrosst; + ncrosst.setCross3(n,t); + + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); + + LLVector4a tsubn; + tsubn.setSub(t,n); + + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast_checked(); + + // Calculate handedness + F32 handedness = ncrosst.dot3(tdir).getF32() < 0.f ? -1.f : 1.f; + + tsubn.getF32ptr()[3] = handedness; + + tangent_out = tsubn; + } + else + { + // degenerate, make up a value + // + tangent_out.set(0,0,1,1); + } + +} + //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. 
http://www.terathon.com/code/tangent.html void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) -- cgit v1.3 From 62011c2871bdb09b92c56d2959eed64ba6ec3e1f Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Mon, 19 Aug 2013 12:56:04 -0700 Subject: Fix merge issues from genBBoxes optimizations fighting with genBBoxes optimizations and missing statics --- indra/llmath/llvolume.cpp | 157 +++++++++++++++++++++---------------------- indra/newview/llface.cpp | 98 ++++++++++----------------- indra/newview/llface.h | 3 +- indra/newview/llvovolume.cpp | 2 +- indra/newview/pipeline.cpp | 2 + indra/newview/pipeline.h | 4 +- 6 files changed, 119 insertions(+), 147 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index ad0ca618a0..a030d889af 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -136,6 +136,82 @@ BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* cent return true; } +// Finds tangent vec based on three vertices with texture coordinates. +// Fills in dummy values if the triangle has degenerate texture coordinates. 
+void calc_tangent_from_triangle( + LLVector4a& normal, + LLVector4a& tangent_out, + const LLVector4a& v1, + const LLVector2& w1, + const LLVector4a& v2, + const LLVector2& w2, + const LLVector4a& v3, + const LLVector2& w3) +{ + const F32* v1ptr = v1.getF32ptr(); + const F32* v2ptr = v2.getF32ptr(); + const F32* v3ptr = v3.getF32ptr(); + + float x1 = v2ptr[0] - v1ptr[0]; + float x2 = v3ptr[0] - v1ptr[0]; + float y1 = v2ptr[1] - v1ptr[1]; + float y2 = v3ptr[1] - v1ptr[1]; + float z1 = v2ptr[2] - v1ptr[2]; + float z2 = v3ptr[2] - v1ptr[2]; + + float s1 = w2.mV[0] - w1.mV[0]; + float s2 = w3.mV[0] - w1.mV[0]; + float t1 = w2.mV[1] - w1.mV[1]; + float t2 = w3.mV[1] - w1.mV[1]; + + F32 rd = s1*t2-s2*t1; + + float r = ((rd*rd) > FLT_EPSILON) ? 1.0F / rd : 1024.f; //some made up large ratio for division by zero + + llassert(llfinite(r)); + llassert(!llisnan(r)); + + LLVector4a sdir( + (t2 * x1 - t1 * x2) * r, + (t2 * y1 - t1 * y2) * r, + (t2 * z1 - t1 * z2) * r); + + LLVector4a tdir( + (s1 * x2 - s2 * x1) * r, + (s1 * y2 - s2 * y1) * r, + (s1 * z2 - s2 * z1) * r); + + LLVector4a n = normal; + LLVector4a t = sdir; + + LLVector4a ncrosst; + ncrosst.setCross3(n,t); + + // Gram-Schmidt orthogonalize + n.mul(n.dot3(t).getF32()); + + LLVector4a tsubn; + tsubn.setSub(t,n); + + if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) + { + tsubn.normalize3fast_checked(); + + // Calculate handedness + F32 handedness = ncrosst.dot3(tdir).getF32() < 0.f ? -1.f : 1.f; + + tsubn.getF32ptr()[3] = handedness; + + tangent_out = tsubn; + } + else + { + // degenerate, make up a value + // + tangent_out.set(0,0,1,1); + } + +} // intersect test between triangle vert0, vert1, vert2 and a ray from orig in direction dir. 
@@ -5908,10 +5984,10 @@ void LLVolumeFace::cacheOptimize() wght = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } - LLVector4a* binorm = NULL; + LLVector4a* tangent = NULL; if (mTangents) { - binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + tangent = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); } //allocate mapping of old indices to new indices @@ -7277,83 +7353,6 @@ void calc_binormal_from_triangle(LLVector4a& binormal, } } -// Finds binormal based on three vertices with texture coordinates. -// Fills in dummy values if the triangle has degenerate texture coordinates. -void calc_tangent_from_triangle( - LLVector4a& normal, - LLVector4a& tangent_out, - const LLVector4a& v1, - const LLVector2& w1, - const LLVector4a& v2, - const LLVector2& w2, - const LLVector4a& v3, - const LLVector2& w3) -{ - const F32* v1ptr = v1.getF32ptr(); - const F32* v2ptr = v2.getF32ptr(); - const F32* v3ptr = v3.getF32ptr(); - - float x1 = v2ptr[0] - v1ptr[0]; - float x2 = v3ptr[0] - v1ptr[0]; - float y1 = v2ptr[1] - v1ptr[1]; - float y2 = v3ptr[1] - v1ptr[1]; - float z1 = v2ptr[2] - v1ptr[2]; - float z2 = v3ptr[2] - v1ptr[2]; - - float s1 = w2.mV[0] - w1.mV[0]; - float s2 = w3.mV[0] - w1.mV[0]; - float t1 = w2.mV[1] - w1.mV[1]; - float t2 = w3.mV[1] - w1.mV[1]; - - F32 rd = s1*t2-s2*t1; - - float r = ((rd*rd) > FLT_EPSILON) ? 
1.0F / rd : 1024.f; //some made up large ratio for division by zero - - llassert(llfinite(r)); - llassert(!llisnan(r)); - - LLVector4a sdir( - (t2 * x1 - t1 * x2) * r, - (t2 * y1 - t1 * y2) * r, - (t2 * z1 - t1 * z2) * r); - - LLVector4a tdir( - (s1 * x2 - s2 * x1) * r, - (s1 * y2 - s2 * y1) * r, - (s1 * z2 - s2 * z1) * r); - - LLVector4a n = normal; - LLVector4a t = sdir; - - LLVector4a ncrosst; - ncrosst.setCross3(n,t); - - // Gram-Schmidt orthogonalize - n.mul(n.dot3(t).getF32()); - - LLVector4a tsubn; - tsubn.setSub(t,n); - - if (tsubn.dot3(tsubn).getF32() > F_APPROXIMATELY_ZERO) - { - tsubn.normalize3fast_checked(); - - // Calculate handedness - F32 handedness = ncrosst.dot3(tdir).getF32() < 0.f ? -1.f : 1.f; - - tsubn.getF32ptr()[3] = handedness; - - tangent_out = tsubn; - } - else - { - // degenerate, make up a value - // - tangent_out.set(0,0,1,1); - } - -} - //adapted from Lengyel, Eric. “Computing Tangent Space Basis Vectors for an Arbitrary Mesh”. Terathon Software 3D Graphics Library, 2001. 
http://www.terathon.com/code/tangent.html void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent) diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 700b31f8d3..53e5b55b89 100755 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -768,7 +768,7 @@ bool less_than_max_mag(const LLVector4a& vec) } BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat_vert_in, const LLMatrix3& mat_normal_in, BOOL global_volume) + const LLMatrix4& mat_vert_in, BOOL global_volume) { //get bounding box if (mDrawablep->isState(LLDrawable::REBUILD_VOLUME | LLDrawable::REBUILD_POSITION | LLDrawable::REBUILD_RIGGED)) @@ -777,10 +777,6 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, LLMatrix4a mat_vert; mat_vert.loadu(mat_vert_in); - LLMatrix4a mat_normal; - mat_normal.loadu(mat_normal_in); - - //VECTORIZE THIS LLVector4a min,max; if (f >= volume.getNumVolumeFaces()) @@ -797,95 +793,69 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f, llassert(less_than_max_mag(max)); //min, max are in volume space, convert to drawable render space - LLVector4a center; - LLVector4a t; - t.setAdd(min, max); - t.mul(0.5f); - mat_vert.affineTransform(t, center); - LLVector4a size; - size.setSub(max, min); - size.mul(0.5f); - llassert(less_than_max_mag(min)); - llassert(less_than_max_mag(max)); + //get 8 corners of bounding box + LLVector4Logical mask[6]; - if (!global_volume) + for (U32 i = 0; i < 6; ++i) { - //VECTORIZE THIS - LLVector4a scale; - scale.load3(mDrawablep->getVObj()->getScale().mV); - size.mul(scale); + mask[i].clear(); } - mat_normal.mMatrix[0].normalize3fast(); - mat_normal.mMatrix[1].normalize3fast(); - mat_normal.mMatrix[2].normalize3fast(); + mask[0].setElement<2>(); //001 + mask[1].setElement<1>(); //010 + mask[2].setElement<1>(); //011 + mask[2].setElement<2>(); + 
mask[3].setElement<0>(); //100 + mask[4].setElement<0>(); //101 + mask[4].setElement<2>(); + mask[5].setElement<0>(); //110 + mask[5].setElement<1>(); - LLVector4a v[4]; + LLVector4a v[8]; - //get 4 corners of bounding box - mat_normal.rotate(size,v[0]); + v[6] = min; + v[7] = max; - //VECTORIZE THIS - LLVector4a scale; - - scale.set(-1.f, -1.f, 1.f); - scale.mul(size); - mat_normal.rotate(scale, v[1]); + for (U32 i = 0; i < 6; ++i) + { + v[i].setSelectWithMask(mask[i], min, max); + } - scale.set(1.f, -1.f, -1.f); - scale.mul(size); - mat_normal.rotate(scale, v[2]); + LLVector4a tv[8]; - scale.set(-1.f, 1.f, -1.f); - scale.mul(size); - mat_normal.rotate(scale, v[3]); + //transform bounding box into drawable space + for (U32 i = 0; i < 8; ++i) + { + mat_vert.affineTransform(v[i], tv[i]); + } + //find bounding box LLVector4a& newMin = mExtents[0]; LLVector4a& newMax = mExtents[1]; - newMin = newMax = center; - - llassert(less_than_max_mag(center)); + newMin = newMax = tv[0]; - for (U32 i = 0; i < 4; i++) + for (U32 i = 1; i < 8; ++i) { - LLVector4a delta; - delta.setAbs(v[i]); - LLVector4a min; - min.setSub(center, delta); - LLVector4a max; - max.setAdd(center, delta); - - newMin.setMin(newMin,min); - newMax.setMax(newMax,max); - - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); + newMin.setMin(newMin, tv[i]); + newMax.setMax(newMax, tv[i]); } if (!mDrawablep->isActive()) - { + { // Shift position for region LLVector4a offset; offset.load3(mDrawablep->getRegion()->getOriginAgent().mV); newMin.add(offset); newMax.add(offset); - - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); } + LLVector4a t; t.setAdd(newMin, newMax); t.mul(0.5f); - llassert(less_than_max_mag(t)); - - //VECTORIZE THIS mCenterLocal.set(t.getF32ptr()); - llassert(less_than_max_mag(newMin)); - llassert(less_than_max_mag(newMax)); - t.setSub(newMax,newMin); mBoundingSphereRadius = t.getLength3().getF32()*0.5f; diff --git a/indra/newview/llface.h 
b/indra/newview/llface.h index 763634a3ab..66b5f13740 100755 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -194,8 +194,7 @@ public: void setSize(S32 numVertices, S32 num_indices = 0, bool align = false); - BOOL genVolumeBBoxes(const LLVolume &volume, S32 f, - const LLMatrix4& mat, BOOL global_volume = FALSE); + BOOL genVolumeBBoxes(const LLVolume &volume, S32 f,const LLMatrix4& mat, BOOL global_volume = FALSE); void init(LLDrawable* drawablep, LLViewerObject* objp); void destroy(); diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index 113bdd2ce3..33b26494de 100755 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -1458,7 +1458,7 @@ BOOL LLVOVolume::genBBoxes(BOOL force_global) continue; } res &= face->genVolumeBBoxes(*volume, i, - mRelativeXform, mRelativeXformInvTrans, + mRelativeXform, /*mRelativeXformInvTrans,*/ (mVolumeImpl && mVolumeImpl->isVolumeGlobal()) || force_global); if (rebuild) diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp index c4286e73eb..8be7b2ab53 100755 --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -169,6 +169,7 @@ S32 LLPipeline::RenderGlowIterations; F32 LLPipeline::RenderGlowWidth; F32 LLPipeline::RenderGlowStrength; BOOL LLPipeline::RenderDepthOfField; +BOOL LLPipeline::RenderDepthOfFieldInEditMode; F32 LLPipeline::CameraFocusTransitionTime; F32 LLPipeline::CameraFNumber; F32 LLPipeline::CameraFocalLength; @@ -379,6 +380,7 @@ BOOL LLPipeline::sWaterReflections = FALSE; BOOL LLPipeline::sRenderGlow = FALSE; BOOL LLPipeline::sReflectionRender = FALSE; BOOL LLPipeline::sImpostorRender = FALSE; +BOOL LLPipeline::sImpostorRenderAlphaDepthPass = FALSE; BOOL LLPipeline::sUnderWaterRender = FALSE; BOOL LLPipeline::sTextureBindTest = FALSE; BOOL LLPipeline::sRenderFrameTest = FALSE; diff --git a/indra/newview/pipeline.h b/indra/newview/pipeline.h index f0bebbe20d..fbbcf8f06d 100755 --- a/indra/newview/pipeline.h +++ b/indra/newview/pipeline.h 
@@ -295,7 +295,8 @@ public: void unbindDeferredShader(LLGLSLShader& shader); void renderDeferredLighting(); - + void renderDeferredLightingToRT(LLRenderTarget* target); + void generateWaterReflection(LLCamera& camera); void generateSunShadow(LLCamera& camera); void generateHighlight(LLCamera& camera); @@ -594,6 +595,7 @@ public: static BOOL sPickAvatar; static BOOL sReflectionRender; static BOOL sImpostorRender; + static BOOL sImpostorRenderAlphaDepthPass; static BOOL sUnderWaterRender; static BOOL sRenderGlow; static BOOL sTextureBindTest; -- cgit v1.3 From cacefaf2fc1b95557aca62dc69dbe3744c6424b0 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Fri, 30 Aug 2013 15:55:20 -0500 Subject: MAINT-2811 Fix for infinite loop on octree code during teleport. --- indra/llmath/llvolume.cpp | 30 +++++++++++++++++++----------- indra/newview/llviewermenu.cpp | 2 -- indra/newview/llviewerpartsim.cpp | 4 ++++ indra/newview/llviewerpartsim.h | 4 ++++ indra/newview/llvopartgroup.cpp | 21 +++++++++++++-------- 5 files changed, 40 insertions(+), 21 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 773995cb86..0c329c69b2 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -802,6 +802,7 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai { // Scale by 4 to generate proper tex coords. mProfile[i].mul(scale); + llassert(mProfile[i].isFinite3()); } if (hollow) @@ -839,6 +840,7 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai { // Scale by 3 to generate proper tex coords. 
mProfile[i].mul(scale); + llassert(mProfile[i].isFinite3()); } if (path_open) @@ -989,6 +991,7 @@ BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detai { mOpen = FALSE; mProfile.push_back(mProfile[0]); + llassert(mProfile[0].isFinite3()); mTotal++; } } @@ -2127,7 +2130,7 @@ BOOL LLVolume::generate() { rot_mat.rotate(*profile++, tmp); dst->setAdd(tmp,offset); - llassert(less_than_max_mag(*dst)); + llassert(dst->isFinite3()); ++dst; } } @@ -2840,6 +2843,8 @@ void LLVolume::sculptGeneratePlaceholder() p[1] = (F32)(sin(F_PI * v) * sin(2.0 * F_PI * u) * RADIUS); p[2] = (F32)(cos(F_PI * v) * RADIUS); + llassert(pt.isFinite3()); + } line += sizeT; } @@ -2927,6 +2932,8 @@ void LLVolume::sculptGenerateMapVertices(U16 sculpt_width, U16 sculpt_height, S8 LLVector4a scale(-1.f,1,1,1); pt.mul(scale); } + + llassert(pt.isFinite3()); } line += sizeT; @@ -5552,12 +5559,14 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) tc->mV[0] = (*p)[0]+0.5f; tc->mV[1] = (*p)[1]+0.5f; - llassert(less_than_max_mag(*src)); + llassert(src->isFinite3()); update_min_max(min,max,*src); update_min_max(min_uv, max_uv, *tc); *pos = *src; + llassert(pos->isFinite3()); + ++p; ++tc; ++src; @@ -5577,11 +5586,13 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) tc->mV[0] = (*p)[0]+0.5f; tc->mV[1] = 0.5f - (*p)[1]; - llassert(less_than_max_mag(*src)); + llassert(src->isFinite3()); update_min_max(min,max,*src); update_min_max(min_uv, max_uv, *tc); *pos = *src; + + llassert(pos->isFinite3()); ++p; ++tc; @@ -6468,8 +6479,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } vector1 = _mm_sub_ps( vector2, _mm_mul_ps( amQ, bmQ )); + llassert(v1.isFinite3()); + v1.store4a((F32*) output); + output++; idx += 3; } @@ -6498,14 +6512,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) n1.add(c); n2.add(c); - 
llassert(llfinite(c.getF32ptr()[0])); - llassert(llfinite(c.getF32ptr()[1])); - llassert(llfinite(c.getF32ptr()[2])); - - llassert(!llisnan(c.getF32ptr()[0])); - llassert(!llisnan(c.getF32ptr()[1])); - llassert(!llisnan(c.getF32ptr()[2])); - + llassert(c.isFinite3()); + //even out quad contributions switch (i%2+1) { diff --git a/indra/newview/llviewermenu.cpp b/indra/newview/llviewermenu.cpp index 33d4b65d4a..c6c1090f45 100755 --- a/indra/newview/llviewermenu.cpp +++ b/indra/newview/llviewermenu.cpp @@ -1073,8 +1073,6 @@ class LLAdvancedCheckInfoDisplay : public view_listener_t U32 info_display = info_display_from_string( userdata.asString() ); bool new_value = false; - LL_INFOS("ViewerMenu") << "check " << userdata.asString() << LL_ENDL; - if ( info_display != 0 ) { new_value = LLPipeline::toggleRenderDebugControl( (void*)info_display ); diff --git a/indra/newview/llviewerpartsim.cpp b/indra/newview/llviewerpartsim.cpp index 96cd43a8ab..163c90019f 100755 --- a/indra/newview/llviewerpartsim.cpp +++ b/indra/newview/llviewerpartsim.cpp @@ -160,7 +160,11 @@ LLViewerPartGroup::LLViewerPartGroup(const LLVector3 &center_agent, const F32 bo } mVOPartGroupp->setViewerPartGroup(this); mVOPartGroupp->setPositionAgent(getCenterAgent()); + + mBoxSide = box_side; + F32 scale = box_side * 0.5f; + mVOPartGroupp->setScale(LLVector3(scale,scale,scale)); //gPipeline.addObject(mVOPartGroupp); diff --git a/indra/newview/llviewerpartsim.h b/indra/newview/llviewerpartsim.h index 095de2060c..2daa07ed8c 100755 --- a/indra/newview/llviewerpartsim.h +++ b/indra/newview/llviewerpartsim.h @@ -105,6 +105,9 @@ public: void shift(const LLVector3 &offset); + F32 getBoxRadius() { return mBoxRadius; } + F32 getBoxSide() { return mBoxSide; } + typedef std::vector part_list_t; part_list_t mParticles; @@ -125,6 +128,7 @@ public: protected: LLVector3 mCenterAgent; F32 mBoxRadius; + F32 mBoxSide; LLVector3 mMinObjPos; LLVector3 mMaxObjPos; diff --git a/indra/newview/llvopartgroup.cpp 
b/indra/newview/llvopartgroup.cpp index a65de0c047..1630b5d484 100755 --- a/indra/newview/llvopartgroup.cpp +++ b/indra/newview/llvopartgroup.cpp @@ -176,24 +176,28 @@ BOOL LLVOPartGroup::isActive() const F32 LLVOPartGroup::getBinRadius() { - return mScale.mV[0]*2.f; + return mViewerPartGroupp->getBoxSide(); } void LLVOPartGroup::updateSpatialExtents(LLVector4a& newMin, LLVector4a& newMax) { const LLVector3& pos_agent = getPositionAgent(); - newMin.load3( (pos_agent - mScale).mV); - newMax.load3( (pos_agent + mScale).mV); + LLVector4a scale; + LLVector4a p; + + p.load3(pos_agent.mV); + + scale.splat(mScale.mV[0]+mViewerPartGroupp->getBoxSide()*0.5f); + + newMin.setSub(p, scale); + newMax.setAdd(p,scale); llassert(newMin.isFinite3()); llassert(newMax.isFinite3()); - LLVector4a pos; - pos.load3(pos_agent.mV); - - llassert(pos.isFinite3()); - mDrawable->setPositionGroup(pos); + llassert(p.isFinite3()); + mDrawable->setPositionGroup(p); } void LLVOPartGroup::idleUpdate(LLAgent &agent, LLWorld &world, const F64 &time) @@ -459,6 +463,7 @@ BOOL LLVOPartGroup::updateGeometry(LLDrawable *drawable) } //record max scale (used to stretch bounding box for visibility culling) + mScale.set(max_scale, max_scale, max_scale); mDrawable->movePartition(); -- cgit v1.3 From 2108e9de5bb93d60a040ff385f50552ceed9b076 Mon Sep 17 00:00:00 2001 From: "katharine@exodusviewer.com" Date: Wed, 4 Sep 2013 16:37:33 -0400 Subject: OPEN-149: fix documentation of LLColor4 constructor for F32 vector --- doc/contributions.txt | 1 + indra/llmath/v4color.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'indra/llmath') diff --git a/doc/contributions.txt b/doc/contributions.txt index c377de8513..b2df058614 100755 --- a/doc/contributions.txt +++ b/doc/contributions.txt @@ -688,6 +688,7 @@ Kagehi Kohn Kaimen Takahe Katharine Berry STORM-1900 + OPEN-149 Keklily Longfall Ken Lavender Ken March diff --git a/indra/llmath/v4color.h b/indra/llmath/v4color.h index b047f86e6e..8c8c315808 100755 
--- a/indra/llmath/v4color.h +++ b/indra/llmath/v4color.h @@ -50,7 +50,7 @@ class LLColor4 LLColor4(F32 r, F32 g, F32 b); // Initializes LLColor4 to (r, g, b, 1) LLColor4(F32 r, F32 g, F32 b, F32 a); // Initializes LLColor4 to (r. g, b, a) LLColor4(U32 clr); // Initializes LLColor4 to (r=clr>>24, etc)) - LLColor4(const F32 *vec); // Initializes LLColor4 to (vec[0]. vec[1], vec[2], 1) + LLColor4(const F32 *vec); // Initializes LLColor4 to (vec[0]. vec[1], vec[2], vec[3]) LLColor4(const LLColor3 &vec, F32 a = 1.f); // Initializes LLColor4 to (vec, a) explicit LLColor4(const LLSD& sd); explicit LLColor4(const LLColor4U& color4u); // "explicit" to avoid automatic conversion -- cgit v1.3 From 29216ac5e725254c48711737f5c22d2ad3e5e4a2 Mon Sep 17 00:00:00 2001 From: Graham Linden Date: Mon, 23 Sep 2013 13:43:01 -0700 Subject: MAINT-3179 fix bad indexing of position info generating false alarms on range checks, issue with free of data now allocated contig with positions instead, and added copious amounts of llwarns to error return sites so 'invalid element' will be a reason to read the log and not a tease. 
--- indra/llmath/llvolume.cpp | 6 ++++-- indra/llprimitive/llmodel.cpp | 17 +++++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'indra/llmath') diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index de960e5d95..2b865b4a8e 100755 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -4827,13 +4827,15 @@ void LLVolumeFace::optimize(F32 angle_cutoff) if (angle_cutoff > 1.f && !mNormals) { - ll_aligned_free_16(new_face.mNormals); + // Now alloc'd with positions + //ll_aligned_free_16(new_face.mNormals); new_face.mNormals = NULL; } if (!mTexCoords) { - ll_aligned_free_16(new_face.mTexCoords); + // Now alloc'd with positions + //ll_aligned_free_16(new_face.mTexCoords); new_face.mTexCoords = NULL; } diff --git a/indra/llprimitive/llmodel.cpp b/indra/llprimitive/llmodel.cpp index 37f0bfcbb8..34e0483a83 100755 --- a/indra/llprimitive/llmodel.cpp +++ b/indra/llprimitive/llmodel.cpp @@ -166,6 +166,7 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa if ( !get_dom_sources(inputs, pos_offset, tc_offset, norm_offset, idx_stride, pos_source, tc_source, norm_source) || !pos_source ) { + llwarns << "Could not find dom sources for basic geo data; invalid model." << llendl; return LLModel::BAD_ELEMENT; } @@ -201,6 +202,7 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa if (((i + pos_offset) > index_count) || ((idx[i+pos_offset]*3+2) > vertex_count)) { + llwarns << "Out of range index data; invalid model." << llendl; return LLModel::BAD_ELEMENT; } @@ -210,6 +212,7 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa if (!cv.getPosition().isFinite3()) { + llwarns << "Nan positional data, invalid model." << llendl; return LLModel::BAD_ELEMENT; } } @@ -222,6 +225,7 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa if (((i + tc_offset) > index_count) || ((idx[i+tc_offset]*2+1) > tc_count)) { + llwarns << "Out of range TC indices." 
<< llendl; return LLModel::BAD_ELEMENT; } @@ -242,6 +246,7 @@ LLModel::EModelStatus load_face_from_dom_triangles(std::vector& fa if (((i + norm_offset) > index_count) || ((idx[i+norm_offset]*3+2) > norm_count)) { + llwarns << "Found out of range norm indices, invalid model." << llendl; return LLModel::BAD_ELEMENT; } @@ -380,6 +385,7 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac if (!get_dom_sources(inputs, pos_offset, tc_offset, norm_offset, idx_stride, pos_source, tc_source, norm_source)) { + llwarns << "Could not get DOM sources for basic geo data, invalid model." << llendl; return LLModel::BAD_ELEMENT; } @@ -430,9 +436,10 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac { // guard against model data specifiying out of range indices or verts // - if (((i + pos_offset) > index_count) - || ((idx[i+pos_offset]*3+2) > vertex_count)) + if (((cur_idx + pos_offset) > index_count) + || ((idx[cur_idx+pos_offset]*3+2) > vertex_count)) { + llwarns << "Out of range position indices, invalid model." << llendl; return LLModel::BAD_ELEMENT; } @@ -455,6 +462,7 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac if (((cur_idx + tc_offset) > index_count) || ((idx[cur_idx+tc_offset]*2+1) > tc_count)) { + llwarns << "Out of range TC indices, invalid model." << llendl; return LLModel::BAD_ELEMENT; } @@ -475,6 +483,7 @@ LLModel::EModelStatus load_face_from_dom_polylist(std::vector& fac if (((cur_idx + norm_offset) > index_count) || ((idx[cur_idx+norm_offset]*3+2) > norm_count)) { + llwarns << "Out of range norm indices, invalid model." << llendl; return LLModel::BAD_ELEMENT; } @@ -653,6 +662,7 @@ LLModel::EModelStatus load_face_from_dom_polygons(std::vector& fac domVertices* vertices = (domVertices*) elem.cast(); if (!vertices) { + llwarns << "Could not find vertex source, invalid model." 
<< llendl; return LLModel::BAD_ELEMENT; } domInputLocal_Array& v_inp = vertices->getInput_array(); @@ -666,6 +676,7 @@ LLModel::EModelStatus load_face_from_dom_polygons(std::vector& fac domSource* src = (domSource*) elem.cast(); if (!src) { + llwarns << "Could not find DOM source, invalid model." << llendl; return LLModel::BAD_ELEMENT; } v = &(src->getFloat_array()->getValue()); @@ -681,6 +692,7 @@ LLModel::EModelStatus load_face_from_dom_polygons(std::vector& fac domSource* src = (domSource*) elem.cast(); if (!src) { + llwarns << "Could not find DOM source, invalid model." << llendl; return LLModel::BAD_ELEMENT; } n = &(src->getFloat_array()->getValue()); @@ -693,6 +705,7 @@ LLModel::EModelStatus load_face_from_dom_polygons(std::vector& fac domSource* src = (domSource*) elem.cast(); if (!src) { + llwarns << "Could not find DOM source, invalid model." << llendl; return LLModel::BAD_ELEMENT; } t = &(src->getFloat_array()->getValue()); -- cgit v1.3