diff options
32 files changed, 1494 insertions, 917 deletions
diff --git a/indra/cmake/00-Common.cmake b/indra/cmake/00-Common.cmake index d9e1dd9afb..f0e2890448 100644 --- a/indra/cmake/00-Common.cmake +++ b/indra/cmake/00-Common.cmake @@ -172,7 +172,8 @@ if (LINUX) if (WORD_SIZE EQUAL 32) add_definitions(-march=pentium3) endif (WORD_SIZE EQUAL 32) - add_definitions(-mfpmath=sse -ftree-vectorize) + add_definitions(-mfpmath=sse) + #add_definitions(-ftree-vectorize) # THIS CRASHES GCC 3.1-3.2 if (NOT STANDALONE) # this stops us requiring a really recent glibc at runtime add_definitions(-fno-stack-protector) diff --git a/indra/llcommon/lldefs.h b/indra/llcommon/lldefs.h index f3b5ca361f..10e6cb34bf 100644 --- a/indra/llcommon/lldefs.h +++ b/indra/llcommon/lldefs.h @@ -242,5 +242,13 @@ inline LLDATATYPE llclampb(const LLDATATYPE& a) return llmin(llmax(a, (LLDATATYPE)0), (LLDATATYPE)255); } +template <class LLDATATYPE> +inline void llswap(LLDATATYPE& lhs, LLDATATYPE& rhs) +{ + LLDATATYPE tmp = lhs; + lhs = rhs; + rhs = tmp; +} + #endif // LL_LLDEFS_H diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp index 2a8015e26d..5c6ca30cdd 100644 --- a/indra/llcommon/llmemory.cpp +++ b/indra/llcommon/llmemory.cpp @@ -73,25 +73,6 @@ void LLMemory::freeReserve() reserveMem = NULL; } -void* ll_allocate (size_t size) -{ - if (size == 0) - { - llwarns << "Null allocation" << llendl; - } - void *p = malloc(size); - if (p == NULL) - { - LLMemory::freeReserve(); - llerrs << "Out of memory Error" << llendl; - } - return p; -} - -void ll_release (void *p) -{ - free(p); -} //---------------------------------------------------------------------------- diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 1c6f64dd8b..117268cfe7 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -32,14 +32,37 @@ #ifndef LLMEMORY_H #define LLMEMORY_H +#include <stdlib.h> +inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free(). +{ +#if defined(LL_WINDOWS) + return _mm_malloc(size, 16); +#elif defined(LL_DARWIN) + return malloc(size); // default osx malloc is 16 byte aligned. +#else + void *rtn; + if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size))) + { + return rtn; + } + else // bad alignment requested, or out of memory + { + return NULL; + } +#endif +} -extern S32 gTotalDAlloc; -extern S32 gTotalDAUse; -extern S32 gDACount; - -extern void* ll_allocate (size_t size); -extern void ll_release (void *p); +inline void ll_aligned_free_16(void *p) +{ +#if defined(LL_WINDOWS) + _mm_free(p); +#elif defined(LL_DARWIN) + return free(p); +#else + free(p); // posix_memalign() is compatible with heap deallocator +#endif +} class LL_COMMON_API LLMemory { diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index e93fe90650..367486eee7 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -62,6 +62,8 @@ set(llmath_HEADER_FILES llv4matrix3.h llv4matrix4.h llv4vector3.h + llvector4a.h + llmatrix4a.h llvolume.h llvolumemgr.h llsdutil_math.h diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 5ffc61ce9c..d8fbc081fa 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -1,4 +1,5 @@ /** + * @file llvolume.cpp * * $LicenseInfo:firstyear=2002&license=viewergpl$ @@ -43,10 +44,12 @@ #include "v4math.h" #include "m4math.h" #include "m3math.h" +#include "llmatrix4a.h" #include "lldarray.h" #include "llvolume.h" #include "llstl.h" #include "llsdserialize.h" +#include "llvector4a.h" #define DEBUG_SILHOUETTE_BINORMALS 0 #define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette @@ -87,8 +90,6 @@ const F32 SKEW_MAX = 0.95f; const F32 SCULPT_MIN_AREA = 0.002f; const S32 SCULPT_MIN_AREA_DETAIL = 1; -#define GEN_TRI_STRIP 0 - BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { LLVector3 test = (pt2-pt1)%(pt3-pt2); @@ -132,21 +133,25 @@ BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, con // and returns the intersection point along dir in intersection_t. // Moller-Trumbore algorithm -BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, +BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided) { F32 u, v, t; /* find vectors for two edges sharing vert0 */ - LLVector3 edge1 = vert1 - vert0; + LLVector4a edge1; + edge1.setSub(vert1, vert0); - LLVector3 edge2 = vert2 - vert0;; + + LLVector4a edge2; + edge2.setSub(vert2, vert0); /* begin calculating determinant - also used to calculate U parameter */ - LLVector3 pvec = dir % edge2; - + LLVector4a pvec; + pvec.setCross3(dir, edge2); + /* if determinant is near zero, ray lies in plane of triangle */ - F32 det = edge1 * pvec; + F32 det = edge1.dot3(pvec); if (!two_sided) { @@ -156,10 +161,11 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons } /* calculate distance from vert0 to ray origin */ - LLVector3 tvec = orig - vert0; + LLVector4a tvec; + tvec.setSub(orig, vert0); /* calculate U parameter and test bounds */ - u = tvec * pvec; + u = tvec.dot3(pvec); if (u < 0.f || u > det) { @@ -167,17 +173,18 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons } /* prepare to test V parameter */ - LLVector3 qvec = tvec % edge1; + LLVector4a qvec; + qvec.setCross3(tvec, edge1); /* calculate V parameter and test bounds */ - v = dir * qvec; + v = dir.dot3(qvec); if (v < 0.f || u + v > det) { return FALSE; } /* calculate t, scale parameters, ray intersects triangle */ - t = edge2 * qvec; + t = edge2.dot3(qvec); F32 inv_det = 1.0 / det; t *= inv_det; u *= inv_det; @@ -193,20 +200,22 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons F32 inv_det = 1.0 / det; /* calculate distance from vert0 to ray origin */ - LLVector3 tvec = orig - vert0; + LLVector4a tvec; + tvec.setSub(orig, vert0); /* calculate U parameter and test bounds */ - u = (tvec * pvec) * inv_det; + u = (tvec.dot3(pvec)) * inv_det; if (u < 0.f || u > 1.f) { return FALSE; } /* prepare to test V parameter */ - LLVector3 qvec = tvec - edge1; + LLVector4a qvec; + qvec.setSub(tvec, edge1); /* calculate V parameter and test bounds */ - v = (dir * qvec) * inv_det; + v = (dir.dot3(qvec)) * inv_det; if (v < 0.f || u + v > 1.f) { @@ -214,7 +223,7 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons } /* calculate t, ray intersects triangle */ - t = (edge2 * qvec) * inv_det; + t = (edge2.dot3(qvec)) * inv_det; } if (intersection_a != NULL) @@ -228,6 +237,21 @@ BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, cons return TRUE; } +//helper for non-aligned vectors +BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, + F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided) +{ + LLVector4a vert0a, vert1a, vert2a, origa, dira; + vert0a.load3(vert0.mV); + vert1a.load3(vert1.mV); + vert2a.load3(vert2.mV); + origa.load3(orig.mV); + dira.load3(dir.mV); + + return LLTriangleRayIntersect(vert0a, vert1a, vert2a, origa, dira, + intersection_a, intersection_b, intersection_t, two_sided); +} + //------------------------------------------------------------------- // statics @@ -1880,15 +1904,15 @@ bool LLVolumeFace::VertexData::operator==(const LLVolumeFace::VertexData& rhs)co bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs, F32 angle_cutoff) const { bool retval = false; - if (rhs.mPosition == mPosition && rhs.mTexCoord == mTexCoord) + if (rhs.mData[POSITION].equal3(mData[POSITION]) && rhs.mTexCoord == mTexCoord) { if (angle_cutoff > 1.f) { - retval = (mNormal == rhs.mNormal); + retval = (mData[NORMAL].equal3(rhs.mData[NORMAL])); } else { - F32 cur_angle = rhs.mNormal*mNormal; + F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]); retval = cur_angle > angle_cutoff; } } @@ -1992,11 +2016,10 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) LLVolumeFace& face = mVolumeFaces[i]; - face.mHasBinormals = false; - //copy out indices - face.mIndices.resize(idx.size()/2); - if (idx.empty() || face.mIndices.size() < 3) + face.resizeIndices(idx.size()/2); + + if (idx.empty() || face.mNumIndices < 3) { //why is there an empty index list? llerrs <<"WTF?" << llendl; continue; @@ -2010,7 +2033,7 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) //copy out vertices U32 num_verts = pos.size()/(3*2); - face.mVertices.resize(num_verts); + face.resizeVertices(num_verts); if (mdl[i].has("Weights")) { @@ -2059,7 +2082,6 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) LLVector3 max_pos; LLVector2 min_tc; LLVector2 max_tc; - min_pos.setValue(mdl[i]["PositionDomain"]["Min"]); max_pos.setValue(mdl[i]["PositionDomain"]["Max"]); @@ -2074,36 +2096,44 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) min = max = LLVector3(0,0,0); + F32* pos_out = (F32*) face.mPositions; + F32* norm_out = (F32*) face.mNormals; + F32* tc_out = (F32*) face.mTexCoords; + for (U32 j = 0; j < num_verts; ++j) { U16* v = (U16*) &(pos[j*3*2]); - face.mVertices[j].mPosition.setVec( - (F32) v[0] / 65535.f * pos_range.mV[0] + min_pos.mV[0], - (F32) v[1] / 65535.f * pos_range.mV[1] + min_pos.mV[1], - (F32) v[2] / 65535.f * pos_range.mV[2] + min_pos.mV[2]); + pos_out[0] = (F32) v[0] / 65535.f * pos_range.mV[0] + min_pos.mV[0]; + pos_out[1] = (F32) v[1] / 65535.f * pos_range.mV[1] + min_pos.mV[1]; + pos_out[2] = (F32) v[2] / 65535.f * pos_range.mV[2] + min_pos.mV[2]; + if (j == 0) { - min = max = face.mVertices[j].mPosition; + min = max = LLVector3(pos_out); } else { - update_min_max(min,max,face.mVertices[j].mPosition); + update_min_max(min,max,pos_out); } + pos_out += 4; + U16* n = (U16*) &(norm[j*3*2]); - face.mVertices[j].mNormal.setVec( - (F32) n[0] / 65535.f * 2.f - 1.f, - (F32) n[1] / 65535.f * 2.f - 1.f, - (F32) n[2] / 65535.f * 2.f - 1.f); + + norm_out[0] = (F32) n[0] / 65535.f * 2.f - 1.f; + norm_out[1] = (F32) n[1] / 65535.f * 2.f - 1.f; + norm_out[2] = (F32) n[2] / 65535.f * 2.f - 1.f; + norm_out += 4; U16* t = (U16*) &(tc[j*2*2]); - face.mVertices[j].mTexCoord.setVec( - (F32) t[0] / 65535.f * tc_range.mV[0] + min_tc.mV[0], - (F32) t[1] / 65535.f * tc_range.mV[1] + min_tc.mV[1]); + tc_out[0] = (F32) t[0] / 65535.f * tc_range.mV[0] + min_tc.mV[0]; + tc_out[1] = (F32) t[1] / 65535.f * tc_range.mV[1] + min_tc.mV[1]; + + tc_out += 8; } @@ -2133,24 +2163,29 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) if (do_reflect_x) { - for (S32 i = 0; i < face.mVertices.size(); i++) + LLVector4a* p = (LLVector4a*) face.mPositions; + LLVector4a* n = (LLVector4a*) face.mNormals; + + for (S32 i = 0; i < face.mNumVertices; i++) { - face.mVertices[i].mPosition.mV[VX] *= -1.0f; - face.mVertices[i].mNormal.mV[VX] *= -1.0f; + p[i].mul(-1.0f); + n[i].mul(-1.0f); } } if (do_invert_normals) { - for (S32 i = 0; i < face.mVertices.size(); i++) + LLVector4a* n = (LLVector4a*) face.mNormals; + + for (S32 i = 0; i < face.mNumVertices; i++) { - face.mVertices[i].mNormal *= -1.0f; + n[i].mul(-1.0f); } } if (do_reverse_triangles) { - for (U32 j = 0; j < face.mIndices.size(); j += 3) + for (U32 j = 0; j < face.mNumIndices; j += 3) { // swap the 2nd and 3rd index S32 swap = face.mIndices[j+1]; @@ -2168,13 +2203,15 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) void tetrahedron_set_normal(LLVolumeFace::VertexData* cv) { - LLVector3 nrm = (cv[1].mPosition-cv[0].mPosition)%(cv[2].mPosition-cv[0].mPosition); - - nrm.normVec(); - - cv[0].mNormal = nrm; - cv[1].mNormal = nrm; - cv[2].mNormal = nrm; + LLVector4a v0; + v0.setSub(cv[1].getPosition(), cv[0].getNormal()); + LLVector4a v1; + v1.setSub(cv[2].getNormal(), cv[0].getPosition()); + + cv[0].getNormal().setCross3(v0,v1); + cv[0].getNormal().normalize3fast(); + cv[1].setNormal(cv[0].getNormal()); + cv[2].setNormal(cv[1].getNormal()); } BOOL LLVolume::isTetrahedron() @@ -2189,12 +2226,12 @@ void LLVolume::makeTetrahedron() LLVolumeFace face; F32 x = 0.25f; - LLVector3 p[] = + LLVector4a p[] = { //unit tetrahedron corners - LLVector3(x,x,x), - LLVector3(-x,-x,x), - LLVector3(-x,x,-x), - LLVector3(x,-x,-x) + LLVector4a(x,x,x), + LLVector4a(-x,-x,x), + LLVector4a(-x,x,-x), + LLVector4a(x,-x,-x) }; face.mExtents[0].setVec(-x,-x,-x); @@ -2209,53 +2246,105 @@ void LLVolume::makeTetrahedron() //side 1 - cv[0].mPosition = p[1]; - cv[1].mPosition = p[0]; - cv[2].mPosition = p[2]; + cv[0].setPosition(p[1]); + cv[1].setPosition(p[0]); + cv[2].setPosition(p[2]); tetrahedron_set_normal(cv); - face.mVertices.push_back(cv[0]); - face.mVertices.push_back(cv[1]); - face.mVertices.push_back(cv[2]); + face.resizeVertices(12); + face.resizeIndices(12); + + LLVector4a* v = (LLVector4a*) face.mPositions; + LLVector4a* n = (LLVector4a*) face.mNormals; + LLVector2* tc = (LLVector2*) face.mTexCoords; + + v[0] = cv[0].getPosition(); + v[1] = cv[1].getPosition(); + v[2] = cv[2].getPosition(); + v += 3; + + n[0] = cv[0].getNormal(); + n[1] = cv[1].getNormal(); + n[2] = cv[2].getNormal(); + n += 3; + + tc[0] = cv[0].mTexCoord; + tc[1] = cv[1].mTexCoord; + tc[2] = cv[2].mTexCoord; + tc += 3; + //side 2 - cv[0].mPosition = p[3]; - cv[1].mPosition = p[0]; - cv[2].mPosition = p[1]; + cv[0].setPosition(p[3]); + cv[1].setPosition(p[0]); + cv[2].setPosition(p[1]); tetrahedron_set_normal(cv); - face.mVertices.push_back(cv[0]); - face.mVertices.push_back(cv[1]); - face.mVertices.push_back(cv[2]); + v[0] = cv[0].getPosition(); + v[1] = cv[1].getPosition(); + v[2] = cv[2].getPosition(); + v += 3; + + n[0] = cv[0].getNormal(); + n[1] = cv[1].getNormal(); + n[2] = cv[2].getNormal(); + n += 3; + + tc[0] = cv[0].mTexCoord; + tc[1] = cv[1].mTexCoord; + tc[2] = cv[2].mTexCoord; + tc += 3; //side 3 - cv[0].mPosition = p[3]; - cv[1].mPosition = p[1]; - cv[2].mPosition = p[2]; + cv[0].setPosition(p[3]); + cv[1].setPosition(p[1]); + cv[2].setPosition(p[2]); tetrahedron_set_normal(cv); - face.mVertices.push_back(cv[0]); - face.mVertices.push_back(cv[1]); - face.mVertices.push_back(cv[2]); + v[0] = cv[0].getPosition(); + v[1] = cv[1].getPosition(); + v[2] = cv[2].getPosition(); + v += 3; + + n[0] = cv[0].getNormal(); + n[1] = cv[1].getNormal(); + n[2] = cv[2].getNormal(); + n += 3; + + tc[0] = cv[0].mTexCoord; + tc[1] = cv[1].mTexCoord; + tc[2] = cv[2].mTexCoord; + tc += 3; //side 4 - cv[0].mPosition = p[2]; - cv[1].mPosition = p[0]; - cv[2].mPosition = p[3]; + cv[0].setPosition(p[2]); + cv[1].setPosition(p[0]); + cv[2].setPosition(p[3]); tetrahedron_set_normal(cv); - face.mVertices.push_back(cv[0]); - face.mVertices.push_back(cv[1]); - face.mVertices.push_back(cv[2]); + v[0] = cv[0].getPosition(); + v[1] = cv[1].getPosition(); + v[2] = cv[2].getPosition(); + v += 3; + + n[0] = cv[0].getNormal(); + n[1] = cv[1].getNormal(); + n[2] = cv[2].getNormal(); + n += 3; + + tc[0] = cv[0].mTexCoord; + tc[1] = cv[1].mTexCoord; + tc[2] = cv[2].mTexCoord; + tc += 3; //set index buffer - for (U32 i = 0; i < 12; i++) + for (U16 i = 0; i < 12; i++) { - face.mIndices.push_back(i); + face.mIndices[i] = i; } mVolumeFaces.push_back(face); @@ -3831,7 +3920,7 @@ S32 LLVolume::getNumTriangles() const for (S32 i = 0; i < getNumVolumeFaces(); ++i) { - triangle_count += getVolumeFace(i).mIndices.size()/3; + triangle_count += getVolumeFace(i).mNumIndices/3; } return triangle_count; @@ -3844,13 +3933,22 @@ S32 LLVolume::getNumTriangles() const void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, std::vector<LLVector3> &normals, std::vector<S32> &segments, - const LLVector3& obj_cam_vec, - const LLMatrix4& mat, - const LLMatrix3& norm_mat, + const LLVector3& obj_cam_vec_in, + const LLMatrix4& mat_in, + const LLMatrix3& norm_mat_in, S32 face_mask) { LLMemType m1(LLMemType::MTYPE_VOLUME); + LLMatrix4a mat; + mat.loadu(mat_in); + + LLMatrix4a norm_mat; + norm_mat.loadu(norm_mat_in); + + LLVector4a obj_cam_vec; + obj_cam_vec.load3(obj_cam_vec_in.mV); + vertices.clear(); normals.clear(); segments.clear(); @@ -3868,7 +3966,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, LLVolumeFace& face = *iter; if (!(face_mask & (0x1 << cur_index++)) || - face.mIndices.empty() || face.mEdge.empty()) + face.mNumIndices == 0 || face.mEdge.empty()) { continue; } @@ -3885,7 +3983,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, #if DEBUG_SILHOUETTE_EDGE_MAP //for each triangle - U32 count = face.mIndices.size(); + U32 count = face.mNumIndices; for (U32 j = 0; j < count/3; j++) { //get vertices S32 v1 = face.mIndices[j*3+0]; @@ -3893,9 +3991,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, S32 v3 = face.mIndices[j*3+2]; //get current face center - LLVector3 cCenter = (face.mVertices[v1].mPosition + - face.mVertices[v2].mPosition + - face.mVertices[v3].mPosition) / 3.0f; + LLVector3 cCenter = (face.mVertices[v1].getPosition() + + face.mVertices[v2].getPosition() + + face.mVertices[v3].getPosition()) / 3.0f; //for each edge for (S32 k = 0; k < 3; k++) { @@ -3913,9 +4011,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, v3 = face.mIndices[nIndex*3+2]; //get neighbor face center - LLVector3 nCenter = (face.mVertices[v1].mPosition + - face.mVertices[v2].mPosition + - face.mVertices[v3].mPosition) / 3.0f; + LLVector3 nCenter = (face.mVertices[v1].getPosition() + + face.mVertices[v2].getPosition() + + face.mVertices[v3].getPosition()) / 3.0f; //draw line vertices.push_back(cCenter); @@ -3938,15 +4036,15 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, #elif DEBUG_SILHOUETTE_NORMALS //for each vertex - for (U32 j = 0; j < face.mVertices.size(); j++) { - vertices.push_back(face.mVertices[j].mPosition); - vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mNormal*0.1f); + for (U32 j = 0; j < face.mNumVertices; j++) { + vertices.push_back(face.mVertices[j].getPosition()); + vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].getNormal()*0.1f); normals.push_back(LLVector3(0,0,1)); normals.push_back(LLVector3(0,0,1)); segments.push_back(vertices.size()); #if DEBUG_SILHOUETTE_BINORMALS - vertices.push_back(face.mVertices[j].mPosition); - vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mBinormal*0.1f); + vertices.push_back(face.mVertices[j].getPosition()); + vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f); normals.push_back(LLVector3(0,0,1)); normals.push_back(LLVector3(0,0,1)); segments.push_back(vertices.size()); @@ -3964,26 +4062,36 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, //for each triangle std::vector<U8> fFacing; - vector_append(fFacing, face.mIndices.size()/3); - for (U32 j = 0; j < face.mIndices.size()/3; j++) + vector_append(fFacing, face.mNumIndices/3); + + LLVector4a* v = (LLVector4a*) face.mPositions; + LLVector4a* n = (LLVector4a*) face.mNormals; + + for (U32 j = 0; j < face.mNumIndices/3; j++) { //approximate normal S32 v1 = face.mIndices[j*3+0]; S32 v2 = face.mIndices[j*3+1]; S32 v3 = face.mIndices[j*3+2]; - LLVector3 norm = (face.mVertices[v1].mPosition - face.mVertices[v2].mPosition) % - (face.mVertices[v2].mPosition - face.mVertices[v3].mPosition); - - if (norm.magVecSquared() < 0.00000001f) + LLVector4a c1,c2; + c1.setSub(v[v1], v[v2]); + c2.setSub(v[v2], v[v3]); + + LLVector4a norm; + + norm.setCross3(c1, c2); + + if (norm.dot3(norm) < 0.00000001f) { fFacing[j] = AWAY | TOWARDS; } else { //get view vector - LLVector3 view = (obj_cam_vec-face.mVertices[v1].mPosition); - bool away = view * norm > 0.0f; + LLVector4a view; + view.setSub(obj_cam_vec, v[v1]); + bool away = view.dot3(norm) > 0.0f; if (away) { fFacing[j] = AWAY; @@ -3996,7 +4104,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, } //for each triangle - for (U32 j = 0; j < face.mIndices.size()/3; j++) + for (U32 j = 0; j < face.mNumIndices/3; j++) { if (fFacing[j] == (AWAY | TOWARDS)) { //this is a degenerate triangle @@ -4029,15 +4137,21 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, S32 v1 = face.mIndices[j*3+k]; S32 v2 = face.mIndices[j*3+((k+1)%3)]; - vertices.push_back(face.mVertices[v1].mPosition*mat); - LLVector3 norm1 = face.mVertices[v1].mNormal * norm_mat; - norm1.normVec(); - normals.push_back(norm1); + LLVector4a t; + mat.affineTransform(v[v1], t); + vertices.push_back(LLVector3(t[0], t[1], t[2])); - vertices.push_back(face.mVertices[v2].mPosition*mat); - LLVector3 norm2 = face.mVertices[v2].mNormal * norm_mat; - norm2.normVec(); - normals.push_back(norm2); + norm_mat.rotate(n[v1], t); + + t.normalize3fast(); + normals.push_back(LLVector3(t[0], t[1], t[2])); + + mat.affineTransform(v[v2], t); + vertices.push_back(LLVector3(t[0], t[1], t[2])); + + norm_mat.rotate(n[v2], t); + t.normalize3fast(); + normals.push_back(LLVector3(t[0], t[1], t[2])); segments.push_back(vertices.size()); } @@ -4076,7 +4190,7 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, for (S32 i = start_face; i <= end_face; i++) { - const LLVolumeFace &face = getVolumeFace((U32)i); + LLVolumeFace &face = mVolumeFaces[i]; LLVector3 box_center = (face.mExtents[0] + face.mExtents[1]) / 2.f; LLVector3 box_size = face.mExtents[1] - face.mExtents[0]; @@ -4088,7 +4202,14 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, genBinormals(i); } - for (U32 tri = 0; tri < face.mIndices.size()/3; tri++) + LLVector4a starta, dira; + + starta.load3(start.mV); + dira.load3(dir.mV); + + LLVector4a* p = (LLVector4a*) face.mPositions; + + for (U32 tri = 0; tri < face.mNumIndices/3; tri++) { S32 index1 = face.mIndices[tri*3+0]; S32 index2 = face.mIndices[tri*3+1]; @@ -4096,15 +4217,15 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, F32 a, b, t; - if (LLTriangleRayIntersect(face.mVertices[index1].mPosition, - face.mVertices[index2].mPosition, - face.mVertices[index3].mPosition, - start, dir, &a, &b, &t, FALSE)) + if (LLTriangleRayIntersect(p[index1], + p[index2], + p[index3], + starta, dira, &a, &b, &t, FALSE)) { if ((t >= 0.f) && // if hit is after start (t <= 1.f) && // and before end (t < closest_t)) // and this hit is closer - { + { closest_t = t; hit_face = i; @@ -4112,27 +4233,32 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, { *intersection = start + dir * closest_t; } - + + if (tex_coord != NULL) - { - *tex_coord = ((1.f - a - b) * face.mVertices[index1].mTexCoord + - a * face.mVertices[index2].mTexCoord + - b * face.mVertices[index3].mTexCoord); + { + LLVector2* tc = (LLVector2*) face.mTexCoords; + *tex_coord = ((1.f - a - b) * tc[index1] + + a * tc[index2] + + b * tc[index3]); } if (normal != NULL) - { - *normal = ((1.f - a - b) * face.mVertices[index1].mNormal + - a * face.mVertices[index2].mNormal + - b * face.mVertices[index3].mNormal); + { + LLVector4* norm = (LLVector4*) face.mNormals; + + *normal = ((1.f - a - b) * LLVector3(norm[index1]) + + a * LLVector3(norm[index2]) + + b * LLVector3(norm[index3])); } if (bi_normal != NULL) - { - *bi_normal = ((1.f - a - b) * face.mVertices[index1].mBinormal + - a * face.mVertices[index2].mBinormal + - b * face.mVertices[index3].mBinormal); + { + LLVector4* binormal = (LLVector4*) face.mBinormals; + *bi_normal = ((1.f - a - b) * LLVector3(binormal[index1]) + + a * LLVector3(binormal[index2]) + + b * LLVector3(binormal[index3])); } } @@ -4903,6 +5029,14 @@ std::ostream& operator<<(std::ostream &s, const LLVolume *volumep) return s; } +LLVolumeFace::~LLVolumeFace() +{ + _mm_free(mPositions); + _mm_free(mNormals); + _mm_free(mTexCoords); + _mm_free(mIndices); + _mm_free(mBinormals); +} BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) { @@ -4921,6 +5055,13 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) } } +void LLVolumeFace::getVertexData(U16 index, LLVolumeFace::VertexData& cv) +{ + cv.setPosition(mPositions[index]); + cv.setNormal(mNormals[index]); + cv.mTexCoord = mTexCoords[index]; +} + void LLVolumeFace::optimize(F32 angle_cutoff) { LLVolumeFace new_face; @@ -4928,14 +5069,15 @@ void LLVolumeFace::optimize(F32 angle_cutoff) VertexMapData::PointMap point_map; //remove redundant vertices - for (U32 i = 0; i < mIndices.size(); ++i) + for (U32 i = 0; i < mNumIndices; ++i) { U16 index = mIndices[i]; - LLVolumeFace::VertexData cv = mVertices[index]; - + LLVolumeFace::VertexData cv; + getVertexData(index, cv); + BOOL found = FALSE; - VertexMapData::PointMap::iterator point_iter = point_map.find(cv.mPosition); + VertexMapData::PointMap::iterator point_iter = point_map.find(cv.getPosition()); if (point_iter != point_map.end()) { //duplicate point might exist for (U32 j = 0; j < point_iter->second.size(); ++j) @@ -4944,7 +5086,7 @@ void LLVolumeFace::optimize(F32 angle_cutoff) if (tv.compareNormal(cv, angle_cutoff)) { found = TRUE; - new_face.mIndices.push_back((point_iter->second)[j].mIndex); + new_face.pushIndex((point_iter->second)[j].mIndex); break; } } @@ -4952,14 +5094,14 @@ void LLVolumeFace::optimize(F32 angle_cutoff) if (!found) { - new_face.mVertices.push_back(cv); - U16 index = (U16) new_face.mVertices.size()-1; - new_face.mIndices.push_back(index); + new_face.pushVertex(cv); + U16 index = (U16) new_face.mNumVertices-1; + new_face.pushIndex(index); VertexMapData d; - d.mPosition = cv.mPosition; + d.setPosition(cv.getPosition()); d.mTexCoord = cv.mTexCoord; - d.mNormal = cv.mNormal; + d.setNormal(cv.getNormal()); d.mIndex = index; if (point_iter != point_map.end()) { @@ -4967,13 +5109,23 @@ void LLVolumeFace::optimize(F32 angle_cutoff) } else { - point_map[d.mPosition].push_back(d); + point_map[d.getPosition()].push_back(d); } } } - mVertices = new_face.mVertices; - mIndices = new_face.mIndices; + swapData(new_face); +} + +void LLVolumeFace::swapData(LLVolumeFace& rhs) +{ + llswap(rhs.mPositions, mPositions); + llswap(rhs.mNormals, mNormals); + llswap(rhs.mBinormals, mBinormals); + llswap(rhs.mTexCoords, mTexCoords); + llswap(rhs.mIndices,mIndices); + llswap(rhs.mNumVertices, mNumVertices); + llswap(rhs.mNumIndices, mNumIndices); } void LerpPlanarVertex(LLVolumeFace::VertexData& v0, @@ -4983,10 +5135,21 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, F32 coef01, F32 coef02) { - vout.mPosition = v0.mPosition + ((v1.mPosition-v0.mPosition)*coef01)+((v2.mPosition-v0.mPosition)*coef02); + + LLVector4a lhs; + lhs.setSub(v1.getPosition(), v0.getPosition()); + lhs.mul(coef01); + LLVector4a rhs; + rhs.setSub(v2.getPosition(), v0.getPosition()); + rhs.mul(coef02); + + rhs.add(lhs); + rhs.add(v0.getPosition()); + + vout.setPosition(rhs); + vout.mTexCoord = v0.mTexCoord + ((v1.mTexCoord-v0.mTexCoord)*coef01)+((v2.mTexCoord-v0.mTexCoord)*coef02); - vout.mNormal = v0.mNormal; - vout.mBinormal = v0.mBinormal; + vout.setNormal(v0.getNormal()); } BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) @@ -5018,16 +5181,22 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) VertexData corners[4]; VertexData baseVert; for(int t = 0; t < 4; t++){ - corners[t].mPosition = mesh[offset + (grid_size*t)].mPos; + corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; } - baseVert.mNormal = - ((corners[1].mPosition-corners[0].mPosition) % - (corners[2].mPosition-corners[1].mPosition)); - baseVert.mNormal.normVec(); + + { + LLVector4a lhs; + lhs.setSub(corners[1].getPosition(), corners[0].getPosition()); + LLVector4a rhs; + rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); + baseVert.getNormal().setCross3(lhs, rhs); + baseVert.getNormal().normalize3fast(); + } + if(!(mTypeMask & TOP_MASK)){ - baseVert.mNormal *= -1.0f; + baseVert.getNormal().mul(-1.0f); }else{ //Swap the UVs on the U(X) axis for top face LLVector2 swap; @@ -5038,22 +5207,23 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) corners[1].mTexCoord=corners[2].mTexCoord; corners[2].mTexCoord=swap; } - baseVert.mBinormal = calc_binormal_from_triangle( - corners[0].mPosition, corners[0].mTexCoord, - corners[1].mPosition, corners[1].mTexCoord, - corners[2].mPosition, corners[2].mTexCoord); - for(int t = 0; t < 4; t++){ - corners[t].mBinormal = baseVert.mBinormal; - corners[t].mNormal = baseVert.mNormal; - } - mHasBinormals = TRUE; - if (partial_build) - { - mVertices.clear(); - } + LLVector4a binormal; + + calc_binormal_from_triangle( binormal, + corners[0].getPosition(), corners[0].mTexCoord, + corners[1].getPosition(), corners[1].mTexCoord, + corners[2].getPosition(), corners[2].mTexCoord); + + S32 size = (grid_size+1)*(grid_size+1); + resizeVertices(size); + allocateBinormals(size); + + LLVector4a* pos = (LLVector4a*) mPositions; + LLVector4a* norm = (LLVector4a*) mNormals; + LLVector4a* binorm = (LLVector4a*) mBinormals; + LLVector2* tc = (LLVector2*) mTexCoords; - S32 vtop = mVertices.size(); for(int gx = 0;gx<grid_size+1;gx++){ for(int gy = 0;gy<grid_size+1;gy++){ VertexData newVert; @@ -5064,15 +5234,19 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) newVert, (F32)gx/(F32)grid_size, (F32)gy/(F32)grid_size); - mVertices.push_back(newVert); + + *pos++ = newVert.getPosition(); + *norm++ = baseVert.getNormal(); + *tc++ = newVert.mTexCoord; + *binorm++ = binormal; if (gx == 0 && gy == 0) { - min = max = newVert.mPosition; + min = max = LLVector3(newVert.getPosition().getF32()); } else { - update_min_max(min,max,newVert.mPosition); + update_min_max(min,max,newVert.getPosition().getF32()); } } } @@ -5081,9 +5255,10 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) if (!partial_build) { -#if GEN_TRI_STRIP - mTriStrip.clear(); -#endif + resizeIndices(grid_size*grid_size*6); + + U16* out = mIndices; + S32 idxs[] = {0,1,(grid_size+1)+1,(grid_size+1)+1,(grid_size+1),0}; for(S32 gx = 0;gx<grid_size;gx++) { @@ -5094,61 +5269,19 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { for(S32 i=5;i>=0;i--) { - mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]); - } - -#if GEN_TRI_STRIP - if (gy == 0) - { - mTriStrip.push_back((gx+1)*(grid_size+1)); - mTriStrip.push_back((gx+1)*(grid_size+1)); - mTriStrip.push_back(gx*(grid_size+1)); - } - - mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); - mTriStrip.push_back(gy+1+gx*(grid_size+1)); - - - if (gy == grid_size-1) - { - mTriStrip.push_back(gy+1+gx*(grid_size+1)); - } -#endif + *out++ = ((gy*(grid_size+1))+gx+idxs[i]); + } } else { for(S32 i=0;i<6;i++) { - mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]); + *out++ = ((gy*(grid_size+1))+gx+idxs[i]); } - -#if GEN_TRI_STRIP - if (gy == 0) - { - mTriStrip.push_back(gx*(grid_size+1)); - mTriStrip.push_back(gx*(grid_size+1)); - mTriStrip.push_back((gx+1)*(grid_size+1)); - } - - mTriStrip.push_back(gy+1+gx*(grid_size+1)); - mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); - - if (gy == grid_size-1) - { - mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); - } -#endif } } } - -#if GEN_TRI_STRIP - if (mTriStrip.size()%2 == 1) - { - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); - } -#endif } return TRUE; @@ -5178,11 +5311,25 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) num_vertices = profile.size(); num_indices = (profile.size() - 2)*3; - mVertices.resize(num_vertices); + if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) + { + resizeVertices(num_vertices+1); + allocateBinormals(num_vertices+1); - if (!partial_build) + if (!partial_build) + { + resizeIndices(num_indices+3); + } + } + else { - mIndices.resize(num_indices); + resizeVertices(num_vertices); + allocateBinormals(num_vertices); + + if (!partial_build) + { + resizeIndices(num_indices); + } } S32 max_s = volume->getProfile().getTotal(); @@ -5209,79 +5356,88 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector3& min = mExtents[0]; LLVector3& max = mExtents[1]; + LLVector2* tc = (LLVector2*) mTexCoords; + LLVector4a* pos = (LLVector4a*) mPositions; + LLVector4a* norm = (LLVector4a*) mNormals; + LLVector4a* binorm = (LLVector4a*) mBinormals; + // Copy the vertices into the array for (S32 i = 0; i < num_vertices; i++) { if (mTypeMask & TOP_MASK) { - mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f; - mVertices[i].mTexCoord.mV[1] = profile[i].mV[1]+0.5f; + tc[i].mV[0] = profile[i].mV[0]+0.5f; + tc[i].mV[1] = profile[i].mV[1]+0.5f; } else { // Mirror for underside. - mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f; - mVertices[i].mTexCoord.mV[1] = 0.5f - profile[i].mV[1]; + tc[i].mV[0] = profile[i].mV[0]+0.5f; + tc[i].mV[1] = 0.5f - profile[i].mV[1]; } - mVertices[i].mPosition = mesh[i + offset].mPos; + pos[i].load3(mesh[i + offset].mPos.mV); if (i == 0) { - min = max = mVertices[i].mPosition; - min_uv = max_uv = mVertices[i].mTexCoord; + min = max = mesh[i+offset].mPos; + min_uv = max_uv = tc[i]; } else { - update_min_max(min,max, mVertices[i].mPosition); - update_min_max(min_uv, max_uv, mVertices[i].mTexCoord); + update_min_max(min,max, mesh[i+offset].mPos); + update_min_max(min_uv, max_uv, tc[i]); } } mCenter = (min+max)*0.5f; cuv = (min_uv + max_uv)*0.5f; - LLVector3 binormal = calc_binormal_from_triangle( - mCenter, cuv, - mVertices[0].mPosition, mVertices[0].mTexCoord, - mVertices[1].mPosition, mVertices[1].mTexCoord); - binormal.normVec(); + LLVector4a center; + center.load3(mCenter.mV); + + LLVector4a binormal; + calc_binormal_from_triangle(binormal, + center, cuv, + pos[0], tc[0], + pos[1], tc[1]); + binormal.normalize3fast(); + + LLVector4a normal; + LLVector4a d0, d1; + - LLVector3 d0; - LLVector3 d1; - LLVector3 normal; + d0.setSub(center, pos[0]); + d1.setSub(center, pos[1]); - d0 = mCenter-mVertices[0].mPosition; - d1 = mCenter-mVertices[1].mPosition; + if (mTypeMask & TOP_MASK) + { + normal.setCross3(d0, d1); + } + else + { + normal.setCross3(d1, d0); + } - normal = (mTypeMask & TOP_MASK) ? (d0%d1) : (d1%d0); - normal.normVec(); + normal.normalize3fast(); VertexData vd; - vd.mPosition = mCenter; - vd.mNormal = normal; - vd.mBinormal = binormal; + vd.getPosition().load3(mCenter.mV); vd.mTexCoord = cuv; if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { - mVertices.push_back(vd); + pos[num_vertices].load4a((F32*) ¢er.mQ); + tc[num_vertices] = cuv; num_vertices++; - if (!partial_build) - { - vector_append(mIndices, 3); - } } - for (S32 i = 0; i < num_vertices; i++) { - mVertices[i].mBinormal = binormal; - mVertices[i].mNormal = normal; + binorm[i].load4a((F32*) &binormal.mQ); + norm[i].load4a((F32*) &normal.mQ); } - mHasBinormals = TRUE; - if (partial_build) { return TRUE; @@ -5389,8 +5545,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) pt2--; } } - - makeTriStrip(); } else { @@ -5495,8 +5649,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) pt2--; } } - - makeTriStrip(); } } else @@ -5518,167 +5670,276 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) mIndices[3*i+v2] = i + 1; } -#if GEN_TRI_STRIP - //make tri strip - if (mTypeMask & OPEN_MASK) - { - makeTriStrip(); - } - else - { - S32 j = num_vertices-2; - if (mTypeMask & TOP_MASK) - { - mTriStrip.push_back(0); - for (S32 i = 0; i <= j; ++i) - { - mTriStrip.push_back(i); - if (i != j) - { - mTriStrip.push_back(j); - } - --j; - } - } - else - { - mTriStrip.push_back(j); - for (S32 i = 0; i <= j; ++i) - { - if (i != j) - { - mTriStrip.push_back(j); - } - mTriStrip.push_back(i); - --j; - } - } - - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); - if (mTriStrip.size()%2 == 1) - { - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); - } - } -#endif } return TRUE; } -void LLVolumeFace::makeTriStrip() -{ -#if GEN_TRI_STRIP - for (U32 i = 0; i < mIndices.size(); i+=3) - { - U16 i0 = mIndices[i]; - U16 i1 = mIndices[i+1]; - U16 i2 = mIndices[i+2]; - - if ((i/3)%2 == 1) - { - mTriStrip.push_back(i0); - mTriStrip.push_back(i0); - mTriStrip.push_back(i1); - mTriStrip.push_back(i2); - mTriStrip.push_back(i2); - } - else - { - mTriStrip.push_back(i2); - mTriStrip.push_back(i2); - mTriStrip.push_back(i1); - mTriStrip.push_back(i0); - mTriStrip.push_back(i0); - } - } - - if (mTriStrip.size()%2 == 1) - { - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); - } -#endif -} - void LLVolumeFace::createBinormals() { LLMemType m1(LLMemType::MTYPE_VOLUME); - if (!mHasBinormals) + if (!mBinormals) { + allocateBinormals(mNumVertices); + //generate binormals - for (U32 i = 0; i < mIndices.size()/3; i++) + LLVector4a* pos = mPositions; + LLVector2* tc = (LLVector2*) mTexCoords; + LLVector4a* binorm = (LLVector4a*) mBinormals; + + for (U32 i = 0; i < mNumIndices/3; i++) { //for each triangle - const VertexData& v0 = mVertices[mIndices[i*3+0]]; - const VertexData& v1 = mVertices[mIndices[i*3+1]]; - const VertexData& v2 = mVertices[mIndices[i*3+2]]; + const U16& i0 = mIndices[i*3+0]; + const U16& i1 = mIndices[i*3+1]; + const U16& i2 = mIndices[i*3+2]; //calculate binormal - LLVector3 binorm = calc_binormal_from_triangle(v0.mPosition, v0.mTexCoord, - v1.mPosition, v1.mTexCoord, - v2.mPosition, v2.mTexCoord); + LLVector4a binormal; + calc_binormal_from_triangle(binormal, + pos[i0], tc[i0], + pos[i1], tc[i1], + pos[i2], tc[i2]); - for (U32 j = 0; j < 3; j++) - { //add triangle normal to vertices - mVertices[mIndices[i*3+j]].mBinormal += binorm; // * (weight_sum - d[j])/weight_sum; - } + + //add triangle normal to vertices + binorm[i0].add(binormal); + binorm[i1].add(binormal); + binorm[i2].add(binormal); //even out quad contributions if (i % 2 == 0) { - mVertices[mIndices[i*3+2]].mBinormal += binorm; + binorm[i2].add(binormal); } else { - mVertices[mIndices[i*3+1]].mBinormal += binorm; + binorm[i1].add(binormal); } } //normalize binormals - for (U32 i = 0; i < mVertices.size(); i++) + for (U32 i = 0; i < mNumVertices; i++) { - mVertices[i].mBinormal.normVec(); - mVertices[i].mNormal.normVec(); + binorm[i].normalize3fast(); } + } +} + +void LLVolumeFace::resizeVertices(S32 num_verts) +{ + _mm_free(mPositions); + _mm_free(mNormals); + _mm_free(mBinormals); + _mm_free(mTexCoords); - mHasBinormals = TRUE; + mBinormals = NULL; + + if (num_verts) + { + mPositions = (LLVector4a*) _mm_malloc(num_verts*16, 16); + mNormals = (LLVector4a*) _mm_malloc(num_verts*16, 16); + + //pad texture coordinate block end to allow for QWORD reads + S32 size = ((num_verts*8) + 0xF) & ~0xF; + mTexCoords = (LLVector2*) _mm_malloc(size, 16); } + else + { + mPositions = NULL; + mNormals = NULL; + mTexCoords = NULL; + } + + mNumVertices = num_verts; +} + +void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) +{ + pushVertex(cv.getPosition(), cv.getNormal(), cv.mTexCoord); } -void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat, LLMatrix4& norm_mat) +void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) { - U16 offset = mVertices.size(); + S32 new_verts = mNumVertices+1; + S32 new_size = new_verts*16; + + //positions + LLVector4a* dst = (LLVector4a*) _mm_malloc(new_size, 16); + if (mPositions) + { + LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, new_size/4); + _mm_free(mPositions); + } + mPositions = dst; - if (face.mVertices.size() + mVertices.size() > 65536) + //normals + dst = (LLVector4a*) _mm_malloc(new_size, 16); + if (mNormals) { - llerrs << "Cannot append face -- 16-bit overflow will occur." << llendl; + LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, new_size/4); + _mm_free(mNormals); + } + mNormals = dst; + + //tex coords + new_size = ((new_verts*8)+0xF) & ~0xF; + + { + LLVector2* dst = (LLVector2*) _mm_malloc(new_size, 16); + if (mTexCoords) + { + LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, new_size/4); + _mm_free(mTexCoords); + } + } + + //just clear binormals + _mm_free(mBinormals); + mBinormals = NULL; + + mPositions[mNumVertices] = pos; + mNormals[mNumVertices] = norm; + mTexCoords[mNumVertices] = tc; + + mNumVertices++; +} + +void LLVolumeFace::allocateBinormals(S32 num_verts) +{ + _mm_free(mBinormals); + mBinormals = (LLVector4a*) _mm_malloc(num_verts*16, 16); +} + + +void LLVolumeFace::resizeIndices(S32 num_indices) +{ + _mm_free(mIndices); + + if (num_indices) + { + //pad index block end to allow for QWORD reads + S32 size = ((num_indices*2) + 0xF) & ~0xF; + + mIndices = (U16*) _mm_malloc(size,16); + } + else + { + mIndices = NULL; + } + + mNumIndices = num_indices; +} + +void LLVolumeFace::pushIndex(const U16& idx) +{ + S32 new_count = mNumIndices + 1; + S32 new_size = ((new_count*2)+0xF) & ~0xF; + + S32 old_size = (mNumIndices+0xF) & ~0xF; + if (new_size != old_size) + { + U16* dst = (U16*) _mm_malloc(new_size, 16); + LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, new_size/4); + _mm_free(mIndices); + mIndices = dst; } - for (U32 i = 0; i < face.mVertices.size(); ++i) + mIndices[mNumIndices++] = idx; +} + +void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx) +{ + resizeVertices(v.size()); + resizeIndices(idx.size()); + + for (U32 i = 0; i < v.size(); ++i) { - VertexData v = face.mVertices[i]; - v.mPosition = v.mPosition*mat; - v.mNormal = v.mNormal * norm_mat; + mPositions[i] = v[i].getPosition(); + mNormals[i] = v[i].getNormal(); + mTexCoords[i] = v[i].mTexCoord; + } + + for (U32 i = 0; i < idx.size(); ++i) + { + mIndices[i] = idx[i]; + } +} + +void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMatrix4& norm_mat_in) +{ + U16 offset = mNumVertices; + + S32 new_count = face.mNumVertices + mNumVertices; + + if (new_count > 65536) + { + llerrs << "Cannot append face -- 16-bit overflow will occur." << llendl; + } + + + LLVector4a* new_pos = (LLVector4a*) _mm_malloc(new_count*16, 16); + LLVector4a* new_norm = (LLVector4a*) _mm_malloc(new_count*16, 16); + LLVector2* new_tc = (LLVector2*) _mm_malloc((new_count*8+0xF) & ~0xF, 16); + + LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, new_count*4); + LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, new_count*4); + LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, new_count*2); + + _mm_free(mPositions); + _mm_free(mNormals); + _mm_free(mTexCoords); + + mPositions = new_pos; + mNormals = new_norm; + mTexCoords = new_tc; + + mNumVertices = new_count; - v.mNormal.normalize(); + LLVector4a* dst_pos = (LLVector4a*) mPositions+offset; + LLVector2* dst_tc = (LLVector2*) mTexCoords+offset; + LLVector4a* dst_norm = (LLVector4a*) mNormals+offset; - mVertices.push_back(v); + LLVector4a* src_pos = (LLVector4a*) face.mPositions; + LLVector2* src_tc = (LLVector2*) face.mTexCoords; + LLVector4a* src_norm = (LLVector4a*) face.mNormals; + + LLMatrix4a mat, norm_mat; + mat.loadu(mat_in); + norm_mat.loadu(norm_mat_in); + + for (U32 i = 0; i < face.mNumVertices; ++i) + { + mat.affineTransform(src_pos[i], dst_pos[i]); + norm_mat.rotate(src_norm[i], dst_norm[i]); + dst_norm[i].normalize3fast(); + + dst_tc[i] = src_tc[i]; if (offset == 0 && i == 0) { - mExtents[0] = mExtents[1] = v.mPosition; + mExtents[0] = mExtents[1] = LLVector3((F32*) &(dst_pos[i].mQ)); } else { - update_min_max(mExtents[0], mExtents[1], v.mPosition); + update_min_max(mExtents[0], mExtents[1], (F32*) &(dst_pos[i].mQ)); } } - - for (U32 i = 0; i < face.mIndices.size(); ++i) + + new_count = mNumIndices + face.mNumIndices; + U16* new_indices = (U16*) _mm_malloc((new_count*2+0xF) & ~0xF, 16); + LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, new_count/2); + _mm_free(mIndices); + mIndices = new_indices; + mNumIndices = new_count; + + U16* dst_idx = mIndices+offset; + + for (U32 i = 0; i < face.mNumIndices; ++i) { - mIndices.push_back(face.mIndices[i]+offset); + dst_idx[i] = face.mIndices[i]+offset; } } @@ -5708,21 +5969,20 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) num_vertices = mNumS*mNumT; num_indices = (mNumS-1)*(mNumT-1)*6; - mVertices.resize(num_vertices); - if (!partial_build) { - mIndices.resize(num_indices); + resizeVertices(num_vertices); + resizeIndices(num_indices); if ((volume->getParams().getSculptType() & LL_SCULPT_TYPE_MASK) != LL_SCULPT_TYPE_MESH) { mEdge.resize(num_indices); } } - else - { - mHasBinormals = FALSE; - } + + LLVector4a* pos = (LLVector4a*) mPositions; + LLVector4a* norm = (LLVector4a*) mNormals; + LLVector2* tc = (LLVector2*) mTexCoords; S32 begin_stex = llfloor( profile[mBeginS].mV[2] ); S32 num_s = ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2) ? mNumS/2 : mNumS; @@ -5774,21 +6034,21 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) i = mBeginS + s + max_s*t; } - mVertices[cur_vertex].mPosition = mesh[i].mPos; - mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); + pos[cur_vertex].load3(mesh[i].mPos.mV); + tc[cur_vertex] = LLVector2(ss,tt); - mVertices[cur_vertex].mNormal = LLVector3(0,0,0); - mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); + norm[cur_vertex].clear(); cur_vertex++; if ((mTypeMask & INNER_MASK) && (mTypeMask & FLAT_MASK) && mNumS > 2 && s > 0) { - mVertices[cur_vertex].mPosition = mesh[i].mPos; - mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); + + pos[cur_vertex].load3(mesh[i].mPos.mV); + tc[cur_vertex] = LLVector2(ss,tt); - mVertices[cur_vertex].mNormal = LLVector3(0,0,0); - mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); + norm[cur_vertex].clear(); + cur_vertex++; } } @@ -5806,12 +6066,10 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) i = mBeginS + s + max_s*t; ss = profile[mBeginS + s].mV[2] - begin_stex; - mVertices[cur_vertex].mPosition = mesh[i].mPos; - mVertices[cur_vertex].mTexCoord = LLVector2(ss,tt); - - mVertices[cur_vertex].mNormal = LLVector3(0,0,0); - mVertices[cur_vertex].mBinormal = LLVector3(0,0,0); - + pos[cur_vertex].load3(mesh[i].mPos.mV); + tc[cur_vertex] = LLVector2(ss,tt); + norm[cur_vertex].clear(); + cur_vertex++; } } @@ -5822,10 +6080,11 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) LLVector3& face_max = mExtents[1]; mCenter.clearVec(); - face_min = face_max = mVertices[0].mPosition; - for (U32 i = 1; i < mVertices.size(); ++i) + face_min = face_max = LLVector3((F32*) &(pos[0].mQ)); + + for (U32 i = 1; i < mNumVertices; ++i) { - update_min_max(face_min, face_max, mVertices[i].mPosition); + update_min_max(face_min, face_max, (F32*) &(pos[i].mQ)); } mCenter = (face_min + face_max) * 0.5f; @@ -5836,18 +6095,9 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) if (!partial_build) { -#if GEN_TRI_STRIP - mTriStrip.clear(); -#endif - // Now we generate the indices. for (t = 0; t < (mNumT-1); t++) { -#if GEN_TRI_STRIP - //prepend terminating index to strip - mTriStrip.push_back(mNumS*t); -#endif - for (s = 0; s < (mNumS-1); s++) { mIndices[cur_index++] = s + mNumS*t; //bottom left @@ -5857,16 +6107,6 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) mIndices[cur_index++] = s+1 + mNumS*t; //bottom right mIndices[cur_index++] = s+1 + mNumS*(t+1); //top right -#if GEN_TRI_STRIP - if (s == 0) - { - mTriStrip.push_back(s+mNumS*t); - mTriStrip.push_back(s+mNumS*(t+1)); - } - mTriStrip.push_back(s+1+mNumS*t); - mTriStrip.push_back(s+1+mNumS*(t+1)); -#endif - mEdge[cur_edge++] = (mNumS-1)*2*t+s*2+1; //bottom left/top right neighbor face if (t < mNumT-2) { //top right/top left neighbor face mEdge[cur_edge++] = (mNumS-1)*2*(t+1)+s*2+1; @@ -5907,52 +6147,55 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) } mEdge[cur_edge++] = (mNumS-1)*2*t+s*2; //top right/bottom left neighbor face } -#if GEN_TRI_STRIP - //append terminating vertex to strip - mTriStrip.push_back(mNumS-1+mNumS*(t+1)); -#endif } - -#if GEN_TRI_STRIP - if (mTriStrip.size()%2 == 1) - { - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); - } -#endif } //generate normals - for (U32 i = 0; i < mIndices.size()/3; i++) //for each triangle + for (U32 i = 0; i < mNumIndices/3; i++) //for each triangle { const U16* idx = &(mIndices[i*3]); - - VertexData* v[] = - { &mVertices[idx[0]], &mVertices[idx[1]], &mVertices[idx[2]] }; - - //calculate triangle normal - LLVector3 norm = (v[0]->mPosition-v[1]->mPosition) % (v[0]->mPosition-v[2]->mPosition); + - v[0]->mNormal += norm; - v[1]->mNormal += norm; - v[2]->mNormal += norm; + LLVector4a* v[] = + { pos+idx[0], pos+idx[1], pos+idx[2] }; + + LLVector4a* n[] = + { norm+idx[0], norm+idx[1], norm+idx[2] }; + + //calculate triangle normal + LLVector4a a, b, c; + + a.setSub(*v[0], *v[1]); + b.setSub(*v[0], *v[2]); + c.setCross3(a,b); + n[0]->add(c); + n[1]->add(c); + n[2]->add(c); + //even out quad contributions - v[i%2+1]->mNormal += norm; + n[i%2+1]->add(c); } // adjust normals based on wrapping and stitching - BOOL s_bottom_converges = ((mVertices[0].mPosition - mVertices[mNumS*(mNumT-2)].mPosition).magVecSquared() < 0.000001f); - BOOL s_top_converges = ((mVertices[mNumS-1].mPosition - mVertices[mNumS*(mNumT-2)+mNumS-1].mPosition).magVecSquared() < 0.000001f); + LLVector4a top; + top.setSub(pos[0], pos[mNumS*(mNumT-2)]); + BOOL s_bottom_converges = (top.dot3(top) < 0.000001f); + + top.setSub(pos[mNumS-1], pos[mNumS*(mNumT-2)+mNumS-1]); + BOOL s_top_converges = (top.dot3(top) < 0.000001f); + if (sculpt_stitching == LL_SCULPT_TYPE_NONE) // logic for non-sculpt volumes { if (volume->getPath().isOpen() == FALSE) { //wrap normals on T for (S32 i = 0; i < mNumS; i++) { - LLVector3 norm = mVertices[i].mNormal + mVertices[mNumS*(mNumT-1)+i].mNormal; - mVertices[i].mNormal = norm; - mVertices[mNumS*(mNumT-1)+i].mNormal = norm; + LLVector4a n; + n.setAdd(norm[i], norm[mNumS*(mNumT-1)+i]); + norm[i] = n; + norm[mNumS*(mNumT-1)+i] = n; } } @@ -5960,9 +6203,10 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { //wrap normals on S for (S32 i = 0; i < mNumT; i++) { - LLVector3 norm = mVertices[mNumS*i].mNormal + mVertices[mNumS*i+mNumS-1].mNormal; - mVertices[mNumS * i].mNormal = norm; - mVertices[mNumS * i+mNumS-1].mNormal = norm; + LLVector4a n; + n.setAdd(norm[mNumS*i], norm[mNumS*i+mNumS-1]); + norm[mNumS * i] = n; + norm[mNumS * i+mNumS-1] = n; } } @@ -5973,7 +6217,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { //all lower S have same normal for (S32 i = 0; i < mNumT; i++) { - mVertices[mNumS*i].mNormal = LLVector3(1,0,0); + norm[mNumS*i].set(1,0,0); } } @@ -5981,7 +6225,7 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { //all upper S have same normal for (S32 i = 0; i < mNumT; i++) { - mVertices[mNumS*i+mNumS-1].mNormal = LLVector3(-1,0,0); + norm[mNumS*i+mNumS-1].set(-1,0,0); } } } @@ -6009,30 +6253,33 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { // average normals for north pole - LLVector3 average(0.0, 0.0, 0.0); + LLVector4a average; + average.clear(); + for (S32 i = 0; i < mNumS; i++) { - average += mVertices[i].mNormal; + average.add(norm[i]); } // set average for (S32 i = 0; i < mNumS; i++) { - mVertices[i].mNormal = average; + norm[i] = average; } // average normals for south pole - average = LLVector3(0.0, 0.0, 0.0); + average.clear(); + for (S32 i = 0; i < mNumS; i++) { - average += mVertices[i + mNumS * (mNumT - 1)].mNormal; + average.add(norm[i + mNumS * (mNumT - 1)]); } // set average for (S32 i = 0; i < mNumS; i++) { - mVertices[i + mNumS * (mNumT - 1)].mNormal = average; + norm[i + mNumS * (mNumT - 1)] = average; } } @@ -6042,23 +6289,22 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) { for (S32 i = 0; i < mNumT; i++) { - LLVector3 norm = mVertices[mNumS*i].mNormal + mVertices[mNumS*i+mNumS-1].mNormal; - mVertices[mNumS * i].mNormal = norm; - mVertices[mNumS * i+mNumS-1].mNormal = norm; + LLVector4a n; + n.setAdd(norm[mNumS*i], norm[mNumS*i+mNumS-1]); + norm[mNumS * i] = n; + norm[mNumS * i+mNumS-1] = n; } } - - if (wrap_t) { for (S32 i = 0; i < mNumS; i++) { - LLVector3 norm = mVertices[i].mNormal + mVertices[mNumS*(mNumT-1)+i].mNormal; - mVertices[i].mNormal = norm; - mVertices[mNumS*(mNumT-1)+i].mNormal = norm; + LLVector4a n; + n.setAdd(norm[i], norm[mNumS*(mNumT-1)+i]); + norm[i] = n; + norm[mNumS*(mNumT-1)+i] = n; } - } } @@ -6068,41 +6314,51 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) // Finds binormal based on three vertices with texture coordinates. // Fills in dummy values if the triangle has degenerate texture coordinates. -LLVector3 calc_binormal_from_triangle( - const LLVector3& pos0, +void calc_binormal_from_triangle(LLVector4a& binormal, + + const LLVector4a& pos0, const LLVector2& tex0, - const LLVector3& pos1, + const LLVector4a& pos1, const LLVector2& tex1, - const LLVector3& pos2, + const LLVector4a& pos2, const LLVector2& tex2) { - LLVector3 rx0( pos0.mV[VX], tex0.mV[VX], tex0.mV[VY] ); - LLVector3 rx1( pos1.mV[VX], tex1.mV[VX], tex1.mV[VY] ); - LLVector3 rx2( pos2.mV[VX], tex2.mV[VX], tex2.mV[VY] ); + LLVector4a rx0( pos0[VX], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a rx1( pos1[VX], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a rx2( pos2[VX], tex2.mV[VX], tex2.mV[VY] ); - LLVector3 ry0( pos0.mV[VY], tex0.mV[VX], tex0.mV[VY] ); - LLVector3 ry1( pos1.mV[VY], tex1.mV[VX], tex1.mV[VY] ); - LLVector3 ry2( pos2.mV[VY], tex2.mV[VX], tex2.mV[VY] ); + LLVector4a ry0( pos0[VY], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a ry1( pos1[VY], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a ry2( pos2[VY], tex2.mV[VX], tex2.mV[VY] ); - LLVector3 rz0( pos0.mV[VZ], tex0.mV[VX], tex0.mV[VY] ); - LLVector3 rz1( pos1.mV[VZ], tex1.mV[VX], tex1.mV[VY] ); - LLVector3 rz2( pos2.mV[VZ], tex2.mV[VX], tex2.mV[VY] ); + LLVector4a rz0( pos0[VZ], tex0.mV[VX], tex0.mV[VY] ); + LLVector4a rz1( pos1[VZ], tex1.mV[VX], tex1.mV[VY] ); + LLVector4a rz2( pos2[VZ], tex2.mV[VX], tex2.mV[VY] ); - LLVector3 r0 = (rx0 - rx1) % (rx0 - rx2); - LLVector3 r1 = (ry0 - ry1) % (ry0 - ry2); - LLVector3 r2 = (rz0 - rz1) % (rz0 - rz2); + LLVector4a lhs, rhs; + + LLVector4a r0; + lhs.setSub(rx0, rx1); rhs.setSub(rx0, rx2); + r0.setCross3(lhs, rhs); + + LLVector4a r1; + lhs.setSub(ry0, ry1); rhs.setSub(ry0, ry2); + r1.setCross3(lhs, rhs); + + LLVector4a r2; + lhs.setSub(rz0, rz1); rhs.setSub(rz0, rz2); + r2.setCross3(lhs, rhs); - if( r0.mV[VX] && r1.mV[VX] && r2.mV[VX] ) + if( r0[VX] && r1[VX] && r2[VX] ) { - LLVector3 binormal( - -r0.mV[VZ] / r0.mV[VX], - -r1.mV[VZ] / r1.mV[VX], - -r2.mV[VZ] / r2.mV[VX]); + binormal.set( + -r0[VZ] / r0[VX], + -r1[VZ] / r1[VX], + -r2[VZ] / r2[VX]); // binormal.normVec(); - return binormal; } else { - return LLVector3( 0, 1 , 0 ); + binormal.set( 0, 1 , 0 ); } } diff --git a/indra/llmath/llvolume.h b/indra/llmath/llvolume.h index c6a156ae37..aa58d6d114 100644 --- a/indra/llmath/llvolume.h +++ b/indra/llmath/llvolume.h @@ -55,6 +55,7 @@ class LLVolume; #include "v4coloru.h" #include "llrefcount.h" #include "llfile.h" +#include "llvector4a.h" //============================================================================ @@ -791,35 +792,114 @@ public: class LLVolumeFace { public: + class VertexData + { + enum + { + POSITION = 0, + NORMAL = 1 + }; + + private: + void init() + { + mData = (LLVector4a*) _mm_malloc(32, 16); + } + public: + VertexData() + { + init(); + } + + VertexData(const VertexData& rhs) + { + init(); + LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 8); + mTexCoord = rhs.mTexCoord; + } + + ~VertexData() + { + _mm_free(mData); + } + + LLVector4a& getPosition() + { + return mData[POSITION]; + } + + LLVector4a& getNormal() + { + return mData[NORMAL]; + } + + const LLVector4a& getPosition() const + { + return mData[POSITION]; + } + + const LLVector4a& getNormal() const + { + return mData[NORMAL]; + } + + + void setPosition(const LLVector4a& pos) + { + mData[POSITION] = pos; + } + + void setNormal(const LLVector4a& norm) + { + mData[NORMAL] = norm; + } + + LLVector2 mTexCoord; + + bool operator<(const VertexData& rhs) const; + bool operator==(const VertexData& rhs) const; + bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const; + + private: + LLVector4a* mData; + }; + LLVolumeFace() : mID(0), mTypeMask(0), - mHasBinormals(FALSE), mBeginS(0), mBeginT(0), mNumS(0), - mNumT(0) + mNumT(0), + mNumVertices(0), + mNumIndices(0), + mPositions(NULL), + mNormals(NULL), + mBinormals(NULL), + mTexCoords(NULL), + mIndices(NULL) { } + ~LLVolumeFace(); + BOOL create(LLVolume* volume, BOOL partial_build = FALSE); void createBinormals(); - void makeTriStrip(); void appendFace(const LLVolumeFace& face, LLMatrix4& transform, LLMatrix4& normal_tranform); - class VertexData - { - public: - LLVector3 mPosition; - LLVector3 mNormal; - LLVector3 mBinormal; - LLVector2 mTexCoord; + void resizeVertices(S32 num_verts); + void allocateBinormals(S32 num_verts); + void resizeIndices(S32 num_indices); + void fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx); - bool operator<(const VertexData& rhs) const; - bool operator==(const VertexData& rhs) const; - bool compareNormal(const VertexData& rhs, F32 angle_cutoff) const; - }; + void pushVertex(const VertexData& cv); + void pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc); + void pushIndex(const U16& idx); + + void swapData(LLVolumeFace& rhs); + + void getVertexData(U16 indx, LLVolumeFace::VertexData& cv); class VertexMapData : public LLVolumeFace::VertexData { @@ -828,28 +908,20 @@ public: bool operator==(const LLVolumeFace::VertexData& rhs) const { - return mPosition == rhs.mPosition && + return getPosition().equal3(rhs.getPosition()) && mTexCoord == rhs.mTexCoord && - mNormal == rhs.mNormal; + getNormal().equal3(rhs.getNormal()); } struct ComparePosition { - bool operator()(const LLVector3& a, const LLVector3& b) const + bool operator()(const LLVector4a& a, const LLVector4a& b) const { - if (a.mV[0] != b.mV[0]) - { - return a.mV[0] < b.mV[0]; - } - if (a.mV[1] != b.mV[1]) - { - return a.mV[1] < b.mV[1]; - } - return a.mV[2] < b.mV[2]; + return a.less3(b); } }; - typedef std::map<LLVector3, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap; + typedef std::map<LLVector4a, std::vector<VertexMapData>, VertexMapData::ComparePosition > PointMap; }; void optimize(F32 angle_cutoff = 2.f); @@ -873,7 +945,6 @@ public: S32 mID; U32 mTypeMask; LLVector3 mCenter; - BOOL mHasBinormals; // Only used for INNER/OUTER faces S32 mBeginS; @@ -883,9 +954,15 @@ public: LLVector3 mExtents[2]; //minimum and maximum point of face - std::vector<VertexData> mVertices; - std::vector<U16> mIndices; - std::vector<U16> mTriStrip; + S32 mNumVertices; + S32 mNumIndices; + + LLVector4a* mPositions; + LLVector4a* mNormals; + LLVector4a* mBinormals; + LLVector2* mTexCoords; + U16* mIndices; + std::vector<S32> mEdge; //list of skin weights for rigged volumes @@ -1038,17 +1115,22 @@ public: std::ostream& operator<<(std::ostream &s, const LLVolumeParams &volume_params); -LLVector3 calc_binormal_from_triangle( - const LLVector3& pos0, +void calc_binormal_from_triangle( + LLVector4a& binormal, + const LLVector4a& pos0, const LLVector2& tex0, - const LLVector3& pos1, + const LLVector4a& pos1, const LLVector2& tex1, - const LLVector3& pos2, + const LLVector4a& pos2, const LLVector2& tex2); BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size); + BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided); + +BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, + F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided); diff --git a/indra/llmath/v3math.h b/indra/llmath/v3math.h index 76dd938887..0e7d72e958 100644 --- a/indra/llmath/v3math.h +++ b/indra/llmath/v3math.h @@ -532,6 +532,21 @@ inline void update_min_max(LLVector3& min, LLVector3& max, const LLVector3& pos) } } +inline void update_min_max(LLVector3& min, LLVector3& max, const F32* pos) +{ + for (U32 i = 0; i < 3; i++) + { + if (min.mV[i] > pos[i]) + { + min.mV[i] = pos[i]; + } + if (max.mV[i] < pos[i]) + { + max.mV[i] = pos[i]; + } + } +} + inline F32 angle_between(const LLVector3& a, const LLVector3& b) { LLVector3 an = a; diff --git a/indra/llrender/llimagegl.cpp b/indra/llrender/llimagegl.cpp index 2f02ccf30b..2579bad0b6 100644 --- a/indra/llrender/llimagegl.cpp +++ b/indra/llrender/llimagegl.cpp @@ -1813,7 +1813,7 @@ BOOL LLImageGL::getMask(const LLVector2 &tc) { LL_WARNS_ONCE("render") << "Ugh, non-finite u/v in mask pick" << LL_ENDL; u = v = 0.f; - llassert(false); + //llassert(false); } if (LL_UNLIKELY(u < 0.f || u > 1.f || @@ -1821,7 +1821,7 @@ BOOL LLImageGL::getMask(const LLVector2 &tc) { LL_WARNS_ONCE("render") << "Ugh, u/v out of range in image mask pick" << LL_ENDL; u = v = 0.f; - llassert(false); + //llassert(false); } S32 x = llfloor(u * mPickMaskWidth); diff --git a/indra/llrender/llvertexbuffer.cpp b/indra/llrender/llvertexbuffer.cpp index 7fa47cd171..7f14a8d5ac 100644 --- a/indra/llrender/llvertexbuffer.cpp +++ b/indra/llrender/llvertexbuffer.cpp @@ -39,6 +39,7 @@ #include "llglheaders.h" #include "llmemtype.h" #include "llrender.h" +#include "llvector4a.h" //============================================================================ @@ -66,6 +67,27 @@ S32 LLVertexBuffer::sWeight4Loc = -1; std::vector<U32> LLVertexBuffer::sDeleteList; +#define LL_ALIGNED_VB 1 + +#if LL_ALIGNED_VB + +S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] = +{ + sizeof(LLVector4), // TYPE_VERTEX, + sizeof(LLVector4), // TYPE_NORMAL, + sizeof(LLVector2), // TYPE_TEXCOORD0, + sizeof(LLVector2), // TYPE_TEXCOORD1, + sizeof(LLVector2), // TYPE_TEXCOORD2, + sizeof(LLVector2), // TYPE_TEXCOORD3, + sizeof(LLColor4U), // TYPE_COLOR, + sizeof(LLVector4), // TYPE_BINORMAL, + sizeof(F32), // TYPE_WEIGHT, + sizeof(LLVector4), // TYPE_WEIGHT4, + sizeof(LLVector4), // TYPE_CLOTHWEIGHT, +}; + +#else + S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] = { sizeof(LLVector3), // TYPE_VERTEX, @@ -81,6 +103,8 @@ S32 LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_MAX] = sizeof(LLVector4), // TYPE_CLOTHWEIGHT, }; +#endif + U32 LLVertexBuffer::sGLMode[LLRender::NUM_MODES] = { GL_TRIANGLES, @@ -237,10 +261,8 @@ void LLVertexBuffer::setupClientArrays(U32 data_mask) } } -void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const +void LLVertexBuffer::validateRange(U32 start, U32 end, U32 count, U32 indices_offset) const { - llassert(mRequestedNumVerts >= 0); - if (start >= (U32) mRequestedNumVerts || end >= (U32) mRequestedNumVerts) { @@ -255,6 +277,25 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi llerrs << "Bad index buffer draw range: [" << indices_offset << ", " << indices_offset+count << "]" << llendl; } + if (gDebugGL && !useVBOs()) + { + U16* idx = ((U16*) getIndicesPointer())+indices_offset; + for (U32 i = 0; i < count; ++i) + { + if (idx[i] < start || idx[i] > end) + { + llerrs << "Index out of range: " << idx[i] << " not in [" << start << ", " << end << "]" << llendl; + } + } + } +} + +void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const +{ + validateRange(start, end, count, indices_offset); + + llassert(mRequestedNumVerts >= 0); + if (mGLIndices != sGLRenderIndices) { llerrs << "Wrong index buffer bound." << llendl; @@ -273,17 +314,6 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi U16* idx = ((U16*) getIndicesPointer())+indices_offset; - if (gDebugGL && !useVBOs()) - { - for (U32 i = 0; i < count; ++i) - { - if (idx[i] < start || idx[i] > end) - { - llerrs << "Index out of range: " << idx[i] << " not in [" << start << ", " << end << "]" << llendl; - } - } - } - stop_glerror(); glDrawRangeElements(sGLMode[mode], start, end, count, GL_UNSIGNED_SHORT, idx); @@ -428,11 +458,42 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask, S32 usage) : mTypeMask = typemask; mStride = stride; + mAlignedOffset = 0; + mAlignedIndexOffset = 0; + sCount++; } +#if LL_ALIGNED_VB +//static +S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets, S32 num_vertices) +{ + S32 offset = 0; + for (S32 i=0; i<TYPE_MAX; i++) + { + U32 mask = 1<<i; + if (typemask & mask) + { + if (offsets) + { + offsets[i] = offset; + offset += LLVertexBuffer::sTypeOffsets[i]*num_vertices; + offset = (offset + 0xF) & ~0xF; + } + } + } + + return offset+16; +} + +S32 LLVertexBuffer::getSize() const +{ + return mStride; +} + +#else //static -S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets) +S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets, S32 num_vertices) { S32 stride = 0; for (S32 i=0; i<TYPE_MAX; i++) @@ -451,6 +512,12 @@ S32 LLVertexBuffer::calcStride(const U32& typemask, S32* offsets) return stride; } +S32 LLVertexBuffer::getSize() const +{ + return mNumVerts*mStride; +} + +#endif // protected, use unref() //virtual LLVertexBuffer::~LLVertexBuffer() @@ -560,7 +627,7 @@ void LLVertexBuffer::createGLBuffer() { static int gl_buffer_idx = 0; mGLBuffer = ++gl_buffer_idx; - mMappedData = new U8[size]; + mMappedData = (U8*) _mm_malloc(size, 16); memset(mMappedData, 0, size); } } @@ -582,16 +649,20 @@ void LLVertexBuffer::createGLIndices() mEmpty = TRUE; + //pad by 16 bytes for aligned copies + size += 16; + if (useVBOs()) { + //pad by another 16 bytes for VBO pointer adjustment + size += 16; mMappedIndexData = NULL; genIndices(); mResized = TRUE; } else { - mMappedIndexData = new U8[size]; - memset(mMappedIndexData, 0, size); + mMappedIndexData = (U8*) _mm_malloc(size, 16); static int gl_buffer_idx = 0; mGLIndices = ++gl_buffer_idx; } @@ -612,7 +683,7 @@ void LLVertexBuffer::destroyGLBuffer() } else { - delete [] mMappedData; + _mm_free(mMappedData); mMappedData = NULL; mEmpty = TRUE; } @@ -639,7 +710,7 @@ void LLVertexBuffer::destroyGLIndices() } else { - delete [] mMappedIndexData; + _mm_free(mMappedIndexData); mMappedIndexData = NULL; mEmpty = TRUE; } @@ -664,7 +735,7 @@ void LLVertexBuffer::updateNumVerts(S32 nverts) } mRequestedNumVerts = nverts; - + if (!mDynamicSize) { mNumVerts = nverts; @@ -679,6 +750,9 @@ void LLVertexBuffer::updateNumVerts(S32 nverts) } mNumVerts = nverts; } +#if LL_ALIGNED_VB + mStride = calcStride(mTypeMask, mOffsets, mNumVerts); +#endif } @@ -754,9 +828,6 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices) { sAllocatedBytes -= getSize() + getIndicesSize(); - S32 oldsize = getSize(); - S32 old_index_size = getIndicesSize(); - updateNumVerts(newnverts); updateNumIndices(newnindices); @@ -773,26 +844,10 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices) } else { - //delete old buffer, keep GL buffer for now if (!useVBOs()) { - U8* old = mMappedData; - mMappedData = new U8[newsize]; - if (old) - { - memcpy(mMappedData, old, llmin(newsize, oldsize)); - if (newsize > oldsize) - { - memset(mMappedData+oldsize, 0, newsize-oldsize); - } - - delete [] old; - } - else - { - memset(mMappedData, 0, newsize); - mEmpty = TRUE; - } + _mm_free(mMappedData); + mMappedData = (U8*) _mm_malloc(newsize, 16); } mResized = TRUE; } @@ -812,24 +867,8 @@ void LLVertexBuffer::resizeBuffer(S32 newnverts, S32 newnindices) { if (!useVBOs()) { - //delete old buffer, keep GL buffer for now - U8* old = mMappedIndexData; - mMappedIndexData = new U8[new_index_size]; - - if (old) - { - memcpy(mMappedIndexData, old, llmin(new_index_size, old_index_size)); - if (new_index_size > old_index_size) - { - memset(mMappedIndexData+old_index_size, 0, new_index_size - old_index_size); - } - delete [] old; - } - else - { - memset(mMappedIndexData, 0, new_index_size); - mEmpty = TRUE; - } + _mm_free(mMappedIndexData); + mMappedIndexData = (U8*) _mm_malloc(new_index_size, 16); } mResized = TRUE; } @@ -886,12 +925,19 @@ U8* LLVertexBuffer::mapBuffer(S32 access) setBuffer(0); mLocked = TRUE; stop_glerror(); - mMappedData = (U8*) glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + + U8* src = (U8*) glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + mMappedData = LL_NEXT_ALIGNED_ADDRESS<U8>(src); + mAlignedOffset = mMappedData - src; + stop_glerror(); } { LLMemType mt_v(LLMemType::MTYPE_VERTEX_MAP_BUFFER_INDICES); - mMappedIndexData = (U8*) glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + U8* src = (U8*) glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + mMappedIndexData = LL_NEXT_ALIGNED_ADDRESS<U8>(src); + mAlignedIndexOffset = mMappedIndexData - src; + stop_glerror(); } @@ -975,6 +1021,45 @@ void LLVertexBuffer::unmapBuffer() //---------------------------------------------------------------------------- +#if LL_ALIGNED_VB + +template <class T,S32 type> struct VertexBufferStrider +{ + typedef LLStrider<T> strider_t; + static bool get(LLVertexBuffer& vbo, + strider_t& strider, + S32 index) + { + if (vbo.mapBuffer() == NULL) + { + llwarns << "mapBuffer failed!" << llendl; + return FALSE; + } + + if (type == LLVertexBuffer::TYPE_INDEX) + { + S32 stride = sizeof(T); + strider = (T*)(vbo.getMappedIndices() + index*stride); + strider.setStride(0); + return TRUE; + } + else if (vbo.hasDataType(type)) + { + S32 stride = LLVertexBuffer::sTypeOffsets[type]; + strider = (T*)(vbo.getMappedData() + vbo.getOffset(type)+index*stride); + strider.setStride(stride); + return TRUE; + } + else + { + llerrs << "VertexBufferStrider could not find valid vertex data." << llendl; + } + return FALSE; + } +}; + +#else + template <class T,S32 type> struct VertexBufferStrider { typedef LLStrider<T> strider_t; @@ -1010,6 +1095,7 @@ template <class T,S32 type> struct VertexBufferStrider } }; +#endif bool LLVertexBuffer::getVertexStrider(LLStrider<LLVector3>& strider, S32 index) { @@ -1272,6 +1358,81 @@ void LLVertexBuffer::setBuffer(U32 data_mask) } } +#if LL_ALIGNED_VB + +// virtual (default) +void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const +{ + LLMemType mt2(LLMemType::MTYPE_VERTEX_SETUP_VERTEX_BUFFER); + stop_glerror(); + U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData; + + if ((data_mask & mTypeMask) != data_mask) + { + llerrs << "LLVertexBuffer::setupVertexBuffer missing required components for supplied data mask." << llendl; + } + + if (data_mask & MAP_NORMAL) + { + glNormalPointer(GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_NORMAL], (void*)(base + mOffsets[TYPE_NORMAL])); + } + if (data_mask & MAP_TEXCOORD3) + { + glClientActiveTextureARB(GL_TEXTURE3_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD3], (void*)(base + mOffsets[TYPE_TEXCOORD3])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_TEXCOORD2) + { + glClientActiveTextureARB(GL_TEXTURE2_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD2], (void*)(base + mOffsets[TYPE_TEXCOORD2])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_TEXCOORD1) + { + glClientActiveTextureARB(GL_TEXTURE1_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD1], (void*)(base + mOffsets[TYPE_TEXCOORD1])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_BINORMAL) + { + glClientActiveTextureARB(GL_TEXTURE2_ARB); + glTexCoordPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_BINORMAL], (void*)(base + mOffsets[TYPE_BINORMAL])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_TEXCOORD0) + { + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0])); + } + if (data_mask & MAP_COLOR) + { + glColorPointer(4, GL_UNSIGNED_BYTE, LLVertexBuffer::sTypeOffsets[TYPE_COLOR], (void*)(base + mOffsets[TYPE_COLOR])); + } + + if (data_mask & MAP_WEIGHT) + { + glVertexAttribPointerARB(1, 1, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT], (void*)(base + mOffsets[TYPE_WEIGHT])); + } + + if (data_mask & MAP_WEIGHT4 && sWeight4Loc != -1) + { + glVertexAttribPointerARB(sWeight4Loc, 4, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT4], (void*)(base+mOffsets[TYPE_WEIGHT4])); + } + + if (data_mask & MAP_CLOTHWEIGHT) + { + glVertexAttribPointerARB(4, 4, GL_FLOAT, TRUE, LLVertexBuffer::sTypeOffsets[TYPE_CLOTHWEIGHT], (void*)(base + mOffsets[TYPE_CLOTHWEIGHT])); + } + if (data_mask & MAP_VERTEX) + { + glVertexPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_VERTEX], (void*)(base + 0)); + } + + llglassertok(); +} + +#else + // virtual (default) void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const { @@ -1344,6 +1505,8 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask) const llglassertok(); } +#endif + void LLVertexBuffer::markDirty(U32 vert_index, U32 vert_count, U32 indices_index, U32 indices_count) { // TODO: use GL_APPLE_flush_buffer_range here diff --git a/indra/llrender/llvertexbuffer.h b/indra/llrender/llvertexbuffer.h index d1700aa54a..47146a5ec4 100644 --- a/indra/llrender/llvertexbuffer.h +++ b/indra/llrender/llvertexbuffer.h @@ -98,7 +98,7 @@ public: //if offsets is not NULL, its contents will be filled //with the offset of each vertex component in the buffer, // indexed by the following enum - static S32 calcStride(const U32& typemask, S32* offsets = NULL); + static S32 calcStride(const U32& typemask, S32* offsets = NULL, S32 num_vertices = 0); enum { TYPE_VERTEX, @@ -187,12 +187,12 @@ public: S32 getRequestedVerts() const { return mRequestedNumVerts; } S32 getRequestedIndices() const { return mRequestedNumIndices; } - U8* getIndicesPointer() const { return useVBOs() ? NULL : mMappedIndexData; } + U8* getIndicesPointer() const { return useVBOs() ? (U8*) mAlignedIndexOffset : mMappedIndexData; } U8* getVerticesPointer() const { return useVBOs() ? NULL : mMappedData; } S32 getStride() const { return mStride; } U32 getTypeMask() const { return mTypeMask; } BOOL hasDataType(S32 type) const { return ((1 << type) & getTypeMask()) ? TRUE : FALSE; } - S32 getSize() const { return mNumVerts*mStride; } + S32 getSize() const; S32 getIndicesSize() const { return mNumIndices * sizeof(U16); } U8* getMappedData() const { return mMappedData; } U8* getMappedIndices() const { return mMappedIndexData; } @@ -207,12 +207,19 @@ public: void drawArrays(U32 mode, U32 offset, U32 count) const; void drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const; + //for debugging, validate data in given range is valid + void validateRange(U32 start, U32 end, U32 count, U32 offset) const; + + + protected: S32 mNumVerts; // Number of vertices allocated S32 mNumIndices; // Number of indices allocated S32 mRequestedNumVerts; // Number of vertices requested S32 mRequestedNumIndices; // Number of indices requested + ptrdiff_t mAlignedOffset; + ptrdiff_t mAlignedIndexOffset; S32 mStride; U32 mTypeMask; S32 mUsage; // GL usage @@ -227,7 +234,7 @@ protected: S32 mOffsets[TYPE_MAX]; BOOL mResized; // if TRUE, client buffer has been resized and GL buffer has not BOOL mDynamicSize; // if TRUE, buffer has been resized at least once (and should be padded) - + class DirtyRegion { public: diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 6de34d9458..600e3294eb 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -79,6 +79,7 @@ #include "llteleporthistory.h" #include "lllocationhistory.h" #include "llfasttimerview.h" +#include "llvector4a.h" #include "llviewermenufile.h" #include "llvoicechannel.h" #include "llvoavatarself.h" @@ -612,6 +613,9 @@ bool LLAppViewer::init() // LLFastTimer::reset(); + // initialize SSE options + LLVector4a::initClass(); + // Need to do this initialization before we do anything else, since anything // that touches files should really go through the lldir API gDirUtilp->initAppDirs("SecondLife"); diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp index d1f4be71f5..0fa0e80cb7 100644 --- a/indra/newview/lldrawpoolavatar.cpp +++ b/indra/newview/lldrawpoolavatar.cpp @@ -1201,14 +1201,14 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLFace* face, const LLMeshSk if (!buff || buff->getTypeMask() != data_mask || - buff->getRequestedVerts() != vol_face.mVertices.size()) + buff->getRequestedVerts() != vol_face.mNumVertices) { face->setGeomIndex(0); face->setIndicesIndex(0); - face->setSize(vol_face.mVertices.size(), vol_face.mIndices.size()); + face->setSize(vol_face.mNumVertices, vol_face.mNumIndices); face->mVertexBuffer = new LLVertexBuffer(data_mask, 0); - face->mVertexBuffer->allocateBuffer(vol_face.mVertices.size(), vol_face.mIndices.size(), true); + face->mVertexBuffer->allocateBuffer(vol_face.mNumVertices, vol_face.mNumIndices, true); U16 offset = 0; @@ -1546,20 +1546,23 @@ void LLVertexBufferAvatar::setupVertexBuffer(U32 data_mask) const { U8* base = useVBOs() ? NULL : mMappedData; - glVertexPointer(3,GL_FLOAT, mStride, (void*)(base + 0)); - glNormalPointer(GL_FLOAT, mStride, (void*)(base + mOffsets[TYPE_NORMAL])); - glTexCoordPointer(2,GL_FLOAT, mStride, (void*)(base + mOffsets[TYPE_TEXCOORD0])); + glVertexPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_VERTEX], (void*)(base + 0)); + glNormalPointer(GL_FLOAT, LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_NORMAL], (void*)(base + mOffsets[TYPE_NORMAL])); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0])); - set_vertex_weights(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::AVATAR_WEIGHT], mStride, (F32*)(base + mOffsets[TYPE_WEIGHT])); + set_vertex_weights(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::AVATAR_WEIGHT], + LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_WEIGHT], (F32*)(base + mOffsets[TYPE_WEIGHT])); if (sShaderLevel >= LLDrawPoolAvatar::SHADER_LEVEL_BUMP) { - set_binormals(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::BINORMAL], mStride, (LLVector3*)(base + mOffsets[TYPE_BINORMAL])); + set_binormals(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::BINORMAL], + LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_BINORMAL], (LLVector3*)(base + mOffsets[TYPE_BINORMAL])); } if (sShaderLevel >= LLDrawPoolAvatar::SHADER_LEVEL_CLOTH) { - set_vertex_clothing_weights(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::AVATAR_CLOTHING], mStride, (LLVector4*)(base + mOffsets[TYPE_CLOTHWEIGHT])); + set_vertex_clothing_weights(LLDrawPoolAvatar::sVertexProgram->mAttribute[LLViewerShaderMgr::AVATAR_CLOTHING], + LLVertexBuffer::sTypeOffsets[LLVertexBuffer::TYPE_CLOTHWEIGHT], (LLVector4*)(base + mOffsets[TYPE_CLOTHWEIGHT])); } } else diff --git a/indra/newview/lldrawpoolbump.cpp b/indra/newview/lldrawpoolbump.cpp index 1fae8e518c..b7092f32a2 100644 --- a/indra/newview/lldrawpoolbump.cpp +++ b/indra/newview/lldrawpoolbump.cpp @@ -82,7 +82,7 @@ static S32 bump_channel = -1; // static void LLStandardBumpmap::init() { - LLStandardBumpmap::restoreGL(); + // do nothing } // static @@ -876,6 +876,8 @@ void LLBumpImageList::clear() // these will be re-populated on-demand mBrightnessEntries.clear(); mDarknessEntries.clear(); + + LLStandardBumpmap::clear(); } void LLBumpImageList::shutdown() @@ -892,8 +894,8 @@ void LLBumpImageList::destroyGL() void LLBumpImageList::restoreGL() { - // Images will be recreated as they are needed. LLStandardBumpmap::restoreGL(); + // Images will be recreated as they are needed. } diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp index 53dc335c16..1571bab7de 100644 --- a/indra/newview/llface.cpp +++ b/indra/newview/llface.cpp @@ -366,8 +366,14 @@ void LLFace::setDrawable(LLDrawable *drawable) mXform = &drawable->mXform; } -void LLFace::setSize(const S32 num_vertices, const S32 num_indices) +void LLFace::setSize(S32 num_vertices, S32 num_indices, bool align) { + if (align) + { + //allocate vertices in blocks of 4 for alignment + num_vertices = (num_vertices + 0x3) & ~0x3; + } + if (mGeomCount != num_vertices || mIndicesCount != num_indices) { @@ -928,8 +934,8 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, { LLFastTimer t(FTM_FACE_GET_GEOM); const LLVolumeFace &vf = volume.getVolumeFace(f); - S32 num_vertices = (S32)vf.mVertices.size(); - S32 num_indices = LLPipeline::sUseTriStrips ? (S32)vf.mTriStrip.size() : (S32) vf.mIndices.size(); + S32 num_vertices = (S32)vf.mNumVertices; + S32 num_indices = (S32) vf.mNumIndices; if (mVertexBuffer.notNull()) { @@ -1128,19 +1134,9 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (full_rebuild) { mVertexBuffer->getIndexStrider(indicesp, mIndicesIndex); - if (LLPipeline::sUseTriStrips) + for (U32 i = 0; i < (U32) num_indices; i++) { - for (U32 i = 0; i < (U32) num_indices; i++) - { - *indicesp++ = vf.mTriStrip[i] + index_offset; - } - } - else - { - for (U32 i = 0; i < (U32) num_indices; i++) - { - *indicesp++ = vf.mIndices[i] + index_offset; - } + *indicesp++ = vf.mIndices[i] + index_offset; } } @@ -1214,28 +1210,41 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, bool bake_sunlight = !getTextureEntry()->getFullbright() && !mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_NORMAL); + //VECTORIZE THIS for (S32 i = 0; i < num_vertices; i++) { + LLVector3 vf_binormal; + if (vf.mBinormals) + { + vf_binormal.setVec(vf.mBinormals[i].getF32()); + } + + LLVector3 vf_normal; + vf_normal.set(vf.mNormals[i].getF32()); + + LLVector3 vf_position; + vf_position.set(vf.mPositions[i].getF32()); + if (rebuild_tcoord) { - LLVector2 tc = vf.mVertices[i].mTexCoord; + LLVector2 tc(vf.mTexCoords[i]); if (texgen != LLTextureEntry::TEX_GEN_DEFAULT) { - LLVector3 vec = vf.mVertices[i].mPosition; + LLVector3 vec = vf_position; vec.scaleVec(scale); switch (texgen) { case LLTextureEntry::TEX_GEN_PLANAR: - planarProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec); + planarProjection(tc, vf_normal, vf.mCenter, vec); break; case LLTextureEntry::TEX_GEN_SPHERICAL: - sphericalProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec); + sphericalProjection(tc, vf_normal, vf.mCenter, vec); break; case LLTextureEntry::TEX_GEN_CYLINDRICAL: - cylindricalProjection(tc, vf.mVertices[i].mNormal, vf.mCenter, vec); + cylindricalProjection(tc, vf_normal, vf.mCenter, vec); break; default: break; @@ -1345,10 +1354,10 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (bump_code && mVertexBuffer->hasDataType(LLVertexBuffer::TYPE_TEXCOORD1)) { - LLVector3 tangent = vf.mVertices[i].mBinormal % vf.mVertices[i].mNormal; + LLVector3 tangent = vf_binormal % vf_normal; LLMatrix3 tangent_to_object; - tangent_to_object.setRows(tangent, vf.mVertices[i].mBinormal, vf.mVertices[i].mNormal); + tangent_to_object.setRows(tangent, vf_binormal, vf_normal); LLVector3 binormal = binormal_dir * tangent_to_object; binormal = binormal * mat_normal; @@ -1366,12 +1375,12 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (rebuild_pos) { - *vertices++ = vf.mVertices[i].mPosition * mat_vert; + *vertices++ = vf_position * mat_vert; } if (rebuild_normal) { - LLVector3 normal = vf.mVertices[i].mNormal * mat_normal; + LLVector3 normal = vf_normal * mat_normal; normal.normVec(); *normals++ = normal; @@ -1379,21 +1388,21 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume, if (rebuild_binormal) { - LLVector3 binormal = vf.mVertices[i].mBinormal * mat_normal; + LLVector3 binormal = vf_binormal * mat_normal; binormal.normVec(); *binormals++ = binormal; } if (rebuild_weights && vf.mWeights.size() > i) { - *weights++ = vf.mWeights[i]; + (*weights++) = vf.mWeights[i]; } if (rebuild_color) { if (bake_sunlight) { - LLVector3 normal = vf.mVertices[i].mNormal * mat_normal; + LLVector3 normal = vf_normal * mat_normal; normal.normVec(); F32 da = normal * gPipeline.mSunDir; diff --git a/indra/newview/llface.h b/indra/newview/llface.h index f9e9c3e078..48909d7895 100644 --- a/indra/newview/llface.h +++ b/indra/newview/llface.h @@ -166,7 +166,7 @@ public: S32 getColors(LLStrider<LLColor4U> &colors); S32 getIndices(LLStrider<U16> &indices); - void setSize(const S32 numVertices, const S32 num_indices = 0); + void setSize(S32 numVertices, S32 num_indices = 0, bool align = false); BOOL genVolumeBBoxes(const LLVolume &volume, S32 f, const LLMatrix4& mat, const LLMatrix3& inv_trans_mat, BOOL global_volume = FALSE); diff --git a/indra/newview/llfloaterimagepreview.cpp b/indra/newview/llfloaterimagepreview.cpp index 159e4b41ca..28fe2a14b7 100644 --- a/indra/newview/llfloaterimagepreview.cpp +++ b/indra/newview/llfloaterimagepreview.cpp @@ -864,8 +864,8 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance) } const LLVolumeFace &vf = mVolume->getVolumeFace(0); - U32 num_indices = vf.mIndices.size(); - U32 num_vertices = vf.mVertices.size(); + U32 num_indices = vf.mNumIndices; + U32 num_vertices = vf.mNumVertices; mVertexBuffer = new LLVertexBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL, 0); mVertexBuffer->allocateBuffer(num_vertices, num_indices, TRUE); @@ -879,10 +879,16 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance) mVertexBuffer->getIndexStrider(index_strider); // build vertices and normals + LLStrider<LLVector3> pos; + pos = (LLVector3*) vf.mPositions; pos.setStride(16); + LLStrider<LLVector3> norm; + norm = (LLVector3*) vf.mNormals; norm.setStride(16); + + for (U32 i = 0; i < num_vertices; i++) { - *(vertex_strider++) = vf.mVertices[i].mPosition; - LLVector3 normal = vf.mVertices[i].mNormal; + *(vertex_strider++) = *pos++; + LLVector3 normal = *norm++; normal.normalize(); *(normal_strider++) = normal; } @@ -901,7 +907,6 @@ void LLImagePreviewSculpted::setPreviewTarget(LLImageRaw* imagep, F32 distance) BOOL LLImagePreviewSculpted::render() { mNeedsUpdate = FALSE; - LLGLSUIDefault def; LLGLDisable no_blend(GL_BLEND); LLGLEnable cull(GL_CULL_FACE); @@ -946,7 +951,7 @@ BOOL LLImagePreviewSculpted::render() LLViewerCamera::getInstance()->setPerspective(FALSE, mOrigin.mX, mOrigin.mY, mFullWidth, mFullHeight, FALSE); const LLVolumeFace &vf = mVolume->getVolumeFace(0); - U32 num_indices = vf.mIndices.size(); + U32 num_indices = vf.mNumIndices; mVertexBuffer->setBuffer(LLVertexBuffer::MAP_VERTEX | LLVertexBuffer::MAP_NORMAL); @@ -959,7 +964,6 @@ BOOL LLImagePreviewSculpted::render() mVertexBuffer->draw(LLRender::TRIANGLES, num_indices, 0); gGL.popMatrix(); - return TRUE; } diff --git a/indra/newview/llhudicon.cpp b/indra/newview/llhudicon.cpp index 28b0e7356a..3c5a4de7f8 100644 --- a/indra/newview/llhudicon.cpp +++ b/indra/newview/llhudicon.cpp @@ -39,6 +39,7 @@ #include "llviewerobject.h" #include "lldrawable.h" +#include "llvector4a.h" #include "llviewercamera.h" #include "llviewertexture.h" #include "llviewerwindow.h" @@ -266,21 +267,41 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en LLVector3 x_scale = image_aspect * (F32)gViewerWindow->getWindowHeightScaled() * mScale * scale_factor * x_pixel_vec; LLVector3 y_scale = (F32)gViewerWindow->getWindowHeightScaled() * mScale * scale_factor * y_pixel_vec; - LLVector3 lower_left = icon_position - (x_scale * 0.5f); - LLVector3 lower_right = icon_position + (x_scale * 0.5f); - LLVector3 upper_left = icon_position - (x_scale * 0.5f) + y_scale; - LLVector3 upper_right = icon_position + (x_scale * 0.5f) + y_scale; + LLVector4a x_scalea; + LLVector4a icon_positiona; + LLVector4a y_scalea; - - F32 t = 0.f; - LLVector3 dir = end-start; + x_scalea.load3(x_scale.mV); + x_scalea.mul(0.5f); + y_scalea.load3(y_scale.mV); + + icon_positiona.load3(icon_position.mV); - if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, start, dir, NULL, NULL, &t, FALSE) || - LLTriangleRayIntersect(upper_left, lower_left, lower_right, start, dir, NULL, NULL, &t, FALSE)) + LLVector4a lower_left; + lower_left.setSub(icon_positiona, x_scalea); + LLVector4a lower_right; + lower_right.setAdd(icon_positiona, x_scalea); + LLVector4a upper_left; + upper_left.setAdd(lower_left, y_scalea); + LLVector4a upper_right; + upper_right.setAdd(lower_right, y_scalea); + + F32 t = 0.f; + LLVector4a enda; + enda.load3(end.mV); + LLVector4a starta; + starta.load3(start.mV); + LLVector4a dir; + dir.setSub(enda, starta); + + if (LLTriangleRayIntersect(upper_right, upper_left, lower_right, starta, dir, NULL, NULL, &t, FALSE) || + LLTriangleRayIntersect(upper_left, lower_left, lower_right, starta, dir, NULL, NULL, &t, FALSE)) { if (intersection) { - *intersection = start + dir*t; + dir.mul(t); + starta.add(dir); + *intersection = LLVector3((F32*) &starta.mQ); } return TRUE; } diff --git a/indra/newview/llinventorybridge.cpp b/indra/newview/llinventorybridge.cpp index ea43670da0..10cc6fae32 100644 --- a/indra/newview/llinventorybridge.cpp +++ b/indra/newview/llinventorybridge.cpp @@ -3400,7 +3400,7 @@ openSoundPreview((void*)this); //send_uuid_sound_trigger(item->getAssetUUID(), 1.0); } */ - } +} void LLSoundBridge::previewItem() { @@ -4187,21 +4187,9 @@ void LLObjectBridge::performAction(LLInventoryModel* model, std::string action) void LLObjectBridge::openItem() { - LLViewerInventoryItem* item = getItem(); - - if (item) - { - LLInvFVBridgeAction::doAction(item->getType(),mUUID,getInventoryModel()); - } - - LLSD key; - key["id"] = mUUID; - LLSideTray::getInstance()->showPanel("sidepanel_inventory", key); - - // Disable old properties floater; this is replaced by the sidepanel. - /* - LLFloaterReg::showInstance("properties", mUUID); - */ + // object double-click action is to wear/unwear object + performAction(getInventoryModel(), + get_is_item_worn(mUUID) ? "detach" : "attach"); } LLFontGL::StyleFlags LLObjectBridge::getLabelStyle() const diff --git a/indra/newview/llpanelobject.cpp b/indra/newview/llpanelobject.cpp index 669ff3ffd6..77f3984ecb 100644 --- a/indra/newview/llpanelobject.cpp +++ b/indra/newview/llpanelobject.cpp @@ -1241,6 +1241,16 @@ void LLPanelObject::sendIsPhantom() } } +#include "llsdutil.h" +class CostResponder : public LLHTTPClient::Responder +{ +public: + CostResponder(U32 id) { mID = id; } + virtual void result(const LLSD& content) { llinfos << ll_pretty_print_sd(content) << llendl; } + + U32 mID; +}; + void LLPanelObject::sendPhysicsShapeType() { U8 value = (U8)mComboPhysicsShapeType->getCurrentIndex(); @@ -1255,6 +1265,13 @@ void LLPanelObject::sendPhysicsShapeType() { llinfos << "update physics shape type not changed" << llendl; } + + std::string url = gAgent.getRegion()->getCapability("GetObjectCost"); + LLSD body = LLSD::emptyArray(); + + body.append(LLSelectMgr::getInstance()->getSelection()->getFirstObject()->getID()); + + LLHTTPClient::post( url, body, new CostResponder(body[0].asInteger()) ); } void LLPanelObject::sendCastShadows() diff --git a/indra/newview/llpolymesh.cpp b/indra/newview/llpolymesh.cpp index b8bdbfb2f8..d10e4fee3a 100644 --- a/indra/newview/llpolymesh.cpp +++ b/indra/newview/llpolymesh.cpp @@ -35,7 +35,8 @@ //----------------------------------------------------------------------------- #include "llviewerprecompiledheaders.h" -#include "llpolymesh.h" +#include "llfasttimer.h" +#include "llmemory.h" #include "llviewercontrol.h" #include "llxmltree.h" @@ -45,7 +46,7 @@ #include "llvolume.h" #include "llendianswizzle.h" -#include "llfasttimer.h" +#include "llpolymesh.h" #define HEADER_ASCII "Linden Mesh 1.0" #define HEADER_BINARY "Linden Binary Mesh 1.0" @@ -140,7 +141,7 @@ void LLPolyMeshSharedData::freeMeshData() delete [] mDetailTexCoords; mDetailTexCoords = NULL; - delete [] mWeights; + ll_aligned_free_16(mWeights); mWeights = NULL; } @@ -230,7 +231,7 @@ BOOL LLPolyMeshSharedData::allocateVertexData( U32 numVertices ) mBaseBinormals = new LLVector3[ numVertices ]; mTexCoords = new LLVector2[ numVertices ]; mDetailTexCoords = new LLVector2[ numVertices ]; - mWeights = new F32[ numVertices ]; + mWeights = (F32*) ll_aligned_malloc_16((numVertices*sizeof(F32)+0xF) & ~0xF); for (i = 0; i < numVertices; i++) { mWeights[i] = 0.f; @@ -715,15 +716,22 @@ LLPolyMesh::LLPolyMesh(LLPolyMeshSharedData *shared_data, LLPolyMesh *reference_ int nfloats = nverts * (2*4 + 3*3 + 2 + 4); //use aligned vertex data to make LLPolyMesh SSE friendly - mVertexData = (F32*) _mm_malloc(nfloats*4, 16); + mVertexData = (F32*) ll_aligned_malloc_16(nfloats*4); int offset = 0; - mCoords = (LLVector4*)(mVertexData + offset); offset += 4*nverts; - mNormals = (LLVector4*)(mVertexData + offset); offset += 4*nverts; - mScaledNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; - mBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; - mScaledBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; - mTexCoords = (LLVector2*)(mVertexData + offset); offset += 2*nverts; - mClothingWeights = (LLVector4*)(mVertexData + offset); offset += 4*nverts; + + //all members must be 16-byte aligned except the last 3 + mCoords = (LLVector4*)(mVertexData + offset); offset += 4*nverts; + mNormals = (LLVector4*)(mVertexData + offset); offset += 4*nverts; + mClothingWeights = (LLVector4*)(mVertexData + offset); offset += 4*nverts; + mTexCoords = (LLVector2*)(mVertexData + offset); offset += 2*nverts; + + // these members don't need to be 16-byte aligned, but the first one might be + // read during an aligned memcpy of mTexCoords + mScaledNormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; + mBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; + mScaledBinormals = (LLVector3*)(mVertexData + offset); offset += 3*nverts; + + #else mCoords = new LLVector3[mSharedData->mNumVertices]; mNormals = new LLVector3[mSharedData->mNumVertices]; @@ -759,7 +767,7 @@ LLPolyMesh::~LLPolyMesh() delete [] mClothingWeights; delete [] mTexCoords; #else - _mm_free(mVertexData); + ll_aligned_free_16(mVertexData); #endif } diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp index 77c38798d1..470c332b42 100644 --- a/indra/newview/llspatialpartition.cpp +++ b/indra/newview/llspatialpartition.cpp @@ -512,50 +512,6 @@ void LLSpatialGroup::checkStates() #endif } -void validate_draw_info(LLDrawInfo& params) -{ -#if LL_OCTREE_PARANOIA_CHECK - if (params.mVertexBuffer.isNull()) - { - llerrs << "Draw batch has no vertex buffer." << llendl; - } - - //bad range - if (params.mStart >= params.mEnd) - { - llerrs << "Draw batch has invalid range." << llendl; - } - - if (params.mEnd >= (U32) params.mVertexBuffer->getNumVerts()) - { - llerrs << "Draw batch has buffer overrun error." << llendl; - } - - if (params.mOffset + params.mCount > (U32) params.mVertexBuffer->getNumIndices()) - { - llerrs << "Draw batch has index buffer ovverrun error." << llendl; - } - - //bad indices - U16* indicesp = (U16*) params.mVertexBuffer->getIndicesPointer(); - if (indicesp) - { - for (U32 i = params.mOffset; i < params.mOffset+params.mCount; i++) - { - if (indicesp[i] < (U16)params.mStart) - { - llerrs << "Draw batch has vertex buffer index out of range error (index too low)." << llendl; - } - - if (indicesp[i] > (U16)params.mEnd) - { - llerrs << "Draw batch has vertex buffer index out of range error (index too high)." << llendl; - } - } - } -#endif -} - void LLSpatialGroup::validateDrawMap() { #if LL_OCTREE_PARANOIA_CHECK @@ -565,8 +521,8 @@ void LLSpatialGroup::validateDrawMap() for (drawmap_elem_t::iterator j = draw_vec.begin(); j != draw_vec.end(); ++j) { LLDrawInfo& params = **j; - - validate_draw_info(params); + + params.validate(); } } #endif @@ -3379,18 +3335,9 @@ LLDrawInfo::LLDrawInfo(U16 start, U16 end, U32 count, U32 offset, mDistance(0.f), mDrawMode(LLRender::TRIANGLES) { - mDebugColor = (rand() << 16) + rand(); - if (mStart >= mVertexBuffer->getRequestedVerts() || - mEnd >= mVertexBuffer->getRequestedVerts()) - { - llerrs << "Invalid draw info vertex range." << llendl; - } + mVertexBuffer->validateRange(mStart, mEnd, mCount, mOffset); - if (mOffset >= (U32) mVertexBuffer->getRequestedIndices() || - mOffset + mCount > (U32) mVertexBuffer->getRequestedIndices()) - { - llerrs << "Invalid draw info index range." << llendl; - } + mDebugColor = (rand() << 16) + rand(); } LLDrawInfo::~LLDrawInfo() @@ -3406,6 +3353,11 @@ LLDrawInfo::~LLDrawInfo() } } +void LLDrawInfo::validate() +{ + mVertexBuffer->validateRange(mStart, mEnd, mCount, mOffset); +} + LLVertexBuffer* LLGeometryManager::createVertexBuffer(U32 type_mask, U32 usage) { return new LLVertexBuffer(type_mask, usage); diff --git a/indra/newview/llspatialpartition.h b/indra/newview/llspatialpartition.h index 67c33d5b0f..9b252d1035 100644 --- a/indra/newview/llspatialpartition.h +++ b/indra/newview/llspatialpartition.h @@ -75,6 +75,8 @@ public: BOOL fullbright = FALSE, U8 bump = 0, BOOL particle = FALSE, F32 part_size = 0); + void validate(); + LLPointer<LLVertexBuffer> mVertexBuffer; LLPointer<LLViewerTexture> mTexture; LLColor4U mGlowColor; @@ -676,8 +678,6 @@ public: virtual void shift(const LLVector3 &offset); }; -void validate_draw_info(LLDrawInfo& params); - extern const F32 SG_BOX_SIDE; extern const F32 SG_BOX_OFFSET; extern const F32 SG_BOX_RAD; diff --git a/indra/newview/llviewercamera.cpp b/indra/newview/llviewercamera.cpp index aa82c216d9..cef7c4abbb 100644 --- a/indra/newview/llviewercamera.cpp +++ b/indra/newview/llviewercamera.cpp @@ -38,6 +38,7 @@ // Viewer includes #include "llagent.h" #include "llagentcamera.h" +#include "llmatrix4a.h" #include "llviewercontrol.h" #include "llviewerobjectlist.h" #include "llviewerregion.h" @@ -787,21 +788,29 @@ BOOL LLViewerCamera::areVertsVisible(LLViewerObject* volumep, BOOL all_verts) LLMatrix4 render_mat(vo_volume->getRenderRotation(), LLVector4(vo_volume->getRenderPosition())); + LLMatrix4a render_mata; + render_mata.loadu(render_mat); + LLMatrix4a mata; + mata.loadu(mat); + num_faces = volume->getNumVolumeFaces(); for (i = 0; i < num_faces; i++) { const LLVolumeFace& face = volume->getVolumeFace(i); - for (U32 v = 0; v < face.mVertices.size(); v++) + for (U32 v = 0; v < face.mNumVertices; v++) { - LLVector4 vec = LLVector4(face.mVertices[v].mPosition) * mat; + const LLVector4a& src_vec = face.mPositions[v]; + LLVector4a vec; + mata.affineTransform(src_vec, vec); if (drawablep->isActive()) { - vec = vec * render_mat; + LLVector4a t = vec; + render_mata.affineTransform(t, vec); } - BOOL in_frustum = pointInFrustum(LLVector3(vec)) > 0; + BOOL in_frustum = pointInFrustum(LLVector3(vec.getF32())) > 0; if (( !in_frustum && all_verts) || (in_frustum && !all_verts)) diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp index 236ad98d68..91605005e3 100644 --- a/indra/newview/llviewerjointmesh.cpp +++ b/indra/newview/llviewerjointmesh.cpp @@ -655,6 +655,9 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy) //----------------------------------------------------------------------------- void LLViewerJointMesh::updateFaceSizes(U32 &num_vertices, U32& num_indices, F32 pixel_area) { + //bump num_vertices to next multiple of 4 + num_vertices = (num_vertices + 0x3) & ~0x3; + // Do a pre-alloc pass to determine sizes of data. if (mMesh && mValid) { @@ -677,6 +680,8 @@ static LLFastTimer::DeclareTimer FTM_AVATAR_FACE("Avatar Face"); void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_wind, bool terse_update) { + //IF THIS FUNCTION BREAKS, SEE LLPOLYMESH CONSTRUCTOR AND CHECK ALIGNMENT OF INPUT ARRAYS + mFace = face; if (mFace->mVertexBuffer.isNull()) @@ -684,6 +689,16 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w return; } + LLDrawPool *poolp = mFace->getPool(); + BOOL hardware_skinning = (poolp && poolp->getVertexShaderLevel() > 0) ? TRUE : FALSE; + + if (!hardware_skinning && terse_update) + { //no need to do terse updates if we're doing software vertex skinning + // since mMesh is being copied into mVertexBuffer every frame + return; + } + + LLFastTimer t(FTM_AVATAR_FACE); LLStrider<LLVector3> verticesp; @@ -696,108 +711,52 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w // Copy data into the faces from the polymesh data. if (mMesh && mValid) { - if (mMesh->getNumVertices()) + const U32 num_verts = mMesh->getNumVertices(); + + if (num_verts) { - stop_glerror(); face->getGeometryAvatar(verticesp, normalsp, tex_coordsp, vertex_weightsp, clothing_weightsp); - stop_glerror(); face->mVertexBuffer->getIndexStrider(indicesp); - stop_glerror(); verticesp += mMesh->mFaceVertexOffset; - tex_coordsp += mMesh->mFaceVertexOffset; normalsp += mMesh->mFaceVertexOffset; - vertex_weightsp += mMesh->mFaceVertexOffset; - clothing_weightsp += mMesh->mFaceVertexOffset; - - const U32* __restrict coords = (U32*) mMesh->getCoords(); - const U32* __restrict tex_coords = (U32*) mMesh->getTexCoords(); - const U32* __restrict normals = (U32*) mMesh->getNormals(); - const U32* __restrict weights = (U32*) mMesh->getWeights(); - const U32* __restrict cloth_weights = (U32*) mMesh->getClothingWeights(); - - const U32 num_verts = mMesh->getNumVertices(); - - U32 i = 0; - - const U32 skip = verticesp.getSkip()/sizeof(U32); + + F32* v = (F32*) verticesp.get(); + F32* n = (F32*) normalsp.get(); + + U32 words = num_verts*4; - U32* __restrict v = (U32*) verticesp.get(); - U32* __restrict n = (U32*) normalsp.get(); + LLVector4a::memcpyNonAliased16(v, (F32*) mMesh->getCoords(), words); + LLVector4a::memcpyNonAliased16(n, (F32*) mMesh->getNormals(), words); + - if (terse_update) + if (!terse_update) { - for (S32 i = num_verts; i > 0; --i) - { - //morph target application only, only update positions and normals - v[0] = coords[0]; - v[1] = coords[1]; - v[2] = coords[2]; - coords += 4; - v += skip; - } + vertex_weightsp += mMesh->mFaceVertexOffset; + clothing_weightsp += mMesh->mFaceVertexOffset; + tex_coordsp += mMesh->mFaceVertexOffset; + + F32* tc = (F32*) tex_coordsp.get(); + F32* vw = (F32*) vertex_weightsp.get(); + F32* cw = (F32*) clothing_weightsp.get(); - for (S32 i = num_verts; i > 0; --i) - { - n[0] = normals[0]; - n[1] = normals[1]; - n[2] = normals[2]; - normals += 4; - n += skip; - } + LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2); + LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts); + LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4); } - else - { - - U32* __restrict tc = (U32*) tex_coordsp.get(); - U32* __restrict vw = (U32*) vertex_weightsp.get(); - U32* __restrict cw = (U32*) clothing_weightsp.get(); - - do - { - v[0] = coords[0]; - v[1] = coords[1]; - v[2] = coords[2]; - coords += 4; - v += skip; - - tc[0] = *(tex_coords++); - tc[1] = *(tex_coords++); - tc += skip; - - n[0] = normals[0]; - n[1] = normals[1]; - n[2] = normals[2]; - normals += 4; - n += skip; - - vw[0] = *(weights++); - vw += skip; - - cw[0] = *(cloth_weights++); - cw[1] = *(cloth_weights++); - cw[2] = *(cloth_weights++); - cw[3] = *(cloth_weights++); - cw += skip; - } - while (++i < num_verts); - - const U32 idx_count = mMesh->getNumFaces()*3; - indicesp += mMesh->mFaceIndexOffset; + const U32 idx_count = mMesh->getNumFaces()*3; - U16* __restrict idx = indicesp.get(); - S32* __restrict src_idx = (S32*) mMesh->getFaces(); + indicesp += mMesh->mFaceIndexOffset; - i = 0; + U16* __restrict idx = indicesp.get(); + S32* __restrict src_idx = (S32*) mMesh->getFaces(); - const S32 offset = (S32) mMesh->mFaceVertexOffset; + const S32 offset = (S32) mMesh->mFaceVertexOffset; - do - { - *(idx++) = *(src_idx++)+offset; - } - while (++i < idx_count); + for (S32 i = 0; i < idx_count; ++i) + { + *(idx++) = *(src_idx++)+offset; } } } @@ -824,50 +783,44 @@ void LLViewerJointMesh::updateGeometryOriginal(LLFace *mFace, LLPolyMesh *mMesh) buffer->getVertexStrider(o_vertices, 0); buffer->getNormalStrider(o_normals, 0); - //F32 last_weight = F32_MAX; - LLMatrix4a gBlendMat; + F32* __restrict vert = o_vertices[0].mV; + F32* __restrict norm = o_normals[0].mV; const F32* __restrict weights = mMesh->getWeights(); const LLVector4a* __restrict coords = (LLVector4a*) mMesh->getCoords(); const LLVector4a* __restrict normals = (LLVector4a*) mMesh->getNormals(); + U32 offset = mMesh->mFaceVertexOffset*4; + vert += offset; + norm += offset; + for (U32 index = 0; index < mMesh->getNumVertices(); index++) { - U32 bidx = index + mMesh->mFaceVertexOffset; - - // blend by first matrix - F32 w = weights[index]; - - //LLVector4a coord; - //coord.load4a(coords[index].mV); + // equivalent to joint = floorf(weights[index]); + S32 joint = _mm_cvtt_ss2si(_mm_load_ss(weights+index)); + F32 w = weights[index] - joint; - //LLVector4a norm; - //norm.load4a(normals[index].mV); + LLMatrix4a gBlendMat; - S32 joint = llfloor(w); - w -= joint; - - if (w > 0.f) + if (w != 0.f) { - // Try to keep all the accesses to the matrix data as close - // together as possible. This function is a hot spot on the - // Mac. JC + // blend between matrices and apply gBlendMat.setLerp(gJointMatAligned[joint+0], gJointMatAligned[joint+1], w); LLVector4a res; gBlendMat.affineTransform(coords[index], res); - o_vertices[bidx].setVec(res[0], res[1], res[2]); + res.store4a(vert+index*4); gBlendMat.rotate(normals[index], res); - o_normals[bidx].setVec(res[0], res[1], res[2]); + res.store4a(norm+index*4); } else { // No lerp required in this case. LLVector4a res; gJointMatAligned[joint].affineTransform(coords[index], res); - o_vertices[bidx].setVec(res[0], res[1], res[2]); + res.store4a(vert+index*4); gJointMatAligned[joint].rotate(normals[index], res); - o_normals[bidx].setVec(res[0], res[1], res[2]); + res.store4a(norm+index*4); } } diff --git a/indra/newview/llviewerjointmesh_sse.cpp b/indra/newview/llviewerjointmesh_sse.cpp index e586b910cd..4fb5fafad1 100644 --- a/indra/newview/llviewerjointmesh_sse.cpp +++ b/indra/newview/llviewerjointmesh_sse.cpp @@ -94,8 +94,8 @@ void LLViewerJointMesh::updateGeometrySSE(LLFace *face, LLPolyMesh *mesh) buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset); const F32* weights = mesh->getWeights(); - const LLVector3* coords = mesh->getCoords(); - const LLVector3* normals = mesh->getNormals(); + const LLVector3* coords = (const LLVector3*)mesh->getCoords(); + const LLVector3* normals = (const LLVector3*)mesh->getNormals(); for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index) { if( weight != weights[index]) diff --git a/indra/newview/llviewerjointmesh_sse2.cpp b/indra/newview/llviewerjointmesh_sse2.cpp index 550044c321..58cd75871d 100644 --- a/indra/newview/llviewerjointmesh_sse2.cpp +++ b/indra/newview/llviewerjointmesh_sse2.cpp @@ -101,8 +101,8 @@ void LLViewerJointMesh::updateGeometrySSE2(LLFace *face, LLPolyMesh *mesh) buffer->getNormalStrider(o_normals, mesh->mFaceVertexOffset); const F32* weights = mesh->getWeights(); - const LLVector3* coords = mesh->getCoords(); - const LLVector3* normals = mesh->getNormals(); + const LLVector3* coords = (const LLVector3*)mesh->getCoords(); + const LLVector3* normals = (const LLVector3*)mesh->getNormals(); for (U32 index = 0, index_end = mesh->getNumVertices(); index < index_end; ++index) { if( weight != weights[index]) diff --git a/indra/newview/llviewerregion.cpp b/indra/newview/llviewerregion.cpp index 4fdabd7ff0..268e14674e 100644 --- a/indra/newview/llviewerregion.cpp +++ b/indra/newview/llviewerregion.cpp @@ -1497,6 +1497,7 @@ void LLViewerRegion::setSeedCapability(const std::string& url) capabilityNames.append("FetchLibDescendents"); capabilityNames.append("GetTexture"); capabilityNames.append("GetMesh"); + capabilityNames.append("GetObjectCost"); capabilityNames.append("GroupProposalBallot"); capabilityNames.append("HomeLocation"); capabilityNames.append("LandResources"); @@ -1513,13 +1514,13 @@ void LLViewerRegion::setSeedCapability(const std::string& url) capabilityNames.append("ProvisionVoiceAccountRequest"); capabilityNames.append("RemoteParcelRequest"); capabilityNames.append("RequestTextureDownload"); - capabilityNames.append("SimulatorFeatures"); capabilityNames.append("SearchStatRequest"); capabilityNames.append("SearchStatTracking"); capabilityNames.append("SendPostcard"); capabilityNames.append("SendUserReport"); capabilityNames.append("SendUserReportWithScreenshot"); capabilityNames.append("ServerReleaseNotes"); + capabilityNames.append("SimulatorFeatures"); capabilityNames.append("StartGroupProposal"); capabilityNames.append("TextureStats"); capabilityNames.append("UntrustedSimulatorMessage"); diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp index a82afbeb76..91c9b762c5 100644 --- a/indra/newview/llvograss.cpp +++ b/indra/newview/llvograss.cpp @@ -594,7 +594,7 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en LLVector2 tc[4]; LLVector3 v[4]; - // LLVector3 n[4]; // unused! + //LLVector3 n[4]; F32 closest_t = 1.f; @@ -640,7 +640,6 @@ BOOL LLVOGrass::lineSegmentIntersect(const LLVector3& start, const LLVector3& en position.mV[2] += blade_height; v[3] = v1 = position + mRegionp->getOriginAgent(); - F32 a,b,t; BOOL hit = FALSE; diff --git a/indra/newview/llvosurfacepatch.cpp b/indra/newview/llvosurfacepatch.cpp index ef7b161003..eef62ddf1a 100644 --- a/indra/newview/llvosurfacepatch.cpp +++ b/indra/newview/llvosurfacepatch.cpp @@ -60,27 +60,77 @@ public: LLVertexBuffer(MAP_VERTEX | MAP_NORMAL | MAP_TEXCOORD0 | MAP_TEXCOORD1 | MAP_COLOR, GL_DYNAMIC_DRAW_ARB) { //texture coordinates 2 and 3 exist, but use the same data as texture coordinate 1 - mOffsets[TYPE_TEXCOORD3] = mOffsets[TYPE_TEXCOORD2] = mOffsets[TYPE_TEXCOORD1]; - mTypeMask |= MAP_TEXCOORD2 | MAP_TEXCOORD3; }; - /*// virtual + // virtual void setupVertexBuffer(U32 data_mask) const - { - if (LLDrawPoolTerrain::getDetailMode() == 0 || LLPipeline::sShadowRender) + { + U8* base = useVBOs() ? (U8*) mAlignedOffset : mMappedData; + + //assume tex coords 2 and 3 are present + U32 type_mask = mTypeMask | MAP_TEXCOORD2 | MAP_TEXCOORD3; + + if ((data_mask & type_mask) != data_mask) + { + llerrs << "LLVertexBuffer::setupVertexBuffer missing required components for supplied data mask." << llendl; + } + + if (data_mask & MAP_NORMAL) + { + glNormalPointer(GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_NORMAL], (void*)(base + mOffsets[TYPE_NORMAL])); + } + if (data_mask & MAP_TEXCOORD3) + { //substitute tex coord 0 for tex coord 3 + glClientActiveTextureARB(GL_TEXTURE3_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_TEXCOORD2) + { //substitute tex coord 0 for tex coord 2 + glClientActiveTextureARB(GL_TEXTURE2_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); + } + if (data_mask & MAP_TEXCOORD1) { - LLVertexBuffer::setupVertexBuffer(data_mask); + glClientActiveTextureARB(GL_TEXTURE1_ARB); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD1], (void*)(base + mOffsets[TYPE_TEXCOORD1])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); } - else if (data_mask & LLVertexBuffer::MAP_TEXCOORD1) + if (data_mask & MAP_BINORMAL) { - LLVertexBuffer::setupVertexBuffer(data_mask); + glClientActiveTextureARB(GL_TEXTURE2_ARB); + glTexCoordPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_BINORMAL], (void*)(base + mOffsets[TYPE_BINORMAL])); + glClientActiveTextureARB(GL_TEXTURE0_ARB); } - else + if (data_mask & MAP_TEXCOORD0) { - LLVertexBuffer::setupVertexBuffer(data_mask); + glTexCoordPointer(2,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_TEXCOORD0], (void*)(base + mOffsets[TYPE_TEXCOORD0])); } - llglassertok(); - }*/ + if (data_mask & MAP_COLOR) + { + glColorPointer(4, GL_UNSIGNED_BYTE, LLVertexBuffer::sTypeOffsets[TYPE_COLOR], (void*)(base + mOffsets[TYPE_COLOR])); + } + + if (data_mask & MAP_WEIGHT) + { + glVertexAttribPointerARB(1, 1, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT], (void*)(base + mOffsets[TYPE_WEIGHT])); + } + + if (data_mask & MAP_WEIGHT4 && sWeight4Loc != -1) + { + glVertexAttribPointerARB(sWeight4Loc, 4, GL_FLOAT, FALSE, LLVertexBuffer::sTypeOffsets[TYPE_WEIGHT4], (void*)(base+mOffsets[TYPE_WEIGHT4])); + } + + if (data_mask & MAP_CLOTHWEIGHT) + { + glVertexAttribPointerARB(4, 4, GL_FLOAT, TRUE, LLVertexBuffer::sTypeOffsets[TYPE_CLOTHWEIGHT], (void*)(base + mOffsets[TYPE_CLOTHWEIGHT])); + } + if (data_mask & MAP_VERTEX) + { + glVertexPointer(3,GL_FLOAT, LLVertexBuffer::sTypeOffsets[TYPE_VERTEX], (void*)(base + 0)); + } + } }; //============================================================================ diff --git a/indra/newview/llvotextbubble.cpp b/indra/newview/llvotextbubble.cpp index 428ef20006..339da3c0bf 100644 --- a/indra/newview/llvotextbubble.cpp +++ b/indra/newview/llvotextbubble.cpp @@ -45,6 +45,7 @@ #include "llviewertexturelist.h" #include "llvolume.h" #include "pipeline.h" +#include "llvector4a.h" #include "llviewerregion.h" LLVOTextBubble::LLVOTextBubble(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp) @@ -217,7 +218,7 @@ void LLVOTextBubble::updateFaceSize(S32 idx) else { const LLVolumeFace& vol_face = getVolume()->getVolumeFace(idx); - face->setSize(vol_face.mVertices.size(), vol_face.mIndices.size()); + face->setSize(vol_face.mNumVertices, vol_face.mNumIndices); } } @@ -235,19 +236,37 @@ void LLVOTextBubble::getGeometry(S32 idx, const LLVolumeFace& face = getVolume()->getVolumeFace(idx); - LLVector3 pos = getPositionAgent(); + LLVector4a pos; + pos.load3(getPositionAgent().mV); + + LLVector4a scale; + scale.load3(getScale().mV); + LLColor4U color = LLColor4U(getTE(idx)->getColor()); U32 offset = mDrawable->getFace(idx)->getGeomIndex(); - for (U32 i = 0; i < face.mVertices.size(); i++) + LLVector4a* dst_pos = (LLVector4a*) verticesp.get(); + LLVector4a* src_pos = (LLVector4a*) face.mPositions; + + LLVector4a* dst_norm = (LLVector4a*) normalsp.get(); + LLVector4a* src_norm = (LLVector4a*) face.mNormals; + + LLVector2* dst_tc = (LLVector2*) texcoordsp.get(); + LLVector2* src_tc = (LLVector2*) face.mTexCoords; + + LLVector4a::memcpyNonAliased16((F32*) dst_norm, (F32*) src_norm, face.mNumVertices*4); + LLVector4a::memcpyNonAliased16((F32*) dst_tc, (F32*) src_tc, face.mNumVertices*2); + + + for (U32 i = 0; i < face.mNumVertices; i++) { - *verticesp++ = face.mVertices[i].mPosition.scaledVec(getScale()) + pos; - *normalsp++ = face.mVertices[i].mNormal; - *texcoordsp++ = face.mVertices[i].mTexCoord; + LLVector4a t; + t.setMul(src_pos[i], scale); + dst_pos[i].setAdd(t, pos); *colorsp++ = color; } - for (U32 i = 0; i < face.mIndices.size(); i++) + for (U32 i = 0; i < face.mNumIndices; i++) { *indicesp++ = face.mIndices[i] + offset; } diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp index cea964a100..8022f81f19 100644 --- a/indra/newview/llvovolume.cpp +++ b/indra/newview/llvovolume.cpp @@ -60,6 +60,7 @@ #include "llflexibleobject.h" #include "llsky.h" #include "lltexturefetch.h" +#include "llvector4a.h" #include "llviewercamera.h" #include "llviewertexturelist.h" #include "llviewerobjectlist.h" @@ -1601,14 +1602,8 @@ void LLVOVolume::updateFaceSize(S32 idx) else { const LLVolumeFace& vol_face = getVolume()->getVolumeFace(idx); - if (LLPipeline::sUseTriStrips) - { - facep->setSize(vol_face.mVertices.size(), vol_face.mTriStrip.size()); - } - else - { - facep->setSize(vol_face.mVertices.size(), vol_face.mIndices.size()); - } + facep->setSize(vol_face.mNumVertices, vol_face.mNumIndices); + } } @@ -1863,21 +1858,25 @@ bool LLVOVolume::hasMedia() const LLVector3 LLVOVolume::getApproximateFaceNormal(U8 face_id) { LLVolume* volume = getVolume(); - LLVector3 result; + LLVector4a result; + result.clear(); + + LLVector3 ret; if (volume && face_id < volume->getNumVolumeFaces()) { const LLVolumeFace& face = volume->getVolumeFace(face_id); - for (S32 i = 0; i < (S32)face.mVertices.size(); ++i) + for (S32 i = 0; i < (S32)face.mNumVertices; ++i) { - result += face.mVertices[i].mNormal; + result.add(face.mNormals[i]); } - result = volumeDirectionToAgent(result); - result.normVec(); + LLVector3 ret((F32*) &result.mQ); + ret = volumeDirectionToAgent(ret); + ret.normVec(); } - return result; + return ret; } void LLVOVolume::requestMediaDataUpdate(bool isNew) @@ -3032,7 +3031,7 @@ F32 LLVOVolume::getBinRadius() for (S32 i = 0; i < volume->getNumVolumeFaces(); ++i) { const LLVolumeFace& face = volume->getVolumeFace(i); - vert_count += face.mVertices.size(); + vert_count += face.mNumVertices; } scale = 1.f/llmax(vert_count/1024.f, 1.f); @@ -3383,7 +3382,7 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep, draw_vec[idx]->mCount += facep->getIndicesCount(); draw_vec[idx]->mEnd += facep->getGeomCount(); draw_vec[idx]->mVSize = llmax(draw_vec[idx]->mVSize, facep->getVirtualSize()); - validate_draw_info(*draw_vec[idx]); + draw_vec[idx]->validate(); update_min_max(draw_vec[idx]->mExtents[0], draw_vec[idx]->mExtents[1], facep->mExtents[0]); update_min_max(draw_vec[idx]->mExtents[0], draw_vec[idx]->mExtents[1], facep->mExtents[1]); } @@ -3407,12 +3406,13 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep, } draw_info->mExtents[0] = facep->mExtents[0]; draw_info->mExtents[1] = facep->mExtents[1]; - validate_draw_info(*draw_info); if (LLPipeline::sUseTriStrips) { draw_info->mDrawMode = LLRender::TRIANGLE_STRIP; } + + draw_info->validate(); } } |