Diffstat (limited to 'indra/llmath')
-rw-r--r-- | indra/llmath/llmatrix4a.h     | 41
-rw-r--r-- | indra/llmath/lloctree.h       | 13
-rw-r--r-- | indra/llmath/llrigginginfo.h  | 29
-rw-r--r-- | indra/llmath/llvector4a.h     | 11
-rw-r--r-- | indra/llmath/llvector4a.inl   |  4
-rw-r--r-- | indra/llmath/llvolume.cpp     | 25
-rw-r--r-- | indra/llmath/llvolumeoctree.h | 27
-rw-r--r-- | indra/llmath/m4math.cpp       |  9
-rw-r--r-- | indra/llmath/m4math.h         |  2
-rw-r--r-- | indra/llmath/v3math.cpp       |  6
-rw-r--r-- | indra/llmath/v3math.h         |  5
11 files changed, 101 insertions, 71 deletions
diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h
index 7ba347062f..5291a05607 100644
--- a/indra/llmath/llmatrix4a.h
+++ b/indra/llmath/llmatrix4a.h
@@ -36,6 +36,26 @@ class LLMatrix4a
 public:
     LL_ALIGN_16(LLVector4a mMatrix[4]);
 
+    LLMatrix4a()
+    {
+
+    }
+
+    explicit LLMatrix4a(const LLMatrix4& val)
+    {
+        loadu(val);
+    }
+
+    inline F32* getF32ptr()
+    {
+        return (F32*) &mMatrix;
+    }
+
+    inline const F32* getF32ptr() const
+    {
+        return (F32*)&mMatrix;
+    }
+
     inline void clear()
     {
         mMatrix[0].clear();
@@ -44,6 +64,14 @@ public:
         mMatrix[3].clear();
     }
 
+    inline void setIdentity()
+    {
+        mMatrix[0].set(1.f, 0.f, 0.f, 0.f);
+        mMatrix[1].set(0.f, 1.f, 0.f, 0.f);
+        mMatrix[2].set(0.f, 0.f, 1.f, 0.f);
+        mMatrix[3].set(0.f, 0.f, 0.f, 1.f);
+    }
+
     inline void loadu(const LLMatrix4& src)
     {
         mMatrix[0] = _mm_loadu_ps(src.mMatrix[0]);
@@ -105,7 +133,7 @@ public:
         mMatrix[3].setAdd(a.mMatrix[3],d3);
     }
 
-    inline void rotate(const LLVector4a& v, LLVector4a& res)
+    inline void rotate(const LLVector4a& v, LLVector4a& res) const
     {
         LLVector4a y,z;
 
@@ -151,6 +179,8 @@ public:
     {
         affineTransformSSE(v,res);
     }
+
+    const LLVector4a& getTranslation() const { return mMatrix[3]; }
 };
 
 inline LLVector4a rowMul(const LLVector4a &row, const LLMatrix4a &mat)
@@ -176,6 +206,15 @@ inline void matMul(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
     res.mMatrix[3] = row3;
 }
 
+//Faster version of matMul wehere res must not be a or b
+inline void matMulUnsafe(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
+{
+    res.mMatrix[0] = rowMul(a.mMatrix[0], b);
+    res.mMatrix[1] = rowMul(a.mMatrix[1], b);
+    res.mMatrix[2] = rowMul(a.mMatrix[2], b);
+    res.mMatrix[3] = rowMul(a.mMatrix[3], b);
+}
+
 inline std::ostream& operator<<(std::ostream& s, const LLMatrix4a& m)
 {
     s << "[" << m.mMatrix[0] << ", " << m.mMatrix[1] << ", " << m.mMatrix[2] << ", " << m.mMatrix[3] << "]";
diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h
index 0e2f62f9db..8c4a1304b4 100644
--- a/indra/llmath/lloctree.h
+++ b/indra/llmath/lloctree.h
@@ -74,8 +74,9 @@ public:
 };
 
 template <class T>
-class LLOctreeNode : public LLTreeNode<T>
+class alignas(16) LLOctreeNode : public LLTreeNode<T>
 {
+    LL_ALIGN_NEW
 public:
 
     typedef LLOctreeTraveler<T> oct_traveler;
@@ -91,16 +92,6 @@ public:
     typedef LLOctreeNode<T> oct_node;
     typedef LLOctreeListener<T> oct_listener;
 
-    void* operator new(size_t size)
-    {
-        return ll_aligned_malloc_16(size);
-    }
-
-    void operator delete(void* ptr)
-    {
-        ll_aligned_free_16(ptr);
-    }
-
     LLOctreeNode(  const LLVector4a& center,
                    const LLVector4a& size,
                    BaseType* parent,
diff --git a/indra/llmath/llrigginginfo.h b/indra/llmath/llrigginginfo.h
index b3d6bc2d19..059c6ae082 100644
--- a/indra/llmath/llrigginginfo.h
+++ b/indra/llmath/llrigginginfo.h
@@ -34,9 +34,9 @@
 // Extents are in joint space
 // isRiggedTo is based on the state of all currently associated rigged meshes
 
-LL_ALIGN_PREFIX(16)
-class LLJointRiggingInfo
+class alignas(16) LLJointRiggingInfo
 {
+    LL_ALIGN_NEW
 public:
     LLJointRiggingInfo();
     bool isRiggedTo() const;
@@ -45,31 +45,10 @@ public:
     const LLVector4a *getRiggedExtents() const;
     void merge(const LLJointRiggingInfo& other);
 
-    void* operator new(size_t size)
-    {
-        return ll_aligned_malloc_16(size);
-    }
-
-    void operator delete(void* ptr)
-    {
-        ll_aligned_free_16(ptr);
-    }
-
-    void* operator new[](size_t size)
-    {
-        return ll_aligned_malloc_16(size);
-    }
-
-    void operator delete[](void* ptr)
-    {
-        ll_aligned_free_16(ptr);
-    }
-
-
 private:
-    LL_ALIGN_16(LLVector4a mRiggedExtents[2]);
+    LLVector4a mRiggedExtents[2];
     bool mIsRiggedTo;
-} LL_ALIGN_POSTFIX(16);
+};
 
 // For storing all the rigging info associated with a given avatar or
 // object, keyed by joint_num.
diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h
index 27abf39537..53c8f604f6 100644
--- a/indra/llmath/llvector4a.h
+++ b/indra/llmath/llvector4a.h
@@ -46,11 +46,10 @@ class LLRotation;
 // of this writing, July 08, 2010) about getting it implemented before you resort to
 // LLVector3/LLVector4.
 /////////////////////////////////
 
-struct LLVector4a;
-LL_ALIGN_PREFIX(16)
-struct LLVector4a
+class alignas(16) LLVector4a
 {
+    LL_ALIGN_NEW
 public:
 
     ///////////////////////////////////
@@ -138,10 +137,10 @@ public:
     // BASIC GET/SET
     ////////////////////////////////////
 
-    // Return a "this" as an F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon)
+    // Return a "this" as an F32 pointer.
     inline F32* getF32ptr();
 
-    // Return a "this" as a const F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon)
+    // Return a "this" as a const F32 pointer.
     inline const F32* const getF32ptr() const;
 
     // Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates
@@ -324,7 +323,7 @@ public:
 
 
 private:
     LLQuad mQ;
-} LL_ALIGN_POSTFIX(16);
+};
 
 inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p)
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 69d3d01efe..8be1c1b114 100644
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -58,13 +58,13 @@ inline void LLVector4a::store4a(F32* dst) const
 // BASIC GET/SET
 ////////////////////////////////////
 
-// Return a "this" as an F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon)
+// Return a "this" as an F32 pointer.
 F32* LLVector4a::getF32ptr()
 {
     return (F32*) &mQ;
 }
 
-// Return a "this" as a const F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon)
+// Return a "this" as a const F32 pointer.
 const F32* const LLVector4a::getF32ptr() const
 {
     return (const F32* const) &mQ;
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index e085fa6ada..130f30bedc 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -383,6 +383,7 @@ public:
 
     virtual void visit(const LLOctreeNode<LLVolumeTriangle>* branch)
     { //this is a depth first traversal, so it's safe to assum all children have complete
         //bounding data
+        LL_PROFILE_ZONE_SCOPED
         LLVolumeOctreeListener* node = (LLVolumeOctreeListener*) branch->getListener(0);
 
@@ -822,6 +823,8 @@ S32 LLProfile::getNumPoints(const LLProfileParams& params, BOOL path_open,F32 de
 BOOL LLProfile::generate(const LLProfileParams& params, BOOL path_open,F32 detail, S32 split,
                          BOOL is_sculpted, S32 sculpt_size)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     if ((!mDirty) && (!is_sculpted))
     {
         return FALSE;
@@ -1302,6 +1305,8 @@ S32 LLPath::getNumNGonPoints(const LLPathParams& params, S32 sides, F32 startOff
 
 void LLPath::genNGon(const LLPathParams& params, S32 sides, F32 startOff, F32 end_scale, F32 twist_scale)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     // Generates a circular path, starting at (1, 0, 0), counterclockwise along the xz plane.
     static const F32 tableScale[] = { 1, 1, 1, 0.5f, 0.707107f, 0.53f, 0.525f, 0.5f };
 
@@ -1536,6 +1541,8 @@ S32 LLPath::getNumPoints(const LLPathParams& params, F32 detail)
 BOOL LLPath::generate(const LLPathParams& params, F32 detail, S32 split,
                       BOOL is_sculpted, S32 sculpt_size)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     if ((!mDirty) && (!is_sculpted))
     {
         return FALSE;
@@ -2112,6 +2119,8 @@ LLVolume::~LLVolume()
 
 BOOL LLVolume::generate()
 {
+    LL_PROFILE_ZONE_SCOPED
+
     LL_CHECK_MEMORY
     llassert_always(mProfilep);
 
@@ -2370,6 +2379,8 @@ bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs
 
 bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     //input stream is now pointing at a zlib compressed block of LLSD
     //decompress block
     LLSD mdl;
@@ -2755,6 +2766,8 @@ S32 LLVolume::getNumFaces() const
 
 void LLVolume::createVolumeFaces()
 {
+    LL_PROFILE_ZONE_SCOPED
+
     if (mGenerateSingleFace)
     {
         // do nothing
@@ -3720,6 +3733,8 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices,
                                           const LLMatrix3& norm_mat_in,
                                           S32 face_mask)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     LLMatrix4a mat;
     mat.loadu(mat_in);
 
@@ -4846,6 +4861,8 @@ void LLVolumeFace::freeData()
 
 BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     //tree for this face is no longer valid
     delete mOctree;
     mOctree = NULL;
@@ -5514,6 +5531,8 @@ bool LLVolumeFace::cacheOptimize()
 
 void LLVolumeFace::createOctree(F32 scaler, const LLVector4a& center, const LLVector4a& size)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     if (mOctree)
     {
         return;
@@ -6287,6 +6306,8 @@ void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVe
 
 void LLVolumeFace::createTangents()
 {
+    LL_PROFILE_ZONE_SCOPED
+
     if (!mTangents)
     {
         allocateTangents(mNumVertices);
@@ -6482,6 +6503,8 @@ void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v,
 
 BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     LL_CHECK_MEMORY
     BOOL flat = mTypeMask & FLAT_MASK;
 
@@ -6974,6 +6997,8 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build)
 
 void CalculateTangentArray(U32 vertexCount, const LLVector4a *vertex, const LLVector4a *normal, const LLVector2 *texcoord, U32 triangleCount, const U16* index_array, LLVector4a *tangent)
 {
+    LL_PROFILE_ZONE_SCOPED
+
     //LLVector4a *tan1 = new LLVector4a[vertexCount * 2];
     LLVector4a* tan1 = (LLVector4a*) ll_aligned_malloc_16(vertexCount*2*sizeof(LLVector4a));
     // new(tan1) LLVector4a;
diff --git a/indra/llmath/llvolumeoctree.h b/indra/llmath/llvolumeoctree.h
index 13150028d8..b2bc440368 100644
--- a/indra/llmath/llvolumeoctree.h
+++ b/indra/llmath/llvolumeoctree.h
@@ -34,19 +34,10 @@
 #include "llvolume.h"
 #include "llvector4a.h"
 
-class LLVolumeTriangle : public LLRefCount
+class alignas(16) LLVolumeTriangle : public LLRefCount
 {
+    LL_ALIGN_NEW
 public:
-    void* operator new(size_t size)
-    {
-        return ll_aligned_malloc_16(size);
-    }
-
-    void operator delete(void* ptr)
-    {
-        ll_aligned_free_16(ptr);
-    }
-
     LLVolumeTriangle()
     {
         mBinIndex = -1;
@@ -86,20 +77,10 @@ public:
 
 };
 
-class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle>
+class alignas(16) LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle>
 {
+    LL_ALIGN_NEW
 public:
-
-    void* operator new(size_t size)
-    {
-        return ll_aligned_malloc_16(size);
-    }
-
-    void operator delete(void* ptr)
-    {
-        ll_aligned_free_16(ptr);
-    }
-
     LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node);
     ~LLVolumeOctreeListener();
 
diff --git a/indra/llmath/m4math.cpp b/indra/llmath/m4math.cpp
index 3baf1bad18..6e40dae30b 100644
--- a/indra/llmath/m4math.cpp
+++ b/indra/llmath/m4math.cpp
@@ -32,8 +32,7 @@
 #include "m4math.h"
 #include "m3math.h"
 #include "llquaternion.h"
-
-
+#include "llmatrix4a.h"
 
 // LLMatrix4
 
@@ -115,6 +114,12 @@ LLMatrix4::LLMatrix4(const LLQuaternion &q)
     *this = initRotation(q);
 }
 
+LLMatrix4::LLMatrix4(const LLMatrix4a& mat)
+    : LLMatrix4(mat.getF32ptr())
+{
+
+}
+
 LLMatrix4::LLMatrix4(const LLQuaternion &q, const LLVector4 &pos)
 {
     *this = initRotTrans(q, pos);
diff --git a/indra/llmath/m4math.h b/indra/llmath/m4math.h
index bf60adb9b6..b9da970cde 100644
--- a/indra/llmath/m4math.h
+++ b/indra/llmath/m4math.h
@@ -32,6 +32,7 @@
 class LLVector4;
 class LLMatrix3;
 class LLQuaternion;
+class LLMatrix4a;
 
 // NOTA BENE: Currently assuming a right-handed, x-forward, y-left, z-up universe
 
@@ -104,6 +105,7 @@ public:
     explicit LLMatrix4(const F32 *mat);         // Initializes Matrix to values in mat
     explicit LLMatrix4(const LLMatrix3 &mat);   // Initializes Matrix to values in mat and sets position to (0,0,0)
     explicit LLMatrix4(const LLQuaternion &q);  // Initializes Matrix with rotation q and sets position to (0,0,0)
+    explicit LLMatrix4(const LLMatrix4a& mat);
 
     LLMatrix4(const LLMatrix3 &mat, const LLVector4 &pos); // Initializes Matrix to values in mat and pos
 
diff --git a/indra/llmath/v3math.cpp b/indra/llmath/v3math.cpp
index b04c67d926..93010d2250 100644
--- a/indra/llmath/v3math.cpp
+++ b/indra/llmath/v3math.cpp
@@ -316,6 +316,12 @@ LLVector3::LLVector3(const LLVector4 &vec)
     mV[VZ] = (F32)vec.mV[VZ];
 }
 
+LLVector3::LLVector3(const LLVector4a& vec)
+    : LLVector3(vec.getF32ptr())
+{
+
+}
+
 LLVector3::LLVector3(const LLSD& sd)
 {
     setValue(sd);
diff --git a/indra/llmath/v3math.h b/indra/llmath/v3math.h
index 6f857d7061..068f489020 100644
--- a/indra/llmath/v3math.h
+++ b/indra/llmath/v3math.h
@@ -33,6 +33,7 @@
 #include "llsd.h"
 class LLVector2;
 class LLVector4;
+class LLVector4a;
 class LLMatrix3;
 class LLMatrix4;
 class LLVector3d;
@@ -62,7 +63,9 @@ class LLVector3
     explicit LLVector3(const LLVector2 &vec);    // Initializes LLVector3 to (vec[0]. vec[1], 0)
     explicit LLVector3(const LLVector3d &vec);   // Initializes LLVector3 to (vec[0]. vec[1], vec[2])
     explicit LLVector3(const LLVector4 &vec);    // Initializes LLVector4 to (vec[0]. vec[1], vec[2])
-    explicit LLVector3(const LLSD& sd);
+    explicit LLVector3(const LLVector4a& vec);   // Initializes LLVector4 to (vec[0]. vec[1], vec[2])
+    explicit LLVector3(const LLSD& sd);
+
     LLSD getValue() const;
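
For readers skimming the patch, the following standalone sketch (not part of the commit) shows how the additions above fit together: the new LLMatrix4a default constructor, setIdentity(), getTranslation(), getF32ptr(), matMulUnsafe(), and the new explicit conversions LLMatrix4(const LLMatrix4a&), LLMatrix4a(const LLMatrix4&), and LLVector3(const LLVector4a&). The function name example_new_llmath_api is hypothetical; everything else it uses appears in the diff above or in the headers this patch touches.

#include "llmatrix4a.h"   // LLMatrix4a, matMul, matMulUnsafe
#include "m4math.h"       // LLMatrix4
#include "v3math.h"       // LLVector3

// Hypothetical usage sketch -- not part of the patch.
void example_new_llmath_api()
{
    // New default constructor plus setIdentity() replace hand-rolled setup.
    LLMatrix4a model;
    model.setIdentity();

    LLMatrix4a view;
    view.setIdentity();

    // matMulUnsafe() writes each result row directly, so the output must not
    // alias either input (see the comment added in llmatrix4a.h); matMul()
    // remains the general-purpose path.
    LLMatrix4a model_view;
    matMulUnsafe(model, view, model_view);   // distinct output: ok
    matMul(model, view, model);              // output aliases an input: use matMul()

    // New explicit conversions between the scalar and SIMD matrix types.
    LLMatrix4 scalar_mat(model_view);        // LLMatrix4(const LLMatrix4a&)
    LLMatrix4a simd_mat(scalar_mat);         // LLMatrix4a(const LLMatrix4&)

    // getTranslation() exposes the fourth row, and LLVector3 can now be
    // constructed directly from an LLVector4a.
    LLVector3 position(simd_mat.getTranslation());

    // getF32ptr() gives raw float access to the 16 matrix elements.
    const F32* raw = simd_mat.getF32ptr();
    (void) raw;
    (void) position;
}

The same pattern holds for the allocation change: classes that previously defined their own aligned operator new/delete (LLOctreeNode, LLJointRiggingInfo, LLVolumeTriangle, LLVolumeOctreeListener) now declare alignas(16) and rely on the LL_ALIGN_NEW macro, which is defined outside this patch.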