diff options
author | James Cook <james@lindenlab.com> | 2007-07-02 23:52:40 +0000 |
---|---|---|
committer | James Cook <james@lindenlab.com> | 2007-07-02 23:52:40 +0000 |
commit | 1a33bc19b4ce94ab210749911dff14409b4454dd (patch) | |
tree | b674d97d37240a29c0a6671adfe950a506ef0ea4 /indra/llmath/llv4matrix4.h | |
parent | e5124431b54d4342d4677371fccca5bc7250c079 (diff) |
svn merge -r 62595:62596 and 62598:63308 sse-skinning-3 for faster software avatar rendering. Visual Studio 2005 project file fixed pending.
Diffstat (limited to 'indra/llmath/llv4matrix4.h')
-rw-r--r-- | indra/llmath/llv4matrix4.h | 231 |
1 files changed, 231 insertions, 0 deletions
diff --git a/indra/llmath/llv4matrix4.h b/indra/llmath/llv4matrix4.h new file mode 100644 index 0000000000..0673f6fa7d --- /dev/null +++ b/indra/llmath/llv4matrix4.h @@ -0,0 +1,231 @@ +/** + * @file llviewerjointmesh.cpp + * @brief LLV4* class header file - vector processor enabled math + * + * Copyright (c) 2007-$CurrentYear$, Linden Research, Inc. + * $License$ + */ + +#ifndef LL_LLV4MATRIX4_H +#define LL_LLV4MATRIX4_H + +#include "llv4math.h" +#include "llv4matrix3.h" // just for operator LLV4Matrix3() +#include "llv4vector3.h" + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// LLV4Matrix4 +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +LL_LLV4MATH_ALIGN_PREFIX + +class LLV4Matrix4 +{ +public: + union { + F32 mMatrix[LLV4_NUM_AXIS][LLV4_NUM_AXIS]; + V4F32 mV[LLV4_NUM_AXIS]; + }; + + void lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w); + void multiply(const LLVector3 &a, LLVector3& o) const; + void multiply(const LLVector3 &a, LLV4Vector3& o) const; + + const LLV4Matrix4& transpose(); + const LLV4Matrix4& translate(const LLVector3 &vec); + const LLV4Matrix4& translate(const LLV4Vector3 &vec); + const LLV4Matrix4& operator=(const LLMatrix4& a); + + operator LLMatrix4() const { return *(reinterpret_cast<const LLMatrix4*>(const_cast<const F32*>(&mMatrix[0][0]))); } + operator LLV4Matrix3() const { return *(reinterpret_cast<const LLV4Matrix3*>(const_cast<const F32*>(&mMatrix[0][0]))); } + + friend LLVector3 operator*(const LLVector3 &a, const LLV4Matrix4 &b); +} + +LL_LLV4MATH_ALIGN_POSTFIX; + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// LLV4Matrix4 - SSE +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +#if LL_VECTORIZE + +inline void LLV4Matrix4::lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w) +{ + __m128 vw = _mm_set1_ps(w); + mV[VX] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VX], a.mV[VX]), vw), a.mV[VX]); // ( b - a ) * w + a + mV[VY] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VY], a.mV[VY]), vw), a.mV[VY]); + mV[VZ] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VZ], a.mV[VZ]), vw), a.mV[VZ]); + mV[VW] = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(b.mV[VW], a.mV[VW]), vw), a.mV[VW]); +} + +inline void LLV4Matrix4::multiply(const LLVector3 &a, LLVector3& o) const +{ + LLV4Vector3 j; + j.v = _mm_add_ps(mV[VW], _mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX])); // ( ax * vx ) + vw + j.v = _mm_add_ps(j.v , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); + j.v = _mm_add_ps(j.v , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); + o.setVec(j.mV); +} + +inline void LLV4Matrix4::multiply(const LLVector3 &a, LLV4Vector3& o) const +{ + o.v = _mm_add_ps(mV[VW], _mm_mul_ps(_mm_set1_ps(a.mV[VX]), mV[VX])); // ( ax * vx ) + vw + o.v = _mm_add_ps(o.v , _mm_mul_ps(_mm_set1_ps(a.mV[VY]), mV[VY])); + o.v = _mm_add_ps(o.v , _mm_mul_ps(_mm_set1_ps(a.mV[VZ]), mV[VZ])); +} + +inline const LLV4Matrix4& LLV4Matrix4::translate(const LLV4Vector3 &vec) +{ + mV[VW] = _mm_add_ps(mV[VW], vec.v); + return (*this); +} + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// LLV4Matrix4 +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +#else + +inline void LLV4Matrix4::lerp(const LLV4Matrix4 &a, const LLV4Matrix4 &b, const F32 &w) +{ + mMatrix[VX][VX] = llv4lerp(a.mMatrix[VX][VX], b.mMatrix[VX][VX], w); + mMatrix[VX][VY] = llv4lerp(a.mMatrix[VX][VY], b.mMatrix[VX][VY], w); + mMatrix[VX][VZ] = llv4lerp(a.mMatrix[VX][VZ], b.mMatrix[VX][VZ], w); + + mMatrix[VY][VX] = llv4lerp(a.mMatrix[VY][VX], b.mMatrix[VY][VX], w); + mMatrix[VY][VY] = llv4lerp(a.mMatrix[VY][VY], b.mMatrix[VY][VY], w); + mMatrix[VY][VZ] = llv4lerp(a.mMatrix[VY][VZ], b.mMatrix[VY][VZ], w); + + mMatrix[VZ][VX] = llv4lerp(a.mMatrix[VZ][VX], b.mMatrix[VZ][VX], w); + mMatrix[VZ][VY] = llv4lerp(a.mMatrix[VZ][VY], b.mMatrix[VZ][VY], w); + mMatrix[VZ][VZ] = llv4lerp(a.mMatrix[VZ][VZ], b.mMatrix[VZ][VZ], w); + + mMatrix[VW][VX] = llv4lerp(a.mMatrix[VW][VX], b.mMatrix[VW][VX], w); + mMatrix[VW][VY] = llv4lerp(a.mMatrix[VW][VY], b.mMatrix[VW][VY], w); + mMatrix[VW][VZ] = llv4lerp(a.mMatrix[VW][VZ], b.mMatrix[VW][VZ], w); +} + +inline void LLV4Matrix4::multiply(const LLVector3 &a, LLVector3& o) const +{ + o.setVec( a.mV[VX] * mMatrix[VX][VX] + + a.mV[VY] * mMatrix[VY][VX] + + a.mV[VZ] * mMatrix[VZ][VX] + + mMatrix[VW][VX], + + a.mV[VX] * mMatrix[VX][VY] + + a.mV[VY] * mMatrix[VY][VY] + + a.mV[VZ] * mMatrix[VZ][VY] + + mMatrix[VW][VY], + + a.mV[VX] * mMatrix[VX][VZ] + + a.mV[VY] * mMatrix[VY][VZ] + + a.mV[VZ] * mMatrix[VZ][VZ] + + mMatrix[VW][VZ]); +} + +inline void LLV4Matrix4::multiply(const LLVector3 &a, LLV4Vector3& o) const +{ + o.setVec( a.mV[VX] * mMatrix[VX][VX] + + a.mV[VY] * mMatrix[VY][VX] + + a.mV[VZ] * mMatrix[VZ][VX] + + mMatrix[VW][VX], + + a.mV[VX] * mMatrix[VX][VY] + + a.mV[VY] * mMatrix[VY][VY] + + a.mV[VZ] * mMatrix[VZ][VY] + + mMatrix[VW][VY], + + a.mV[VX] * mMatrix[VX][VZ] + + a.mV[VY] * mMatrix[VY][VZ] + + a.mV[VZ] * mMatrix[VZ][VZ] + + mMatrix[VW][VZ]); +} + +inline const LLV4Matrix4& LLV4Matrix4::translate(const LLV4Vector3 &vec) +{ + mMatrix[3][0] += vec.mV[0]; + mMatrix[3][1] += vec.mV[1]; + mMatrix[3][2] += vec.mV[2]; + return (*this); +} + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// LLV4Matrix4 +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +#endif + +inline const LLV4Matrix4& LLV4Matrix4::operator=(const LLMatrix4& a) +{ + memcpy(mMatrix, a.mMatrix, sizeof(F32) * 16 ); + return *this; +} + +inline const LLV4Matrix4& LLV4Matrix4::transpose() +{ +#if LL_VECTORIZE && defined(_MM_TRANSPOSE4_PS) + _MM_TRANSPOSE4_PS(mV[VX], mV[VY], mV[VZ], mV[VW]); +#else + LLV4Matrix4 mat; + mat.mMatrix[0][0] = mMatrix[0][0]; + mat.mMatrix[1][0] = mMatrix[0][1]; + mat.mMatrix[2][0] = mMatrix[0][2]; + mat.mMatrix[3][0] = mMatrix[0][3]; + + mat.mMatrix[0][1] = mMatrix[1][0]; + mat.mMatrix[1][1] = mMatrix[1][1]; + mat.mMatrix[2][1] = mMatrix[1][2]; + mat.mMatrix[3][1] = mMatrix[1][3]; + + mat.mMatrix[0][2] = mMatrix[2][0]; + mat.mMatrix[1][2] = mMatrix[2][1]; + mat.mMatrix[2][2] = mMatrix[2][2]; + mat.mMatrix[3][2] = mMatrix[2][3]; + + mat.mMatrix[0][3] = mMatrix[3][0]; + mat.mMatrix[1][3] = mMatrix[3][1]; + mat.mMatrix[2][3] = mMatrix[3][2]; + mat.mMatrix[3][3] = mMatrix[3][3]; + + *this = mat; +#endif + return *this; +} + +inline const LLV4Matrix4& LLV4Matrix4::translate(const LLVector3 &vec) +{ + mMatrix[3][0] += vec.mV[0]; + mMatrix[3][1] += vec.mV[1]; + mMatrix[3][2] += vec.mV[2]; + return (*this); +} + +inline LLVector3 operator*(const LLVector3 &a, const LLV4Matrix4 &b) +{ + return LLVector3(a.mV[VX] * b.mMatrix[VX][VX] + + a.mV[VY] * b.mMatrix[VY][VX] + + a.mV[VZ] * b.mMatrix[VZ][VX] + + b.mMatrix[VW][VX], + + a.mV[VX] * b.mMatrix[VX][VY] + + a.mV[VY] * b.mMatrix[VY][VY] + + a.mV[VZ] * b.mMatrix[VZ][VY] + + b.mMatrix[VW][VY], + + a.mV[VX] * b.mMatrix[VX][VZ] + + a.mV[VY] * b.mMatrix[VY][VZ] + + a.mV[VZ] * b.mMatrix[VZ][VZ] + + b.mMatrix[VW][VZ]); +} + + +#endif |