Diffstat (limited to 'indra/llmath/llvector4a.inl')
| Mode | File | Lines changed |
| --- | --- | --- |
| -rw-r--r-- | indra/llmath/llvector4a.inl | 24 |

1 file changed, 17 insertions, 7 deletions
```diff
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 17e7de6eeb..0f7c4123ac 100644
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -115,7 +115,7 @@ inline void LLVector4a::set(F32 x, F32 y, F32 z, F32 w)
 // Set to all zeros
 inline void LLVector4a::clear()
 {
-    mQ = LLVector4a::getZero().mQ;
+    mQ = _mm_setzero_ps();
 }
 
 inline void LLVector4a::splat(const F32 x)
@@ -272,6 +272,9 @@ inline void LLVector4a::setCross3(const LLVector4a& a, const LLVector4a& b)
 // Set all elements to the dot product of the x, y, and z elements in a and b
 inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b)
 {
+#if (defined(__arm64__) || defined(__aarch64__))
+    mQ = _mm_dp_ps(a.mQ, b.mQ, 0x7f);
+#else
 	// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
 	const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ );
 	// yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] }
@@ -284,11 +287,15 @@ inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b)
 	const __m128i zSplat = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE( 2, 2, 2, 2 ));
 	// mQ = { a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same }
 	mQ = _mm_add_ps(_mm_castsi128_ps(zSplat), xPlusYSplat);
+#endif
 }
 
 // Set all elements to the dot product of the x, y, z, and w elements in a and b
 inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b)
 {
+#if (defined(__arm64__) || defined(__aarch64__))
+    mQ = _mm_dp_ps(a.mQ, b.mQ, 0xff);
+#else
 	// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
 	const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ );
 	// yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] }
@@ -301,21 +308,29 @@ inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b)
 	// mQ = { a[W]*b[W] + a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same }
 	mQ = _mm_add_ps(xPlusYSplat, zPlusWSplat);
+#endif
 }
 
 // Return the 3D dot product of this vector and b
 inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const
 {
+#if (defined(__arm64__) || defined(__aarch64__))
+    return _mm_dp_ps(mQ, b.mQ, 0x7f);
+#else
 	const LLQuad ab = _mm_mul_ps( mQ, b.mQ );
 	const LLQuad splatY = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(1, 1, 1, 1) ) );
 	const LLQuad splatZ = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(2, 2, 2, 2) ) );
 	const LLQuad xPlusY = _mm_add_ps( ab, splatY );
 	return _mm_add_ps( xPlusY, splatZ );
+#endif
 }
 
 // Return the 4D dot product of this vector and b
 inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const
 {
+#if (defined(__arm64__) || defined(__aarch64__))
+    return _mm_dp_ps(mQ, b.mQ, 0xff);
+#else
 	// ab = { w, z, y, x }
 	const LLQuad ab = _mm_mul_ps( mQ, b.mQ );
 	// upperProdsInLowerElems = { y, x, y, x }
@@ -325,6 +340,7 @@ inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const
 	// shuffled = { z+x, z+x, z+x, z+x }
 	const LLQuad shuffled = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( sumOfPairs ), _MM_SHUFFLE(1, 1, 1, 1) ) );
 	return _mm_add_ss( sumOfPairs, shuffled );
+#endif
 }
 
 // Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed
@@ -608,12 +624,6 @@ inline bool LLVector4a::equals3(const LLVector4a& rhs, F32 tolerance ) const
 ////////////////////////////////////
 
 // Do NOT add aditional operators without consulting someone with SSE experience
-inline const LLVector4a& LLVector4a::operator= ( const LLVector4a& rhs )
-{
-	mQ = rhs.mQ;
-	return *this;
-}
-
 inline const LLVector4a& LLVector4a::operator= ( const LLQuad& rhs )
 {
 	mQ = rhs;
```
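The arm64 branches funnel all four dot-product paths through `_mm_dp_ps`, whose immediate selects which input lanes feed the multiply-add (high nibble) and which output lanes receive the broadcast sum (low nibble): `0x7f` for the x/y/z cases, `0xff` for the full 4D cases. The sketch below is ours, not part of the patch; it assumes an SSE4.1-capable x86 build, and on arm64 it assumes the same intrinsic is supplied by an SSE-to-NEON translation header (for example sse2neon) rather than by hardware SSE.

```cpp
#include <smmintrin.h>  // SSE4.1: _mm_dp_ps (also pulls in the older SSE headers)
#include <cstdio>

int main()
{
    // a = (x, y, z, w) and likewise for b; _mm_setr_ps puts x in the lowest lane.
    const __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
    const __m128 b = _mm_setr_ps(5.0f, 6.0f, 7.0f, 8.0f);

    // 0x7f = 0111'1111: multiply only lanes x, y, z and write the sum to all
    // four output lanes -- the mask used by setAllDot3()/dot3() in the diff.
    const __m128 d3 = _mm_dp_ps(a, b, 0x7f);

    // 0xff = 1111'1111: multiply all four lanes and broadcast the sum --
    // the mask used by setAllDot4()/dot4().
    const __m128 d4 = _mm_dp_ps(a, b, 0xff);

    float r3[4], r4[4];
    _mm_storeu_ps(r3, d3);
    _mm_storeu_ps(r4, d4);
    std::printf("dot3 = %g (expect 38)\n", r3[0]);  // 1*5 + 2*6 + 3*7
    std::printf("dot4 = %g (expect 70)\n", r4[0]);  // 38 + 4*8
    return 0;
}
```

Broadcasting to every lane matches what setAllDot3()/setAllDot4() promise, and for dot3()/dot4() only the low lane matters when the result is converted to an LLSimdScalar, so either mask choice appears consistent with the shuffle-and-add sequences it replaces.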
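The clear() hunk is in the same spirit: instead of copying the zero vector obtained from LLVector4a::getZero(), the member is zeroed directly with `_mm_setzero_ps()`, which compilers typically lower to a register-zeroing idiom rather than a memory load. A stand-alone illustration of that intrinsic (the test scaffolding here is ours, not the viewer's):

```cpp
#include <xmmintrin.h>  // SSE: _mm_setzero_ps, _mm_storeu_ps

int main()
{
    // Zero a 4-float SIMD register directly, as the patched clear() does,
    // instead of loading a shared zero constant from elsewhere.
    const __m128 zero = _mm_setzero_ps();

    float out[4];
    _mm_storeu_ps(out, zero);
    return (out[0] == 0.0f && out[1] == 0.0f &&
            out[2] == 0.0f && out[3] == 0.0f) ? 0 : 1;
}
```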
