Diffstat (limited to 'indra/llmath')
-rw-r--r--  indra/llmath/llsimdmath.h                 14
-rw-r--r--  indra/llmath/llvector4a.inl               18
-rw-r--r--  indra/llmath/tests/llquaternion_test.cpp   6
3 files changed, 32 insertions, 6 deletions
diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h
index 40953dc2e8..b27b034cf3 100644
--- a/indra/llmath/llsimdmath.h
+++ b/indra/llmath/llsimdmath.h
@@ -31,16 +31,26 @@
#error "Please include llmath.h before this file."
#endif
-#if ( ( LL_DARWIN || LL_LINUX ) && !(__SSE2__) ) || ( LL_WINDOWS && ( _M_IX86_FP < 2 && ADDRESS_SIZE == 32 ) )
-#error SSE2 not enabled. LLVector4a and related class will not compile.
+// the check for this error case must be split into multiple parts
+// because some versions of VS complain about '__SSE2__'
+#if ( ( LL_DARWIN || LL_LINUX ) )
+ #if !(__SSE2__) && !(__arm64__) && !(__aarch64__)
+ #error SSE2 not enabled. LLVector4a and related class will not compile.
+ #endif
+#elif ( LL_WINDOWS && ( _M_IX86_FP < 2 && ADDRESS_SIZE == 32 ) )
+ #error SSE2 not enabled. LLVector4a and related class will not compile.
#endif
#if !LL_WINDOWS
#include <stdint.h>
#endif
+#if defined(__arm64__) || defined(__aarch64__)
+#include "sse2neon.h"
+#else
#include <xmmintrin.h>
#include <emmintrin.h>
+#endif
#include "llmemory.h"
#include "llsimdtypes.h"
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 77a0257fbf..443a46c317 100644
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -115,7 +115,7 @@ inline void LLVector4a::set(F32 x, F32 y, F32 z, F32 w)
// Set to all zeros
inline void LLVector4a::clear()
{
- mQ = LLVector4a::getZero().mQ;
+ mQ = _mm_setzero_ps();
}
inline void LLVector4a::splat(const F32 x)
@@ -272,6 +272,9 @@ inline void LLVector4a::setCross3(const LLVector4a& a, const LLVector4a& b)
// Set all elements to the dot product of the x, y, and z elements in a and b
inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b)
{
+#if (defined(__arm64__) || defined(__aarch64__))
+ mQ = _mm_dp_ps(a.mQ, b.mQ, 0x7f);
+#else
// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ );
// yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] }
@@ -284,11 +287,15 @@ inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b)
const __m128i zSplat = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE( 2, 2, 2, 2 ));
// mQ = { a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same }
mQ = _mm_add_ps(_mm_castsi128_ps(zSplat), xPlusYSplat);
+#endif
}
// Set all elements to the dot product of the x, y, z, and w elements in a and b
inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b)
{
+#if (defined(__arm64__) || defined(__aarch64__))
+ mQ = _mm_dp_ps(a.mQ, b.mQ, 0xff);
+#else
// ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] }
const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ );
// yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] }
@@ -301,21 +308,29 @@ inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b)
// mQ = { a[W]*b[W] + a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same }
mQ = _mm_add_ps(xPlusYSplat, zPlusWSplat);
+#endif
}
// Return the 3D dot product of this vector and b
inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const
{
+#if (defined(__arm64__) || defined(__aarch64__))
+ return _mm_dp_ps(mQ, b.mQ, 0x7f);
+#else
const LLQuad ab = _mm_mul_ps( mQ, b.mQ );
const LLQuad splatY = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(1, 1, 1, 1) ) );
const LLQuad splatZ = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(2, 2, 2, 2) ) );
const LLQuad xPlusY = _mm_add_ps( ab, splatY );
return _mm_add_ps( xPlusY, splatZ );
+#endif
}
// Return the 4D dot product of this vector and b
inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const
{
+#if (defined(__arm64__) || defined(__aarch64__))
+ return _mm_dp_ps(mQ, b.mQ, 0xff);
+#else
// ab = { w, z, y, x }
const LLQuad ab = _mm_mul_ps( mQ, b.mQ );
// upperProdsInLowerElems = { y, x, y, x }
@@ -325,6 +340,7 @@ inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const
// shuffled = { z+x, z+x, z+x, z+x }
const LLQuad shuffled = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( sumOfPairs ), _MM_SHUFFLE(1, 1, 1, 1) ) );
return _mm_add_ss( sumOfPairs, shuffled );
+#endif
}
// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bits of precision. W component is destroyed
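
[Note] On the arm64 path the patch replaces the manual multiply/shuffle/add sequences with _mm_dp_ps, which sse2neon maps to NEON. In the immediate byte, bits 7:4 pick which lanes enter the dot product and bits 3:0 pick which output lanes receive the sum, so 0x7f sums x, y, z and broadcasts the result to all four lanes, while 0xff does the same over all four components. A minimal sketch comparing the intrinsic against an equivalent shuffle/add sequence (not part of the patch; assumes SSE4.1 via <smmintrin.h> on x86, or arm64 with sse2neon.h):

    #if defined(__arm64__) || defined(__aarch64__)
    #include "sse2neon.h"     // assumption: sse2neon.h is on the include path
    #else
    #include <smmintrin.h>    // _mm_dp_ps needs SSE4.1 on x86
    #endif
    #include <cstdio>

    int main()
    {
        __m128 a = _mm_set_ps(9.f, 3.f, 2.f, 1.f); // x=1, y=2, z=3, w=9 (w must be ignored)
        __m128 b = _mm_set_ps(9.f, 6.f, 5.f, 4.f); // x=4, y=5, z=6, w=9

        // 3D dot product via the intrinsic: 1*4 + 2*5 + 3*6 = 32 in every lane.
        __m128 dp = _mm_dp_ps(a, b, 0x7f);

        // The same value computed in the style of the #else branches above
        // (splat the y and z products and add them to the x product).
        __m128 ab     = _mm_mul_ps(a, b);
        __m128 splatY = _mm_shuffle_ps(ab, ab, _MM_SHUFFLE(1, 1, 1, 1));
        __m128 splatZ = _mm_shuffle_ps(ab, ab, _MM_SHUFFLE(2, 2, 2, 2));
        __m128 manual = _mm_add_ps(_mm_add_ps(ab, splatY), splatZ);

        std::printf("%g %g\n", _mm_cvtss_f32(dp), _mm_cvtss_f32(manual)); // 32 32
        return 0;
    }
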
diff --git a/indra/llmath/tests/llquaternion_test.cpp b/indra/llmath/tests/llquaternion_test.cpp
index aa3c0ad843..ba18d54d55 100644
--- a/indra/llmath/tests/llquaternion_test.cpp
+++ b/indra/llmath/tests/llquaternion_test.cpp
@@ -349,9 +349,9 @@ namespace tut
ensure(
"2. LLVector4 operator*(const LLVector4 &a, const LLQuaternion &rot) failed",
is_approx_equal(-58153.5390f, result.mV[0]) &&
- (183787.8125f == result.mV[1]) &&
- (116864.164063f == result.mV[2]) &&
- (78.099998f == result.mV[3]));
+ is_approx_equal(183787.8125f, result.mV[1]) &&
+ is_approx_equal(116864.164063f, result.mV[2]) &&
+ is_approx_equal(78.099998f, result.mV[3]));
}
//test case for LLVector3 operator*(const LLVector3 &a, const LLQuaternion &rot) fn.
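
[Note] The test fix replaces exact float equality on the rotated vector components with is_approx_equal, since the scalar and SIMD/NEON code paths can round the large intermediate products differently. A hypothetical tolerance helper in the same spirit (illustration only; the real is_approx_equal is declared in llmath.h and its tolerance scheme may differ):

    #include <cmath>
    #include <cstdio>

    // Hypothetical stand-in for is_approx_equal: relative tolerance scaled by
    // the magnitude of the operands, so results like 183787.8125f compare sensibly.
    static bool approx_equal(float expected, float actual, float rel_tol = 1e-5f)
    {
        const float diff  = std::fabs(expected - actual);
        const float scale = std::fmax(std::fabs(expected), std::fabs(actual));
        return diff <= rel_tol * std::fmax(scale, 1.0f);
    }

    int main()
    {
        const float expected = 183787.8125f;
        const float computed = expected + 0.25f;   // small rounding drift between code paths
        std::printf("exact: %d  approx: %d\n",
                    (computed == expected),         // 0: bit-exact compare fails
                    approx_equal(expected, computed)); // 1: tolerance compare passes
        return 0;
    }
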