diff options
-rw-r--r-- | indra/llmath/llquaternion2.inl | 5 | ||||
-rw-r--r-- | indra/llmath/llvector4a.cpp | 10 | ||||
-rw-r--r-- | indra/llmath/llvector4a.inl | 15 |
3 files changed, 30 insertions, 0 deletions
diff --git a/indra/llmath/llquaternion2.inl b/indra/llmath/llquaternion2.inl index ce5ed73926..b431d5766c 100644 --- a/indra/llmath/llquaternion2.inl +++ b/indra/llmath/llquaternion2.inl @@ -26,8 +26,13 @@ #include "llquaternion2.h" +#if _M_ARM64 +static const LLQuad LL_V4A_PLUS_ONE = {.n128_f32 = {1.f, 1.f, 1.f, 1.f}}; +static const LLQuad LL_V4A_MINUS_ONE = {.n128_f32 = {-1.f, -1.f, -1.f, -1.f}}; +#else static const LLQuad LL_V4A_PLUS_ONE = {1.f, 1.f, 1.f, 1.f}; static const LLQuad LL_V4A_MINUS_ONE = {-1.f, -1.f, -1.f, -1.f}; +#endif // Ctor from LLQuaternion inline LLQuaternion2::LLQuaternion2( const LLQuaternion& quat ) diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b81d50f0f9..df20585d16 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -30,6 +30,15 @@ #include "llmath.h" #include "llquantize.h" +#if _M_ARM64 +extern const LLQuad F_ZERO_4A = {.n128_f32 = {0, 0, 0, 0}}; +extern const LLQuad F_APPROXIMATELY_ZERO_4A = {.n128_f32 = { + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO +}}; +#else extern const LLQuad F_ZERO_4A = { 0, 0, 0, 0 }; extern const LLQuad F_APPROXIMATELY_ZERO_4A = { F_APPROXIMATELY_ZERO, @@ -37,6 +46,7 @@ extern const LLQuad F_APPROXIMATELY_ZERO_4A = { F_APPROXIMATELY_ZERO, F_APPROXIMATELY_ZERO }; +#endif extern const LLVector4a LL_V4A_ZERO = reinterpret_cast<const LLVector4a&> ( F_ZERO_4A ); extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F_APPROXIMATELY_ZERO_4A ); diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 36dbec078c..17e7de6eeb 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -335,8 +335,13 @@ inline void LLVector4a::normalize3() LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +#if _M_ARM64 + static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}}; + static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f }}; +#else static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +#endif // Now we do one round of Newton-Raphson approximation to get full accuracy // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) @@ -359,8 +364,13 @@ inline void LLVector4a::normalize4() LLVector4a lenSqrd; lenSqrd.setAllDot4( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +#if _M_ARM64 + static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}}; + static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f}}; +#else static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +#endif // Now we do one round of Newton-Raphson approximation to get full accuracy // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) @@ -383,8 +393,13 @@ inline LLSimdScalar LLVector4a::normalize3withLength() LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +#if _M_ARM64 + static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}}; + static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f}}; +#else static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +#endif // Now we do one round of Newton-Raphson approximation to get full accuracy // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) |