From c4bbf2134ce541854498561a939bb29dfb058e04 Mon Sep 17 00:00:00 2001 From: Erik Kundiman Date: Mon, 23 Jun 2025 06:40:16 +0800 Subject: Fix "too many initializers" LLQuad initialisations LLQuad is a typedef of __m128, which is already translated by sse2neon to float32x4_t (I thought sse2neon wasn't taking effect and I tried just replacing __m128 with float32x4_t to see that it didn't make a difference), but then I searched using the keyword float32x4_t this time and found that others have had a similar problem: https://developercommunity.visualstudio.com/t/static-initialization-arm64-neon-datatypes/1238406 https://stackoverflow.com/questions/54016821/error-c2078-when-initializing-uint32x4-t-on-arm https://github.com/kcat/openal-soft/issues/494 Looking at the type definition, on arm64 it can be initialised using a designator, the member with the float type and 4 array elements. I know it's an MSVC (arm64) problem, but since MSVC is also used on x64 and only Windows arm64 is suffering from this one in our case anyway (we only support Windows arm64 building using MSVC so far), it's just simpler to use the _M_ARM64 preprocessor instead of _MSC_VER. --- indra/llmath/llquaternion2.inl | 5 +++++ indra/llmath/llvector4a.cpp | 10 ++++++++++ indra/llmath/llvector4a.inl | 15 +++++++++++++++ 3 files changed, 30 insertions(+) (limited to 'indra/llmath') diff --git a/indra/llmath/llquaternion2.inl b/indra/llmath/llquaternion2.inl index ce5ed73926..b431d5766c 100644 --- a/indra/llmath/llquaternion2.inl +++ b/indra/llmath/llquaternion2.inl @@ -26,8 +26,13 @@ #include "llquaternion2.h" +#if _M_ARM64 +static const LLQuad LL_V4A_PLUS_ONE = {.n128_f32 = {1.f, 1.f, 1.f, 1.f}}; +static const LLQuad LL_V4A_MINUS_ONE = {.n128_f32 = {-1.f, -1.f, -1.f, -1.f}}; +#else static const LLQuad LL_V4A_PLUS_ONE = {1.f, 1.f, 1.f, 1.f}; static const LLQuad LL_V4A_MINUS_ONE = {-1.f, -1.f, -1.f, -1.f}; +#endif // Ctor from LLQuaternion inline LLQuaternion2::LLQuaternion2( const LLQuaternion& quat ) diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp index b81d50f0f9..df20585d16 100644 --- a/indra/llmath/llvector4a.cpp +++ b/indra/llmath/llvector4a.cpp @@ -30,6 +30,15 @@ #include "llmath.h" #include "llquantize.h" +#if _M_ARM64 +extern const LLQuad F_ZERO_4A = {.n128_f32 = {0, 0, 0, 0}}; +extern const LLQuad F_APPROXIMATELY_ZERO_4A = {.n128_f32 = { + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO +}}; +#else extern const LLQuad F_ZERO_4A = { 0, 0, 0, 0 }; extern const LLQuad F_APPROXIMATELY_ZERO_4A = { F_APPROXIMATELY_ZERO, @@ -37,6 +46,7 @@ extern const LLQuad F_APPROXIMATELY_ZERO_4A = { F_APPROXIMATELY_ZERO, F_APPROXIMATELY_ZERO }; +#endif extern const LLVector4a LL_V4A_ZERO = reinterpret_cast ( F_ZERO_4A ); extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast ( F_APPROXIMATELY_ZERO_4A ); diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl index 36dbec078c..17e7de6eeb 100644 --- a/indra/llmath/llvector4a.inl +++ b/indra/llmath/llvector4a.inl @@ -335,8 +335,13 @@ inline void LLVector4a::normalize3() LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +#if _M_ARM64 + static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}}; + static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f }}; +#else static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +#endif // Now we do one round of Newton-Raphson approximation to get full accuracy // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) @@ -359,8 +364,13 @@ inline void LLVector4a::normalize4() LLVector4a lenSqrd; lenSqrd.setAllDot4( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +#if _M_ARM64 + static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}}; + static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f}}; +#else static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +#endif // Now we do one round of Newton-Raphson approximation to get full accuracy // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) @@ -383,8 +393,13 @@ inline LLSimdScalar LLVector4a::normalize3withLength() LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); +#if _M_ARM64 + static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}}; + static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f}}; +#else static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; +#endif // Now we do one round of Newton-Raphson approximation to get full accuracy // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) -- cgit v1.2.3