summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErik Kundiman <erik@megapahit.org>2025-06-23 06:40:16 +0800
committerErik Kundiman <erik@megapahit.org>2025-06-23 06:40:16 +0800
commitc4bbf2134ce541854498561a939bb29dfb058e04 (patch)
tree7e568ddc826dcc6f3d2e249cb4b6db0f65a3e02c
parentf5de250c3e74ecc8eb658d0b070c0884616f041d (diff)
Fix "too many initializers" LLQuad initialisations
LLQuad is a typedef of __m128, which is already translated by sse2neon to float32x4_t (I thought sse2neon wasn't taking effect and I tried just replacing __m128 with float32x4_t to see that it didn't make a difference), but then I searched using the keyword float32x4_t this time and found that others have had a similar problem: https://developercommunity.visualstudio.com/t/static-initialization-arm64-neon-datatypes/1238406 https://stackoverflow.com/questions/54016821/error-c2078-when-initializing-uint32x4-t-on-arm https://github.com/kcat/openal-soft/issues/494 Looking at the type definition, on arm64 it can be initialised using a designator, the member with the float type and 4 array elements. I know it's an MSVC (arm64) problem, but since MSVC is also used on x64 and only Windows arm64 is suffering from this one in our case anyway (we only support Windows arm64 building using MSVC so far), it's just simpler to use the _M_ARM64 preprocessor instead of _MSC_VER.
-rw-r--r--indra/llmath/llquaternion2.inl5
-rw-r--r--indra/llmath/llvector4a.cpp10
-rw-r--r--indra/llmath/llvector4a.inl15
3 files changed, 30 insertions, 0 deletions
diff --git a/indra/llmath/llquaternion2.inl b/indra/llmath/llquaternion2.inl
index ce5ed73926..b431d5766c 100644
--- a/indra/llmath/llquaternion2.inl
+++ b/indra/llmath/llquaternion2.inl
@@ -26,8 +26,13 @@
#include "llquaternion2.h"
+#if _M_ARM64
+static const LLQuad LL_V4A_PLUS_ONE = {.n128_f32 = {1.f, 1.f, 1.f, 1.f}};
+static const LLQuad LL_V4A_MINUS_ONE = {.n128_f32 = {-1.f, -1.f, -1.f, -1.f}};
+#else
static const LLQuad LL_V4A_PLUS_ONE = {1.f, 1.f, 1.f, 1.f};
static const LLQuad LL_V4A_MINUS_ONE = {-1.f, -1.f, -1.f, -1.f};
+#endif
// Ctor from LLQuaternion
inline LLQuaternion2::LLQuaternion2( const LLQuaternion& quat )
diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp
index b81d50f0f9..df20585d16 100644
--- a/indra/llmath/llvector4a.cpp
+++ b/indra/llmath/llvector4a.cpp
@@ -30,6 +30,15 @@
#include "llmath.h"
#include "llquantize.h"
+#if _M_ARM64
+extern const LLQuad F_ZERO_4A = {.n128_f32 = {0, 0, 0, 0}};
+extern const LLQuad F_APPROXIMATELY_ZERO_4A = {.n128_f32 = {
+ F_APPROXIMATELY_ZERO,
+ F_APPROXIMATELY_ZERO,
+ F_APPROXIMATELY_ZERO,
+ F_APPROXIMATELY_ZERO
+}};
+#else
extern const LLQuad F_ZERO_4A = { 0, 0, 0, 0 };
extern const LLQuad F_APPROXIMATELY_ZERO_4A = {
F_APPROXIMATELY_ZERO,
@@ -37,6 +46,7 @@ extern const LLQuad F_APPROXIMATELY_ZERO_4A = {
F_APPROXIMATELY_ZERO,
F_APPROXIMATELY_ZERO
};
+#endif
extern const LLVector4a LL_V4A_ZERO = reinterpret_cast<const LLVector4a&> ( F_ZERO_4A );
extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F_APPROXIMATELY_ZERO_4A );
diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl
index 36dbec078c..17e7de6eeb 100644
--- a/indra/llmath/llvector4a.inl
+++ b/indra/llmath/llvector4a.inl
@@ -335,8 +335,13 @@ inline void LLVector4a::normalize3()
LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 }
const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
+#if _M_ARM64
+ static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}};
+ static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f }};
+#else
static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f };
static const LLQuad three = {3.f, 3.f, 3.f, 3.f };
+#endif
// Now we do one round of Newton-Raphson approximation to get full accuracy
// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a))
// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3))
@@ -359,8 +364,13 @@ inline void LLVector4a::normalize4()
LLVector4a lenSqrd; lenSqrd.setAllDot4( *this, *this );
// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 }
const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
+#if _M_ARM64
+ static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}};
+ static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f}};
+#else
static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f };
static const LLQuad three = {3.f, 3.f, 3.f, 3.f };
+#endif
// Now we do one round of Newton-Raphson approximation to get full accuracy
// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a))
// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3))
@@ -383,8 +393,13 @@ inline LLSimdScalar LLVector4a::normalize3withLength()
LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this );
// rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 }
const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ);
+#if _M_ARM64
+ static const LLQuad half = {.n128_f32 = {0.5f, 0.5f, 0.5f, 0.5f}};
+ static const LLQuad three = {.n128_f32 = {3.f, 3.f, 3.f, 3.f}};
+#else
static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f };
static const LLQuad three = {3.f, 3.f, 3.f, 3.f };
+#endif
// Now we do one round of Newton-Raphson approximation to get full accuracy
// According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a))
// the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3))