From 71de5f622a7917f78823a7e7840194e1b0f8f070 Mon Sep 17 00:00:00 2001 From: Dave Parks Date: Thu, 26 Aug 2010 14:23:12 -0500 Subject: Add missing files from viewer-experimental --- indra/llmath/llvector4a.h | 331 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 331 insertions(+) create mode 100644 indra/llmath/llvector4a.h (limited to 'indra/llmath/llvector4a.h') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h new file mode 100644 index 0000000000..76a3e999ce --- /dev/null +++ b/indra/llmath/llvector4a.h @@ -0,0 +1,331 @@ +/** + * @file llvector4a.h + * @brief LLVector4a class header file - memory aligned and vectorized 4 component vector + * + * $LicenseInfo:firstyear=2010&license=viewergpl$ + * + * Copyright (c) 2007-2010, Linden Research, Inc. + * + * Second Life Viewer Source Code + * The source code in this file ("Source Code") is provided by Linden Lab + * to you under the terms of the GNU General Public License, version 2.0 + * ("GPL"), unless you have obtained a separate licensing agreement + * ("Other License"), formally executed by you and Linden Lab. Terms of + * the GPL can be found in doc/GPL-license.txt in this distribution, or + * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2 + * + * There are special exceptions to the terms and conditions of the GPL as + * it is applied to this Source Code. View the full text of the exception + * in the file doc/FLOSS-exception.txt in this software distribution, or + * online at + * http://secondlifegrid.net/programs/open_source/licensing/flossexception + * + * By copying, modifying or distributing this software, you acknowledge + * that you have read and understood your obligations described above, + * and agree to abide by those obligations. + * + * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO + * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, + * COMPLETENESS OR PERFORMANCE. + * $/LicenseInfo$ + */ + +#ifndef LL_LLVECTOR4A_H +#define LL_LLVECTOR4A_H + + +class LLRotation; + +#include +#include "llpreprocessor.h" + +/////////////////////////////////// +// FIRST TIME USERS PLEASE READ +////////////////////////////////// +// This is just the beginning of LLVector4a. There are many more useful functions +// yet to be implemented. For example, setNeg to negate a vector, rotate() to apply +// a matrix rotation, various functions to manipulate only the X, Y, and Z elements +// and many others (including a whole variety of accessors). So if you don't see a +// function here that you need, please contact Falcon or someone else with SSE +// experience (Richard, I think, has some and davep has a little as of the time +// of this writing, July 08, 2010) about getting it implemented before you resort to +// LLVector3/LLVector4. +///////////////////////////////// + +class LLVector4a +{ +public: + + /////////////////////////////////// + // STATIC METHODS + /////////////////////////////////// + + // Call initClass() at startup to avoid 15,000+ cycle penalties from denormalized numbers + static void initClass() + { + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + } + + // Return a vector of all zeros + static inline const LLVector4a& getZero() + { + extern const LLVector4a LL_V4A_ZERO; + return LL_V4A_ZERO; + } + + // Return a vector of all epsilon, where epsilon is a small float suitable for approximate equality checks + static inline const LLVector4a& getEpsilon() + { + extern const LLVector4a LL_V4A_EPSILON; + return LL_V4A_EPSILON; + } + + // Copy 16 bytes from src to dst. Source and destination must be 16-byte aligned + static inline void copy4a(F32* dst, const F32* src) + { + _mm_store_ps(dst, _mm_load_ps(src)); + } + + // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. + static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); + + //////////////////////////////////// + // CONSTRUCTORS + //////////////////////////////////// + + LLVector4a() + { //DO NOT INITIALIZE -- The overhead is completely unnecessary + } + + LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) + { + set(x,y,z,w); + } + + LLVector4a(F32 x) + { + splat(x); + } + + LLVector4a(const LLSimdScalar& x) + { + splat(x); + } + + LLVector4a(LLQuad q) + { + mQ = q; + } + + //////////////////////////////////// + // LOAD/STORE + //////////////////////////////////// + + // Load from 16-byte aligned src array (preferred method of loading) + inline void load4a(const F32* src); + + // Load from unaligned src array (NB: Significantly slower than load4a) + inline void loadua(const F32* src); + + // Load only three floats beginning at address 'src'. Slowest method. + inline void load3(const F32* src); + + // Store to a 16-byte aligned memory address + inline void store4a(F32* dst) const; + + //////////////////////////////////// + // BASIC GET/SET + //////////////////////////////////// + + // Return a "this" as an F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon) + inline F32* getF32ptr(); + + // Return a "this" as a const F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon) + inline const F32* const getF32ptr() const; + + // Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates + // the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead + inline F32 operator[](const S32 idx) const; + + // Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. + inline LLSimdScalar getScalarAt(const S32 idx) const; + + // Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. + template LL_FORCE_INLINE LLSimdScalar getScalarAt() const; + template <> LL_FORCE_INLINE LLSimdScalar getScalarAt<0>() const; + + // Set to an x, y, z and optional w provided + inline void set(F32 x, F32 y, F32 z, F32 w = 0.f); + + // Set to all zeros. This is preferred to using ::getZero() + inline void clear(); + + // Set all elements to 'x' + inline void splat(const F32 x); + + // Set all elements to 'x' + inline void splat(const LLSimdScalar& x); + + // Set all 4 elements to element N of src, with N known at compile time + template void splat(const LLVector4a& src); + + // Set all 4 elements to element i of v, with i NOT known at compile time + inline void splat(const LLVector4a& v, U32 i); + + // Select bits from sourceIfTrue and sourceIfFalse according to bits in mask + inline void setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse ); + + //////////////////////////////////// + // ALGEBRAIC + //////////////////////////////////// + + // Set this to the element-wise (a + b) + inline void setAdd(const LLVector4a& a, const LLVector4a& b); + + // Set this to element-wise (a - b) + inline void setSub(const LLVector4a& a, const LLVector4a& b); + + // Set this to element-wise multiply (a * b) + inline void setMul(const LLVector4a& a, const LLVector4a& b); + + // Set this to element-wise quotient (a / b) + inline void setDiv(const LLVector4a& a, const LLVector4a& b); + + // Set this to the element-wise absolute value of src + inline void setAbs(const LLVector4a& src); + + // Add to each component in this vector the corresponding component in rhs + inline void add(const LLVector4a& rhs); + + // Subtract from each component in this vector the corresponding component in rhs + inline void sub(const LLVector4a& rhs); + + // Multiply each component in this vector by the corresponding component in rhs + inline void mul(const LLVector4a& rhs); + + // Divide each component in this vector by the corresponding component in rhs + inline void div(const LLVector4a& rhs); + + // Multiply this vector by x in a scalar fashion + inline void mul(const F32 x); + + // Set this to (a x b) (geometric cross-product) + inline void setCross3(const LLVector4a& a, const LLVector4a& b); + + // Set all elements to the dot product of the x, y, and z elements in a and b + inline void setAllDot3(const LLVector4a& a, const LLVector4a& b); + + // Set all elements to the dot product of the x, y, z, and w elements in a and b + inline void setAllDot4(const LLVector4a& a, const LLVector4a& b); + + // Return the 3D dot product of this vector and b + inline LLSimdScalar dot3(const LLVector4a& b) const; + + // Return the 4D dot product of this vector and b + inline LLSimdScalar dot4(const LLVector4a& b) const; + + // Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed + // Note that this does not consider zero length vectors! + inline void normalize3(); + + // Same as normalize3() but with respect to all 4 components + inline void normalize4(); + + // Same as normalize3(), but returns length as a SIMD scalar + inline LLSimdScalar normalize3withLength(); + + // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed + // Note that this does not consider zero length vectors! + inline void normalize3fast(); + + // Return true if this vector is normalized with respect to x,y,z up to tolerance + inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const; + + // Return true if this vector is normalized with respect to all components up to tolerance + inline LLBool32 isNormalized4( F32 tolerance = 1e-3 ) const; + + // Set all elements to the length of vector 'v' + inline void setAllLength3( const LLVector4a& v ); + + // Get this vector's length + inline LLSimdScalar getLength3() const; + + // Set the components of this vector to the minimum of the corresponding components of lhs and rhs + inline void setMin(const LLVector4a& lhs, const LLVector4a& rhs); + + // Set the components of this vector to the maximum of the corresponding components of lhs and rhs + inline void setMax(const LLVector4a& lhs, const LLVector4a& rhs); + + // Clamps this vector to be within the component-wise range low to high (inclusive) + inline void clamp( const LLVector4a& low, const LLVector4a& high ); + + // Set this to (c * lhs) + rhs * ( 1 - c) + inline void setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c); + + // Return true (nonzero) if x, y, z (and w for Finite4) are all finite floats + inline LLBool32 isFinite3() const; + inline LLBool32 isFinite4() const; + + // Set this vector to 'vec' rotated by the LLRotation or LLQuaternion2 provided + void setRotated( const LLRotation& rot, const LLVector4a& vec ); + void setRotated( const class LLQuaternion2& quat, const LLVector4a& vec ); + + // Set this vector to 'vec' rotated by the INVERSE of the LLRotation or LLQuaternion2 provided + inline void setRotatedInv( const LLRotation& rot, const LLVector4a& vec ); + inline void setRotatedInv( const class LLQuaternion2& quat, const LLVector4a& vec ); + + // Quantize this vector to 8 or 16 bit precision + void quantize8( const LLVector4a& low, const LLVector4a& high ); + void quantize16( const LLVector4a& low, const LLVector4a& high ); + + //////////////////////////////////// + // LOGICAL + //////////////////////////////////// + // The functions in this section will compare the elements in this vector + // to those in rhs and return an LLVector4Logical with all bits set in elements + // where the comparison was true and all bits unset in elements where the comparison + // was false. See llvector4logica.h + //////////////////////////////////// + // WARNING: Other than equals3 and equals4, these functions do NOT account + // for floating point tolerance. You should include the appropriate tolerance + // in the inputs. + //////////////////////////////////// + + inline LLVector4Logical greaterThan(const LLVector4a& rhs) const; + + inline LLVector4Logical lessThan(const LLVector4a& rhs) const; + + inline LLVector4Logical greaterEqual(const LLVector4a& rhs) const; + + inline LLVector4Logical lessEqual(const LLVector4a& rhs) const; + + inline LLVector4Logical equal(const LLVector4a& rhs) const; + + // Returns true if this and rhs are componentwise equal up to the specified absolute tolerance + inline bool equals4(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + + inline bool equals3(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + + //////////////////////////////////// + // OPERATORS + //////////////////////////////////// + + // Do NOT add aditional operators without consulting someone with SSE experience + inline const LLVector4a& operator= ( const LLVector4a& rhs ); + + inline const LLVector4a& operator= ( const LLQuad& rhs ); + + inline operator LLQuad() const; + +private: + LLQuad mQ; +}; + +inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) +{ + min.setMin(min, p); + max.setMax(max, p); +} + +#endif -- cgit v1.2.3 From 054e8fbde325c03bc8f9a7c10aa8bb9319d501ba Mon Sep 17 00:00:00 2001 From: "Nyx (Neal Orman)" Date: Thu, 26 Aug 2010 16:52:32 -0400 Subject: buildfix: fixing linux build to handle new SSE2 architecture code reviewed by davep and falcon --- indra/llmath/llvector4a.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'indra/llmath/llvector4a.h') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 76a3e999ce..a80e9b49dd 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -153,7 +153,6 @@ public: // Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. template LL_FORCE_INLINE LLSimdScalar getScalarAt() const; - template <> LL_FORCE_INLINE LLSimdScalar getScalarAt<0>() const; // Set to an x, y, z and optional w provided inline void set(F32 x, F32 y, F32 z, F32 w = 0.f); @@ -322,6 +321,8 @@ private: LLQuad mQ; }; +template <> LL_FORCE_INLINE LLSimdScalar LLVector4a::getScalarAt<0>() const; + inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { min.setMin(min, p); -- cgit v1.2.3 From 0d305552003d8378f692cddf87caebb7c47d8ab3 Mon Sep 17 00:00:00 2001 From: "Nyx (Neal Orman)" Date: Thu, 26 Aug 2010 16:57:24 -0400 Subject: buildfix removing unnecessary line in llvector4a.h upon further review, falcon determined this fix was unnecessary. --- indra/llmath/llvector4a.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'indra/llmath/llvector4a.h') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index a80e9b49dd..1e0b3bc854 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -321,8 +321,6 @@ private: LLQuad mQ; }; -template <> LL_FORCE_INLINE LLSimdScalar LLVector4a::getScalarAt<0>() const; - inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) { min.setMin(min, p); -- cgit v1.2.3 From a5619d16f74863168f45b04b37cc6383e1a92263 Mon Sep 17 00:00:00 2001 From: Oz Linden Date: Wed, 13 Oct 2010 07:24:37 -0400 Subject: correct licenses (fix problem with license change merge) --- indra/llmath/llvector4a.h | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) (limited to 'indra/llmath/llvector4a.h') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 1e0b3bc854..79022eade3 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -2,31 +2,25 @@ * @file llvector4a.h * @brief LLVector4a class header file - memory aligned and vectorized 4 component vector * - * $LicenseInfo:firstyear=2010&license=viewergpl$ - * - * Copyright (c) 2007-2010, Linden Research, Inc. - * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ * Second Life Viewer Source Code - * The source code in this file ("Source Code") is provided by Linden Lab - * to you under the terms of the GNU General Public License, version 2.0 - * ("GPL"), unless you have obtained a separate licensing agreement - * ("Other License"), formally executed by you and Linden Lab. Terms of - * the GPL can be found in doc/GPL-license.txt in this distribution, or - * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2 + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. * - * There are special exceptions to the terms and conditions of the GPL as - * it is applied to this Source Code. View the full text of the exception - * in the file doc/FLOSS-exception.txt in this software distribution, or - * online at - * http://secondlifegrid.net/programs/open_source/licensing/flossexception + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * By copying, modifying or distributing this software, you acknowledge - * that you have read and understood your obligations described above, - * and agree to abide by those obligations. + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * - * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO - * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, - * COMPLETENESS OR PERFORMANCE. + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ -- cgit v1.2.3 From 57153cf0f1fffe669b9d8871c33f9c4aaba67a2f Mon Sep 17 00:00:00 2001 From: Roxie Linden Date: Thu, 23 Dec 2010 01:48:44 -0800 Subject: SH-655 - Project mesh viewer crashes on exit. A copy constructor was implemented which did a memcpy, which included the vtable pointer which was to another object of another class (same child though). This resulted in the wrong destructor being called. The reason for the memcpy was for alignment purposes. The solution was to move to LLVector4a, which is intrinsicly aligned. Also, did some performance optimizations based on the LLVector4a optimizations. The solution was to re-implement the --- indra/llmath/llvector4a.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llmath/llvector4a.h') diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h index 79022eade3..596082509d 100644 --- a/indra/llmath/llvector4a.h +++ b/indra/llmath/llvector4a.h @@ -67,7 +67,7 @@ public: extern const LLVector4a LL_V4A_ZERO; return LL_V4A_ZERO; } - + // Return a vector of all epsilon, where epsilon is a small float suitable for approximate equality checks static inline const LLVector4a& getEpsilon() { -- cgit v1.2.3