Integrate SIMD API from oreh/server-trunk-oreh

author: Dave Parks <davep@lindenlab.com> 2010-08-19 12:25:15 -0500
committer: Dave Parks <davep@lindenlab.com> 2010-08-19 12:25:15 -0500
commit: 2fea1d5d33ec1b41a3cfa4307a1bfa58d8014f88 (patch)
tree: 0438f2363b2a91a5ffe970a8130faa118f260e7e
parent: bd0b3a2ddeafaf0d1669ede7ab5aee22d8da9af7 (diff)
57 files changed, 2730 insertions, 2601 deletions
diff --git a/indra/cmake/00-Common.cmake b/indra/cmake/00-Common.cmake
index 2c7bb77758..f10a61e1e7 100644
--- a/indra/cmake/00-Common.cmake
+++ b/indra/cmake/00-Common.cmake
@@ -68,6 +68,7 @@ if (WINDOWS)
    
     add_definitions(
       /Zc:wchar_t-
+      /arch:SSE2
       )
   endif (MSVC80 OR MSVC90)
   
diff --git a/indra/llcharacter/llkeyframemotion.cpp b/indra/llcharacter/llkeyframemotion.cpp
index 7bc9e97eb1..bce272082e 100644
--- a/indra/llcharacter/llkeyframemotion.cpp
+++ b/indra/llcharacter/llkeyframemotion.cpp
@@ -1151,7 +1151,7 @@ void LLKeyframeMotion::applyConstraint(JointConstraint* constraint, F32 time, U8
 			constraint->mPositions[joint_num] = new_pos;
 		}
 		constraint->mFixupDistanceRMS *= 1.f / (constraint->mTotalLength * (F32)(shared_data->mChainLength - 1));
-		constraint->mFixupDistanceRMS = fsqrtf(constraint->mFixupDistanceRMS);
+		constraint->mFixupDistanceRMS = (F32) sqrt(constraint->mFixupDistanceRMS);
 
 		//reset old joint rots
 		for (joint_num = 0; joint_num <= shared_data->mChainLength; joint_num++)
diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt
index dda07133d5..8d85765eb8 100644
--- a/indra/llmath/CMakeLists.txt
+++ b/indra/llmath/CMakeLists.txt
@@ -1,118 +1,128 @@
-# -*- cmake -*-
-
-project(llmath)
-
-include(00-Common)
-include(LLCommon)
-
-include_directories(
-    ${LLCOMMON_INCLUDE_DIRS}
-    )
-
-set(llmath_SOURCE_FILES
-    llbbox.cpp
-    llbboxlocal.cpp
-    llcamera.cpp
-    llcoordframe.cpp
-    llline.cpp
-    llmodularmath.cpp
-    llperlin.cpp
-    llquaternion.cpp
-    llrect.cpp
-    llsphere.cpp
-    llvolume.cpp
-    llvolumemgr.cpp
-    llvolumeoctree.cpp
-    llsdutil_math.cpp
-    m3math.cpp
-    m4math.cpp
-    raytrace.cpp
-    v2math.cpp
-    v3color.cpp
-    v3dmath.cpp
-    v3math.cpp
-    v4color.cpp
-    v4coloru.cpp
-    v4math.cpp
-    xform.cpp
-    )
-
-set(llmath_HEADER_FILES
-    CMakeLists.txt
-
-    camera.h
-    coordframe.h
-    llbbox.h
-    llbboxlocal.h
-    llcamera.h
-    llcoord.h
-    llcoordframe.h
-    llinterp.h
-    llline.h
-    llmath.h
-    llmodularmath.h
-    lloctree.h
-    llperlin.h
-    llplane.h
-    llquantize.h
-    llquaternion.h
-    llrect.h
-    llsphere.h
-    lltreenode.h
-    llv4math.h
-    llv4matrix3.h
-    llv4matrix4.h
-    llv4vector3.h
-    llvector4a.h
-    llmatrix4a.h
-    llvolume.h
-    llvolumemgr.h
-    llvolumeoctree.h
-    llsdutil_math.h
-    m3math.h
-    m4math.h
-    raytrace.h
-    v2math.h
-    v3color.h
-    v3dmath.h
-    v3math.h
-    v4color.h
-    v4coloru.h
-    v4math.h
-    xform.h
-    )
-
-set_source_files_properties(${llmath_HEADER_FILES}
-                            PROPERTIES HEADER_FILE_ONLY TRUE)
-
-list(APPEND llmath_SOURCE_FILES ${llmath_HEADER_FILES})
-
-add_library (llmath ${llmath_SOURCE_FILES})
-
-# Add tests
-if (LL_TESTS)
-  include(LLAddBuildTest)
-  # UNIT TESTS
-  SET(llmath_TEST_SOURCE_FILES
-    llbboxlocal.cpp
-    llmodularmath.cpp
-    llrect.cpp
-    v2math.cpp
-    v3color.cpp
-    v4color.cpp
-    v4coloru.cpp
-    )
-  LL_ADD_PROJECT_UNIT_TESTS(llmath "${llmath_TEST_SOURCE_FILES}")
-
-  # INTEGRATION TESTS
-  set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES})
-  # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests.
-  LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}")
-  LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}")
-  LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}")
-  LL_ADD_INTEGRATION_TEST(m3math "" "${test_libs}")
-  LL_ADD_INTEGRATION_TEST(v3dmath v3dmath.cpp "${test_libs}")
-  LL_ADD_INTEGRATION_TEST(v3math v3math.cpp "${test_libs}")
-  LL_ADD_INTEGRATION_TEST(v4math v4math.cpp "${test_libs}")
-  LL_ADD_INTEGRATION_TEST(xform xform.cpp "${test_libs}")
-endif (LL_TESTS)
+# -*- cmake -*-
+
+project(llmath)
+
+include(00-Common)
+include(LLCommon)
+
+include_directories(
+    ${LLCOMMON_INCLUDE_DIRS}
+    )
+
+set(llmath_SOURCE_FILES
+    llbbox.cpp
+    llbboxlocal.cpp
+    llcamera.cpp
+    llcoordframe.cpp
+    llline.cpp
+    llmatrix3a.cpp
+    llmodularmath.cpp
+    llperlin.cpp
+    llquaternion.cpp
+    llrect.cpp
+    llsphere.cpp
+    llvector4a.cpp
+    llvolume.cpp
+    llvolumemgr.cpp
+    llvolumeoctree.cpp
+    llsdutil_math.cpp
+    m3math.cpp
+    m4math.cpp
+    raytrace.cpp
+    v2math.cpp
+    v3color.cpp
+    v3dmath.cpp
+    v3math.cpp
+    v4color.cpp
+    v4coloru.cpp
+    v4math.cpp
+    xform.cpp
+    )
+
+set(llmath_HEADER_FILES
+    CMakeLists.txt
+
+    camera.h
+    coordframe.h
+    llbbox.h
+    llbboxlocal.h
+    llcamera.h
+    llcoord.h
+    llcoordframe.h
+    llinterp.h
+    llline.h
+    llmath.h
+    llmatrix3a.h
+    llmatrix3a.inl
+    llmodularmath.h
+    lloctree.h
+    llperlin.h
+    llplane.h
+    llquantize.h
+    llquaternion.h
+    llquaternion2.h
+    llquaternion2.inl
+    llrect.h
+    llsimdmath.h
+    llsimdtypes.h
+    llsimdtypes.inl
+    llsphere.h
+    lltreenode.h
+    llvector4a.h
+    llvector4a.inl
+    llvector4logical.h
+    llv4math.h
+    llv4matrix3.h
+    llv4matrix4.h
+    llv4vector3.h
+    llvolume.h
+    llvolumemgr.h
+    llvolumeoctree.h
+    llsdutil_math.h
+    m3math.h
+    m4math.h
+    raytrace.h
+    v2math.h
+    v3color.h
+    v3dmath.h
+    v3math.h
+    v4color.h
+    v4coloru.h
+    v4math.h
+    xform.h
+    )
+
+set_source_files_properties(${llmath_HEADER_FILES}
+                            PROPERTIES HEADER_FILE_ONLY TRUE)
+
+list(APPEND llmath_SOURCE_FILES ${llmath_HEADER_FILES})
+
+add_library (llmath ${llmath_SOURCE_FILES})
+
+# Add tests
+if (LL_TESTS)
+  include(LLAddBuildTest)
+  # UNIT TESTS
+  SET(llmath_TEST_SOURCE_FILES
+    llbboxlocal.cpp
+    llmodularmath.cpp
+    llrect.cpp
+    v2math.cpp
+    v3color.cpp
+    v4color.cpp
+    v4coloru.cpp
+    )
+  LL_ADD_PROJECT_UNIT_TESTS(llmath "${llmath_TEST_SOURCE_FILES}")
+
+  # INTEGRATION TESTS
+  set(test_libs llmath llcommon ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES})
+  # TODO: Some of these need refactoring to be proper Unit tests rather than Integration tests.
+  LL_ADD_INTEGRATION_TEST(llbbox llbbox.cpp "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(llquaternion llquaternion.cpp "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(mathmisc "" "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(m3math "" "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(v3dmath v3dmath.cpp "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(v3math v3math.cpp "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(v4math v4math.cpp "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(xform xform.cpp "${test_libs}")
+endif (LL_TESTS)
diff --git a/indra/llmath/llcamera.cpp b/indra/llmath/llcamera.cpp
index 6b56e4870e..beb5c48624 100644
--- a/indra/llmath/llcamera.cpp
+++ b/indra/llmath/llcamera.cpp
@@ -77,7 +77,7 @@ const LLCamera& LLCamera::operator=(const LLCamera& rhs)
 {
 	memcpy(this, &rhs, sizeof(LLCamera));
 	alignPlanes();
-	LLVector4a::memcpyNonAliased16((F32*) mAgentPlanes, (F32*) rhs.mAgentPlanes, 4*7);
+	LLVector4a::memcpyNonAliased16((F32*) mAgentPlanes, (F32*) rhs.mAgentPlanes, 4*7*sizeof(F32));
 	return *this;
 }
 
diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h
index c3c15e1374..742bbc4751 100644
--- a/indra/llmath/llmath.h
+++ b/indra/llmath/llmath.h
@@ -1,525 +1,509 @@
-/** 
- * @file llmath.h
- * @brief Useful math constants and macros.
- *
- * $LicenseInfo:firstyear=2000&license=viewergpl$
- * 
- * Copyright (c) 2000-2009, Linden Research, Inc.
- * 
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab.  Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- * 
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- * 
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- * 
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
-
-#ifndef LLMATH_H
-#define LLMATH_H
-
-#include <cmath>
-#include <cstdlib>
-#include <complex>
-#include "lldefs.h"
-//#include "llstl.h" // *TODO: Remove when LLString is gone
-//#include "llstring.h" // *TODO: Remove when LLString is gone
-// lltut.h uses is_approx_equal_fraction(). This was moved to its own header
-// file in llcommon so we can use lltut.h for llcommon tests without making
-// llcommon depend on llmath.
-#include "is_approx_equal_fraction.h"
-
-// work around for Windows & older gcc non-standard function names.
-#if LL_WINDOWS
-#include <float.h>
-#define llisnan(val)	_isnan(val)
-#define llfinite(val)	_finite(val)
-#elif (LL_LINUX && __GNUC__ <= 2)
-#define llisnan(val)	isnan(val)
-#define llfinite(val)	isfinite(val)
-#elif LL_SOLARIS
-#define llisnan(val)    isnan(val)
-#define llfinite(val)   (val <= std::numeric_limits<double>::max())
-#else
-#define llisnan(val)	std::isnan(val)
-#define llfinite(val)	std::isfinite(val)
-#endif
-
-// Single Precision Floating Point Routines
-#ifndef sqrtf
-#define sqrtf(x)	((F32)sqrt((F64)(x)))
-#endif
-#ifndef fsqrtf
-#define fsqrtf(x)	sqrtf(x)
-#endif
-
-#ifndef cosf
-#define cosf(x)		((F32)cos((F64)(x)))
-#endif
-#ifndef sinf
-#define sinf(x)		((F32)sin((F64)(x)))
-#endif
-#ifndef tanf
-#define tanf(x)		((F32)tan((F64)(x)))
-#endif
-#ifndef acosf
-#define acosf(x)	((F32)acos((F64)(x)))
-#endif
-
-#ifndef powf
-#define powf(x,y)	((F32)pow((F64)(x),(F64)(y)))
-#endif
-#ifndef expf
-#define expf(x)		((F32)exp((F64)(x)))
-#endif
-
-const F32	GRAVITY			= -9.8f;
-
-// mathematical constants
-const F32	F_PI		= 3.1415926535897932384626433832795f;
-const F32	F_TWO_PI	= 6.283185307179586476925286766559f;
-const F32	F_PI_BY_TWO	= 1.5707963267948966192313216916398f;
-const F32	F_SQRT_TWO_PI = 2.506628274631000502415765284811f;
-const F32	F_E			= 2.71828182845904523536f;
-const F32	F_SQRT2		= 1.4142135623730950488016887242097f;
-const F32	F_SQRT3		= 1.73205080756888288657986402541f;
-const F32	OO_SQRT2	= 0.7071067811865475244008443621049f;
-const F32	DEG_TO_RAD	= 0.017453292519943295769236907684886f;
-const F32	RAD_TO_DEG	= 57.295779513082320876798154814105f;
-const F32	F_APPROXIMATELY_ZERO = 0.00001f;
-const F32	F_LN2		= 0.69314718056f;
-const F32	OO_LN2		= 1.4426950408889634073599246810019f;
-
-const F32	F_ALMOST_ZERO	= 0.0001f;
-const F32	F_ALMOST_ONE	= 1.0f - F_ALMOST_ZERO;
-
-// BUG: Eliminate in favor of F_APPROXIMATELY_ZERO above?
-const F32 FP_MAG_THRESHOLD = 0.0000001f;
-
-// TODO: Replace with logic like is_approx_equal
-inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); }
-
-// These functions work by interpreting sign+exp+mantissa as an unsigned
-// integer.
-// For example:
-// x = <sign>1 <exponent>00000010 <mantissa>00000000000000000000000
-// y = <sign>1 <exponent>00000001 <mantissa>11111111111111111111111
-//
-// interpreted as ints = 
-// x = 10000001000000000000000000000000
-// y = 10000000111111111111111111111111
-// which is clearly a different of 1 in the least significant bit
-// Values with the same exponent can be trivially shown to work.
-//
-// WARNING: Denormals of opposite sign do not work
-// x = <sign>1 <exponent>00000000 <mantissa>00000000000000000000001
-// y = <sign>0 <exponent>00000000 <mantissa>00000000000000000000001
-// Although these values differ by 2 in the LSB, the sign bit makes
-// the int comparison fail.
-//
-// WARNING: NaNs can compare equal
-// There is no special treatment of exceptional values like NaNs
-//
-// WARNING: Infinity is comparable with F32_MAX and negative 
-// infinity is comparable with F32_MIN
-
-inline BOOL is_approx_equal(F32 x, F32 y)
-{
-	const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
-	return (std::abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
-}
-
-inline BOOL is_approx_equal(F64 x, F64 y)
-{
-	const S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
-	return (std::abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
-}
-
-inline S32 llabs(const S32 a)
-{
-	return S32(std::labs(a));
-}
-
-inline F32 llabs(const F32 a)
-{
-	return F32(std::fabs(a));
-}
-
-inline F64 llabs(const F64 a)
-{
-	return F64(std::fabs(a));
-}
-
-inline S32 lltrunc( F32 f )
-{
-#if LL_WINDOWS && !defined( __INTEL_COMPILER )
-		// Avoids changing the floating point control word.
-		// Add or subtract 0.5 - epsilon and then round
-		const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
-		S32 result;
-		__asm {
-			fld		f
-			mov		eax,	f
-			shr		eax,	29
-			and		eax,	4
-			fadd	dword ptr [zpfp + eax]
-			fistp	result
-		}
-		return result;
-#else
-		return (S32)f;
-#endif
-}
-
-inline S32 lltrunc( F64 f )
-{
-	return (S32)f;
-}
-
-inline S32 llfloor( F32 f )
-{
-#if LL_WINDOWS && !defined( __INTEL_COMPILER )
-		// Avoids changing the floating point control word.
-		// Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
-		// Add -(0.5 - epsilon) and then round
-		const U32 zpfp = 0xBEFFFFFF;
-		S32 result;
-		__asm { 
-			fld		f
-			fadd	dword ptr [zpfp]
-			fistp	result
-		}
-		return result;
-#else
-		return (S32)floorf(f);
-#endif
-}
-
-
-inline S32 llceil( F32 f )
-{
-	// This could probably be optimized, but this works.
-	return (S32)ceil(f);
-}
-
-
-#ifndef BOGUS_ROUND
-// Use this round.  Does an arithmetic round (0.5 always rounds up)
-inline S32 llround(const F32 val)
-{
-	return llfloor(val + 0.5f);
-}
-
-#else // BOGUS_ROUND
-// Old llround implementation - does banker's round (toward nearest even in the case of a 0.5.
-// Not using this because we don't have a consistent implementation on both platforms, use
-// llfloor(val + 0.5f), which is consistent on all platforms.
-inline S32 llround(const F32 val)
-{
-	#if LL_WINDOWS
-		// Note: assumes that the floating point control word is set to rounding mode (the default)
-		S32 ret_val;
-		_asm fld	val
-		_asm fistp	ret_val;
-		return ret_val;
-	#elif LL_LINUX
-		// Note: assumes that the floating point control word is set
-		// to rounding mode (the default)
-		S32 ret_val;
-		__asm__ __volatile__( "flds %1    \n\t"
-							  "fistpl %0  \n\t"
-							  : "=m" (ret_val)
-							  : "m" (val) );
-		return ret_val;
-	#else
-		return llfloor(val + 0.5f);
-	#endif
-}
-
-// A fast arithmentic round on intel, from Laurent de Soras http://ldesoras.free.fr
-inline int round_int(double x)
-{
-	const float round_to_nearest = 0.5f;
-	int i;
-	__asm
-	{
-		fld x
-		fadd st, st (0)
-		fadd round_to_nearest
-		fistp i
-		sar i, 1
-	}
-	return (i);
-}
-#endif // BOGUS_ROUND
-
-inline F32 llround( F32 val, F32 nearest )
-{
-	return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
-}
-
-inline F64 llround( F64 val, F64 nearest )
-{
-	return F64(floor(val * (1.0 / nearest) + 0.5)) * nearest;
-}
-
-// these provide minimum peak error
-//
-// avg  error = -0.013049 
-// peak error = -31.4 dB
-// RMS  error = -28.1 dB
-
-const F32 FAST_MAG_ALPHA = 0.960433870103f;
-const F32 FAST_MAG_BETA = 0.397824734759f;
-
-// these provide minimum RMS error
-//
-// avg  error = 0.000003 
-// peak error = -32.6 dB
-// RMS  error = -25.7 dB
-//
-//const F32 FAST_MAG_ALPHA = 0.948059448969f;
-//const F32 FAST_MAG_BETA = 0.392699081699f;
-
-inline F32 fastMagnitude(F32 a, F32 b)
-{ 
-	a = (a > 0) ? a : -a;
-	b = (b > 0) ? b : -b;
-	return(FAST_MAG_ALPHA * llmax(a,b) + FAST_MAG_BETA * llmin(a,b));
-}
-
-
-
-////////////////////
-//
-// Fast F32/S32 conversions
-//
-// Culled from www.stereopsis.com/FPU.html
-
-const F64 LL_DOUBLE_TO_FIX_MAGIC	= 68719476736.0*1.5;     //2^36 * 1.5,  (52-_shiftamt=36) uses limited precisicion to floor
-const S32 LL_SHIFT_AMOUNT			= 16;                    //16.16 fixed point representation,
-
-// Endian dependent code
-#ifdef LL_LITTLE_ENDIAN
-	#define LL_EXP_INDEX				1
-	#define LL_MAN_INDEX				0
-#else
-	#define LL_EXP_INDEX				0
-	#define LL_MAN_INDEX				1
-#endif
-
-/* Deprecated: use llround(), lltrunc(), or llfloor() instead
-// ================================================================================================
-// Real2Int
-// ================================================================================================
-inline S32 F64toS32(F64 val)
-{
-	val		= val + LL_DOUBLE_TO_FIX_MAGIC;
-	return ((S32*)&val)[LL_MAN_INDEX] >> LL_SHIFT_AMOUNT; 
-}
-
-// ================================================================================================
-// Real2Int
-// ================================================================================================
-inline S32 F32toS32(F32 val)
-{
-	return F64toS32 ((F64)val);
-}
-*/
-
-////////////////////////////////////////////////
-//
-// Fast exp and log
-//
-
-// Implementation of fast exp() approximation (from a paper by Nicol N. Schraudolph
-// http://www.inf.ethz.ch/~schraudo/pubs/exp.pdf
-static union
-{
-	double d;
-	struct
-	{
-#ifdef LL_LITTLE_ENDIAN
-		S32 j, i;
-#else
-		S32 i, j;
-#endif
-	} n;
-} LLECO; // not sure what the name means
-
-#define LL_EXP_A (1048576 * OO_LN2) // use 1512775 for integer
-#define LL_EXP_C (60801)			// this value of C good for -4 < y < 4
-
-#define LL_FAST_EXP(y) (LLECO.n.i = llround(F32(LL_EXP_A*(y))) + (1072693248 - LL_EXP_C), LLECO.d)
-
-
-
-inline F32 llfastpow(const F32 x, const F32 y)
-{
-	return (F32)(LL_FAST_EXP(y * log(x)));
-}
-
-
-inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)
-{
-	// compute the power of ten
-	F32 bar = 1.f;
-	for (S32 i = 0; i < sig_figs; i++)
-	{
-		bar *= 10.f;
-	}
-
-	foo = (F32)llround(foo * bar);
-
-	// shift back
-	foo /= bar;
-	return foo;
-}
-
-inline F32 lerp(F32 a, F32 b, F32 u) 
-{
-	return a + ((b - a) * u);
-}
-
-inline F32 lerp2d(F32 x00, F32 x01, F32 x10, F32 x11, F32 u, F32 v)
-{
-	F32 a = x00 + (x01-x00)*u;
-	F32 b = x10 + (x11-x10)*u;
-	F32 r = a + (b-a)*v;
-	return r;
-}
-
-inline F32 ramp(F32 x, F32 a, F32 b)
-{
-	return (a == b) ? 0.0f : ((a - x) / (a - b));
-}
-
-inline F32 rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
-{
-	return lerp(y1, y2, ramp(x, x1, x2));
-}
-
-inline F32 clamp_rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
-{
-	if (y1 < y2)
-	{
-		return llclamp(rescale(x,x1,x2,y1,y2),y1,y2);
-	}
-	else
-	{
-		return llclamp(rescale(x,x1,x2,y1,y2),y2,y1);
-	}
-}
-
-
-inline F32 cubic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
-{
-	if (x <= x0)
-		return s0;
-
-	if (x >= x1)
-		return s1;
-
-	F32 f = (x - x0) / (x1 - x0);
-
-	return	s0 + (s1 - s0) * (f * f) * (3.0f - 2.0f * f);
-}
-
-inline F32 cubic_step( F32 x )
-{
-	x = llclampf(x);
-
-	return	(x * x) * (3.0f - 2.0f * x);
-}
-
-inline F32 quadratic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
-{
-	if (x <= x0)
-		return s0;
-
-	if (x >= x1)
-		return s1;
-
-	F32 f = (x - x0) / (x1 - x0);
-	F32 f_squared = f * f;
-
-	return	(s0 * (1.f - f_squared)) + ((s1 - s0) * f_squared);
-}
-
-inline F32 llsimple_angle(F32 angle)
-{
-	while(angle <= -F_PI)
-		angle += F_TWO_PI;
-	while(angle >  F_PI)
-		angle -= F_TWO_PI;
-	return angle;
-}
-
-//SDK - Renamed this to get_lower_power_two, since this is what this actually does.
-inline U32 get_lower_power_two(U32 val, U32 max_power_two)
-{
-	if(!max_power_two)
-	{
-		max_power_two = 1 << 31 ;
-	}
-	if(max_power_two & (max_power_two - 1))
-	{
-		return 0 ;
-	}
-
-	for(; val < max_power_two ; max_power_two >>= 1) ;
-	
-	return max_power_two ;
-}
-
-// calculate next highest power of two, limited by max_power_two
-// This is taken from a brilliant little code snipped on http://acius2.blogspot.com/2007/11/calculating-next-power-of-2.html
-// Basically we convert the binary to a solid string of 1's with the same
-// number of digits, then add one.  We subtract 1 initially to handle
-// the case where the number passed in is actually a power of two.
-// WARNING: this only works with 32 bit ints.
-inline U32 get_next_power_two(U32 val, U32 max_power_two)
-{
-	if(!max_power_two)
-	{
-		max_power_two = 1 << 31 ;
-	}
-
-	if(val >= max_power_two)
-	{
-		return max_power_two;
-	}
-
-	val--;
-	val = (val >> 1) | val;
-	val = (val >> 2) | val;
-	val = (val >> 4) | val;
-	val = (val >> 8) | val;
-	val = (val >> 16) | val;
-	val++;
-
-	return val;
-}
-
-//get the gaussian value given the linear distance from axis x and guassian value o
-inline F32 llgaussian(F32 x, F32 o)
-{
-	return 1.f/(F_SQRT_TWO_PI*o)*powf(F_E, -(x*x)/(2*o*o));
-}
-
-#endif
+/** 
+ * @file llmath.h
+ * @brief Useful math constants and macros.
+ *
+ * $LicenseInfo:firstyear=2000&license=viewergpl$
+ * 
+ * Copyright (c) 2000-2009, Linden Research, Inc.
+ * 
+ * Second Life Viewer Source Code
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
+ * 
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at
+ * http://secondlifegrid.net/programs/open_source/licensing/flossexception
+ * 
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ * 
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ * $/LicenseInfo$
+ */
+
+#ifndef LLMATH_H
+#define LLMATH_H
+
+#include <cmath>
+#include <cstdlib>
+#include "lldefs.h"
+//#include "llstl.h" // *TODO: Remove when LLString is gone
+//#include "llstring.h" // *TODO: Remove when LLString is gone
+// lltut.h uses is_approx_equal_fraction(). This was moved to its own header
+// file in llcommon so we can use lltut.h for llcommon tests without making
+// llcommon depend on llmath.
+#include "is_approx_equal_fraction.h"
+
+// work around for Windows & older gcc non-standard function names.
+#if LL_WINDOWS
+#include <float.h>
+#define llisnan(val)	_isnan(val)
+#define llfinite(val)	_finite(val)
+#elif (LL_LINUX && __GNUC__ <= 2)
+#define llisnan(val)	isnan(val)
+#define llfinite(val)	isfinite(val)
+#elif LL_SOLARIS
+#define llisnan(val)    isnan(val)
+#define llfinite(val)   (val <= std::numeric_limits<double>::max())
+#else
+#define llisnan(val)	std::isnan(val)
+#define llfinite(val)	std::isfinite(val)
+#endif
+
+// Single Precision Floating Point Routines
+// (There used to be more defined here, but they appeared to be redundant and 
+// were breaking some other includes. Removed by Falcon, reviewed by Andrew, 11/25/09)
+/*#ifndef tanf
+#define tanf(x)		((F32)tan((F64)(x)))
+#endif*/
+
+const F32	GRAVITY			= -9.8f;
+
+// mathematical constants
+const F32	F_PI		= 3.1415926535897932384626433832795f;
+const F32	F_TWO_PI	= 6.283185307179586476925286766559f;
+const F32	F_PI_BY_TWO	= 1.5707963267948966192313216916398f;
+const F32	F_SQRT_TWO_PI = 2.506628274631000502415765284811f;
+const F32	F_E			= 2.71828182845904523536f;
+const F32	F_SQRT2		= 1.4142135623730950488016887242097f;
+const F32	F_SQRT3		= 1.73205080756888288657986402541f;
+const F32	OO_SQRT2	= 0.7071067811865475244008443621049f;
+const F32	DEG_TO_RAD	= 0.017453292519943295769236907684886f;
+const F32	RAD_TO_DEG	= 57.295779513082320876798154814105f;
+const F32	F_APPROXIMATELY_ZERO = 0.00001f;
+const F32	F_LN2		= 0.69314718056f;
+const F32	OO_LN2		= 1.4426950408889634073599246810019f;
+
+const F32	F_ALMOST_ZERO	= 0.0001f;
+const F32	F_ALMOST_ONE	= 1.0f - F_ALMOST_ZERO;
+
+// BUG: Eliminate in favor of F_APPROXIMATELY_ZERO above?
+const F32 FP_MAG_THRESHOLD = 0.0000001f;
+
+// TODO: Replace with logic like is_approx_equal
+inline BOOL is_approx_zero( F32 f ) { return (-F_APPROXIMATELY_ZERO < f) && (f < F_APPROXIMATELY_ZERO); }
+
+// These functions work by interpreting sign+exp+mantissa as an unsigned
+// integer.
+// For example:
+// x = <sign>1 <exponent>00000010 <mantissa>00000000000000000000000
+// y = <sign>1 <exponent>00000001 <mantissa>11111111111111111111111
+//
+// interpreted as ints = 
+// x = 10000001000000000000000000000000
+// y = 10000000111111111111111111111111
+// which is clearly a difference of 1 in the least significant bit
+// Values with the same exponent can be trivially shown to work.
+//
+// WARNING: Denormals of opposite sign do not work
+// x = <sign>1 <exponent>00000000 <mantissa>00000000000000000000001
+// y = <sign>0 <exponent>00000000 <mantissa>00000000000000000000001
+// Although these values differ by 2 in the LSB, the sign bit makes
+// the int comparison fail.
+//
+// WARNING: NaNs can compare equal
+// There is no special treatment of exceptional values like NaNs
+//
+// WARNING: Infinity is comparable with F32_MAX and negative 
+// infinity is comparable with F32_MIN
+
+inline BOOL is_approx_equal(F32 x, F32 y)
+{
+	const S32 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
+	return (std::abs((S32) ((U32&)x - (U32&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
+}
+
+inline BOOL is_approx_equal(F64 x, F64 y)
+{
+	const S64 COMPARE_MANTISSA_UP_TO_BIT = 0x02;
+	return (std::abs((S32) ((U64&)x - (U64&)y) ) < COMPARE_MANTISSA_UP_TO_BIT);
+}
+
+inline S32 llabs(const S32 a)
+{
+	return S32(std::labs(a));
+}
+
+inline F32 llabs(const F32 a)
+{
+	return F32(std::fabs(a));
+}
+
+inline F64 llabs(const F64 a)
+{
+	return F64(std::fabs(a));
+}
+
+inline S32 lltrunc( F32 f )
+{
+#if LL_WINDOWS && !defined( __INTEL_COMPILER )
+		// Avoids changing the floating point control word.
+		// Add or subtract 0.5 - epsilon and then round
+		const static U32 zpfp[] = { 0xBEFFFFFF, 0x3EFFFFFF };
+		S32 result;
+		__asm {
+			fld		f
+			mov		eax,	f
+			shr		eax,	29
+			and		eax,	4
+			fadd	dword ptr [zpfp + eax]
+			fistp	result
+		}
+		return result;
+#else
+		return (S32)f;
+#endif
+}
+
+inline S32 lltrunc( F64 f )
+{
+	return (S32)f;
+}
+
+inline S32 llfloor( F32 f )
+{
+#if LL_WINDOWS && !defined( __INTEL_COMPILER )
+		// Avoids changing the floating point control word.
+		// Accurate (unlike Stereopsis version) for all values between S32_MIN and S32_MAX and slightly faster than Stereopsis version.
+		// Add -(0.5 - epsilon) and then round
+		const U32 zpfp = 0xBEFFFFFF;
+		S32 result;
+		__asm { 
+			fld		f
+			fadd	dword ptr [zpfp]
+			fistp	result
+		}
+		return result;
+#else
+		return (S32)floor(f);
+#endif
+}
+
+
+inline S32 llceil( F32 f )
+{
+	// This could probably be optimized, but this works.
+	return (S32)ceil(f);
+}
+
+
+#ifndef BOGUS_ROUND
+// Use this round.  Does an arithmetic round (0.5 always rounds up)
+inline S32 llround(const F32 val)
+{
+	return llfloor(val + 0.5f);
+}
+
+#else // BOGUS_ROUND
+// Old llround implementation - does banker's round (toward nearest even in the case of a 0.5).
+// Not using this because we don't have a consistent implementation on both platforms, use
+// llfloor(val + 0.5f), which is consistent on all platforms.
+inline S32 llround(const F32 val)
+{
+	#if LL_WINDOWS
+		// Note: assumes that the floating point control word is set to rounding mode (the default)
+		S32 ret_val;
+		_asm fld	val
+		_asm fistp	ret_val;
+		return ret_val;
+	#elif LL_LINUX
+		// Note: assumes that the floating point control word is set
+		// to rounding mode (the default)
+		S32 ret_val;
+		__asm__ __volatile__( "flds %1    \n\t"
+							  "fistpl %0  \n\t"
+							  : "=m" (ret_val)
+							  : "m" (val) );
+		return ret_val;
+	#else
+		return llfloor(val + 0.5f);
+	#endif
+}
+
+// A fast arithmetic round on Intel, from Laurent de Soras http://ldesoras.free.fr
+inline int round_int(double x)
+{
+	const float round_to_nearest = 0.5f;
+	int i;
+	__asm
+	{
+		fld x
+		fadd st, st (0)
+		fadd round_to_nearest
+		fistp i
+		sar i, 1
+	}
+	return (i);
+}
+#endif // BOGUS_ROUND
+
+inline F32 llround( F32 val, F32 nearest )
+{
+	return F32(floor(val * (1.0f / nearest) + 0.5f)) * nearest;
+}
+
+inline F64 llround( F64 val, F64 nearest )
+{
+	return F64(floor(val * (1.0 / nearest) + 0.5)) * nearest;
+}
+
+// these provide minimum peak error
+//
+// avg  error = -0.013049 
+// peak error = -31.4 dB
+// RMS  error = -28.1 dB
+
+const F32 FAST_MAG_ALPHA = 0.960433870103f;
+const F32 FAST_MAG_BETA = 0.397824734759f;
+
+// these provide minimum RMS error
+//
+// avg  error = 0.000003 
+// peak error = -32.6 dB
+// RMS  error = -25.7 dB
+//
+//const F32 FAST_MAG_ALPHA = 0.948059448969f;
+//const F32 FAST_MAG_BETA = 0.392699081699f;
+
+inline F32 fastMagnitude(F32 a, F32 b)
+{ 
+	a = (a > 0) ? a : -a;
+	b = (b > 0) ? b : -b;
+	return(FAST_MAG_ALPHA * llmax(a,b) + FAST_MAG_BETA * llmin(a,b));
+}
+
+
+
+////////////////////
+//
+// Fast F32/S32 conversions
+//
+// Culled from www.stereopsis.com/FPU.html
+
+const F64 LL_DOUBLE_TO_FIX_MAGIC	= 68719476736.0*1.5;     //2^36 * 1.5,  (52-_shiftamt=36) uses limited precision to floor
+const S32 LL_SHIFT_AMOUNT			= 16;                    //16.16 fixed point representation,
+
+// Endian dependent code
+#ifdef LL_LITTLE_ENDIAN
+	#define LL_EXP_INDEX				1
+	#define LL_MAN_INDEX				0
+#else
+	#define LL_EXP_INDEX				0
+	#define LL_MAN_INDEX				1
+#endif
+
+/* Deprecated: use llround(), lltrunc(), or llfloor() instead
+// ================================================================================================
+// Real2Int
+// ================================================================================================
+inline S32 F64toS32(F64 val)
+{
+	val		= val + LL_DOUBLE_TO_FIX_MAGIC;
+	return ((S32*)&val)[LL_MAN_INDEX] >> LL_SHIFT_AMOUNT; 
+}
+
+// ================================================================================================
+// Real2Int
+// ================================================================================================
+inline S32 F32toS32(F32 val)
+{
+	return F64toS32 ((F64)val);
+}
+*/
+
+////////////////////////////////////////////////
+//
+// Fast exp and log
+//
+
+// Implementation of fast exp() approximation (from a paper by Nicol N. Schraudolph,
+// http://www.inf.ethz.ch/~schraudo/pubs/exp.pdf)
+static union
+{
+	double d;
+	struct
+	{
+#ifdef LL_LITTLE_ENDIAN
+		S32 j, i;
+#else
+		S32 i, j;
+#endif
+	} n;
+} LLECO; // not sure what the name means
+
+#define LL_EXP_A (1048576 * OO_LN2) // use 1512775 for integer
+#define LL_EXP_C (60801)			// this value of C good for -4 < y < 4
+
+#define LL_FAST_EXP(y) (LLECO.n.i = llround(F32(LL_EXP_A*(y))) + (1072693248 - LL_EXP_C), LLECO.d)
+
+
+
+inline F32 llfastpow(const F32 x, const F32 y)
+{
+	return (F32)(LL_FAST_EXP(y * log(x)));
+}
+
+
+inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs)
+{
+	// compute the power of ten
+	F32 bar = 1.f;
+	for (S32 i = 0; i < sig_figs; i++)
+	{
+		bar *= 10.f;
+	}
+
+	//F32 new_foo = (F32)llround(foo * bar);
+	// the llround() implementation sucks.  Don't use it.
+
+	F32 sign = (foo > 0.f) ? 1.f : -1.f;
+	F32 new_foo = F32( S64(foo * bar + sign * 0.5f));
+	new_foo /= bar;
+
+	return new_foo;
+}
+
+inline F32 lerp(F32 a, F32 b, F32 u) 
+{
+	return a + ((b - a) * u);
+}
+
+inline F32 lerp2d(F32 x00, F32 x01, F32 x10, F32 x11, F32 u, F32 v)
+{
+	F32 a = x00 + (x01-x00)*u;
+	F32 b = x10 + (x11-x10)*u;
+	F32 r = a + (b-a)*v;
+	return r;
+}
+
+inline F32 ramp(F32 x, F32 a, F32 b)
+{
+	return (a == b) ? 0.0f : ((a - x) / (a - b));
+}
+
+inline F32 rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
+{
+	return lerp(y1, y2, ramp(x, x1, x2));
+}
+
+inline F32 clamp_rescale(F32 x, F32 x1, F32 x2, F32 y1, F32 y2)
+{
+	if (y1 < y2)
+	{
+		return llclamp(rescale(x,x1,x2,y1,y2),y1,y2);
+	}
+	else
+	{
+		return llclamp(rescale(x,x1,x2,y1,y2),y2,y1);
+	}
+}
+
+
+inline F32 cubic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
+{
+	if (x <= x0)
+		return s0;
+
+	if (x >= x1)
+		return s1;
+
+	F32 f = (x - x0) / (x1 - x0);
+
+	return	s0 + (s1 - s0) * (f * f) * (3.0f - 2.0f * f);
+}
+
+inline F32 cubic_step( F32 x )
+{
+	x = llclampf(x);
+
+	return	(x * x) * (3.0f - 2.0f * x);
+}
+
+inline F32 quadratic_step( F32 x, F32 x0, F32 x1, F32 s0, F32 s1 )
+{
+	if (x <= x0)
+		return s0;
+
+	if (x >= x1)
+		return s1;
+
+	F32 f = (x - x0) / (x1 - x0);
+	F32 f_squared = f * f;
+
+	return	(s0 * (1.f - f_squared)) + ((s1 - s0) * f_squared);
+}
+
+inline F32 llsimple_angle(F32 angle)
+{
+	while(angle <= -F_PI)
+		angle += F_TWO_PI;
+	while(angle >  F_PI)
+		angle -= F_TWO_PI;
+	return angle;
+}
+
+//SDK - Renamed this to get_lower_power_two, since this is what this actually does.
+inline U32 get_lower_power_two(U32 val, U32 max_power_two)
+{
+	if(!max_power_two)
+	{
+		max_power_two = 1 << 31 ;
+	}
+	if(max_power_two & (max_power_two - 1))
+	{
+		return 0 ;
+	}
+
+	for(; val < max_power_two ; max_power_two >>= 1) ;
+	
+	return max_power_two ;
+}
+
+// calculate next highest power of two, limited by max_power_two
+// This is taken from a brilliant little code snippet at http://acius2.blogspot.com/2007/11/calculating-next-power-of-2.html
+// Basically we convert the binary to a solid string of 1's with the same
+// number of digits, then add one.  We subtract 1 initially to handle
+// the case where the number passed in is actually a power of two.
+// WARNING: this only works with 32 bit ints.
+inline U32 get_next_power_two(U32 val, U32 max_power_two)
+{
+	if(!max_power_two)
+	{
+		max_power_two = 1 << 31 ;
+	}
+
+	if(val >= max_power_two)
+	{
+		return max_power_two;
+	}
+
+	val--;
+	val = (val >> 1) | val;
+	val = (val >> 2) | val;
+	val = (val >> 4) | val;
+	val = (val >> 8) | val;
+	val = (val >> 16) | val;
+	val++;
+
+	return val;
+}
+
+//get the gaussian value given the linear distance from axis x and gaussian value o
+inline F32 llgaussian(F32 x, F32 o)
+{
+	return 1.f/(F_SQRT_TWO_PI*o)*powf(F_E, -(x*x)/(2*o*o));
+}
+
+// Include simd math header
+#include "llsimdmath.h"
+
+#endif
diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h
index 59828ae565..432e9fbcd8 100644
--- a/indra/llmath/lloctree.h
+++ b/indra/llmath/lloctree.h
@@ -142,7 +142,7 @@ public:
 	
 	S32 getOctant(const LLVector4a& pos) const			//get the octant pos is in
 	{
-		return pos.greaterThan4(mD[CENTER]).getComparisonMask() & 0x7;
+		return pos.greaterThan(mD[CENTER]).getGatheredBits() & 0x7;
 	}
 	
 	inline bool isInside(const LLVector4a& pos, const F32& rad) const
@@ -157,13 +157,13 @@ public:
 
 	bool isInside(const LLVector4a& pos) const
 	{
-		S32 gt = pos.greaterThan4(mD[MAX]).getComparisonMask() & 0x7;
+		S32 gt = pos.greaterThan(mD[MAX]).getGatheredBits() & 0x7;
 		if (gt)
 		{
 			return false;
 		}
 
-		S32 lt = pos.lessEqual4(mD[MIN]).getComparisonMask() & 0x7;
+		S32 lt = pos.lessEqual(mD[MIN]).getGatheredBits() & 0x7;
 		if (lt)
 		{
 			return false;
@@ -206,13 +206,13 @@ public:
 	{
 		const LLVector4a& pos = data->getPositionGroup();
 
-		LLVector4a gt = pos.greaterThan4(center);
+		LLVector4a gt = pos.greaterThan(center);
 
 		LLVector4a up;
-		up.mQ = _mm_and_ps(size.mQ, gt.mQ);
+		up = _mm_and_ps(size, gt);
 
 		LLVector4a down;
-		down.mQ = _mm_andnot_ps(gt.mQ, size.mQ);
+		down = _mm_andnot_ps(gt, size);
 
 		center.add(up);
 		center.sub(down);
@@ -326,9 +326,8 @@ public:
 				LLVector4a val;
 				val.setSub(center, getCenter());
 				val.setAbs(val);
-				LLVector4a app_zero;
-				app_zero.mQ = F_APPROXIMATELY_ZERO_4A;
-				S32 lt = val.lessThan4(app_zero).getComparisonMask() & 0x7;
+								
+				S32 lt = val.lessThan(LLVector4a::getEpsilon()).getGatheredBits() & 0x7;
 
 				if( lt == 0x7 )
 				{
@@ -642,7 +641,7 @@ public:
 		LLVector4a val;
 		val.setSub(v, BaseType::mD[BaseType::CENTER]);
 		val.setAbs(val);
-		S32 lt = val.lessThan4(MAX_MAG).getComparisonMask() & 0x7;
+		S32 lt = val.lessThan(MAX_MAG).getGatheredBits() & 0x7;
 
 		if (lt != 0x7)
 		{
diff --git a/indra/llmath/llquantize.h b/indra/llmath/llquantize.h
index 2192427f07..000d8a060f 100644
--- a/indra/llmath/llquantize.h
+++ b/indra/llmath/llquantize.h
@@ -1,152 +1,158 @@
-/** 
- * @file llquantize.h
- * @brief useful routines for quantizing floats to various length ints
- * and back out again
- *
- * $LicenseInfo:firstyear=2001&license=viewergpl$
- * 
- * Copyright (c) 2001-2009, Linden Research, Inc.
- * 
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab.  Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- * 
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- * 
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- * 
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
-
-#ifndef LL_LLQUANTIZE_H
-#define LL_LLQUANTIZE_H
-
-const U16 U16MAX = 65535;
-const F32 OOU16MAX = 1.f/(F32)(U16MAX);
-
-const U8 U8MAX = 255;
-const F32 OOU8MAX = 1.f/(F32)(U8MAX);
-
-const U8 FIRSTVALIDCHAR = 54;
-const U8 MAXSTRINGVAL = U8MAX - FIRSTVALIDCHAR; //we don't allow newline or null 
-
-
-inline U16 F32_to_U16_ROUND(F32 val, F32 lower, F32 upper)
-{
-	val = llclamp(val, lower, upper);
-	// make sure that the value is positive and normalized to <0, 1>
-	val -= lower;
-	val /= (upper - lower);
-
-	// round the value.   Sreturn the U16
-	return (U16)(llround(val*U16MAX));
-}
-
-
-inline U16 F32_to_U16(F32 val, F32 lower, F32 upper)
-{
-	val = llclamp(val, lower, upper);
-	// make sure that the value is positive and normalized to <0, 1>
-	val -= lower;
-	val /= (upper - lower);
-
-	// return the U16
-	return (U16)(llfloor(val*U16MAX));
-}
-
-inline F32 U16_to_F32(U16 ival, F32 lower, F32 upper)
-{
-	F32 val = ival*OOU16MAX;
-	F32 delta = (upper - lower);
-	val *= delta;
-	val += lower;
-
-	F32 max_error = delta*OOU16MAX;
-
-	// make sure that zero's come through as zero
-	if (fabsf(val) < max_error)
-		val = 0.f;
-
-	return val;
-}
-
-
-inline U8 F32_to_U8_ROUND(F32 val, F32 lower, F32 upper)
-{
-	val = llclamp(val, lower, upper);
-	// make sure that the value is positive and normalized to <0, 1>
-	val -= lower;
-	val /= (upper - lower);
-
-	// return the rounded U8
-	return (U8)(llround(val*U8MAX));
-}
-
-
-inline U8 F32_to_U8(F32 val, F32 lower, F32 upper)
-{
-	val = llclamp(val, lower, upper);
-	// make sure that the value is positive and normalized to <0, 1>
-	val -= lower;
-	val /= (upper - lower);
-
-	// return the U8
-	return (U8)(llfloor(val*U8MAX));
-}
-
-inline F32 U8_to_F32(U8 ival, F32 lower, F32 upper)
-{
-	F32 val = ival*OOU8MAX;
-	F32 delta = (upper - lower);
-	val *= delta;
-	val += lower;
-
-	F32 max_error = delta*OOU8MAX;
-
-	// make sure that zero's come through as zero
-	if (fabsf(val) < max_error)
-		val = 0.f;
-
-	return val;
-}
-
-inline U8 F32_TO_STRING(F32 val, F32 lower, F32 upper)
-{
-	val = llclamp(val, lower, upper); //[lower, upper]
-	// make sure that the value is positive and normalized to <0, 1>
-	val -= lower;					//[0, upper-lower]
-	val /= (upper - lower);			//[0,1]
-	val = val * MAXSTRINGVAL;		//[0, MAXSTRINGVAL]
-	val = floor(val + 0.5f);		//[0, MAXSTRINGVAL]
-
-	U8 stringVal = (U8)(val) + FIRSTVALIDCHAR;			//[FIRSTVALIDCHAR, MAXSTRINGVAL + FIRSTVALIDCHAR]
-	return stringVal;
-}
-
-inline F32 STRING_TO_F32(U8 ival, F32 lower, F32 upper)
-{
-	// remove empty space left for NULL, newline, etc.
-	ival -= FIRSTVALIDCHAR;								//[0, MAXSTRINGVAL]
-
-	F32 val = (F32)ival * (1.f / (F32)MAXSTRINGVAL);	//[0, 1]
-	F32 delta = (upper - lower);
-	val *= delta;										//[0, upper - lower]
-	val += lower;										//[lower, upper]
-
-	return val;
-}
-
-#endif
+/** 
+ * @file llquantize.h
+ * @brief useful routines for quantizing floats to various length ints
+ * and back out again
+ *
+ * $LicenseInfo:firstyear=2001&license=viewergpl$
+ * 
+ * Copyright (c) 2001-2009, Linden Research, Inc.
+ * 
+ * Second Life Viewer Source Code
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
+ * 
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at
+ * http://secondlifegrid.net/programs/open_source/licensing/flossexception
+ * 
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ * 
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_LLQUANTIZE_H
+#define LL_LLQUANTIZE_H
+
+const U16 U16MAX = 65535;
+LL_ALIGN_16( const F32 F_U16MAX_4A[4] ) = { 65535.f, 65535.f, 65535.f, 65535.f };
+
+const F32 OOU16MAX = 1.f/(F32)(U16MAX);
+LL_ALIGN_16( const F32 F_OOU16MAX_4A[4] ) = { OOU16MAX, OOU16MAX, OOU16MAX, OOU16MAX };
+
+const U8 U8MAX = 255;
+LL_ALIGN_16( const F32 F_U8MAX_4A[4] ) = { 255.f, 255.f, 255.f, 255.f };
+
+const F32 OOU8MAX = 1.f/(F32)(U8MAX);
+LL_ALIGN_16( const F32 F_OOU8MAX_4A[4] ) = { OOU8MAX, OOU8MAX, OOU8MAX, OOU8MAX };
+
+const U8 FIRSTVALIDCHAR = 54;
+const U8 MAXSTRINGVAL = U8MAX - FIRSTVALIDCHAR; //we don't allow newline or null 
+
+
+inline U16 F32_to_U16_ROUND(F32 val, F32 lower, F32 upper)
+{
+	val = llclamp(val, lower, upper);
+	// make sure that the value is positive and normalized to <0, 1>
+	val -= lower;
+	val /= (upper - lower);
+
+	// round the value and return the U16
+	return (U16)(llround(val*U16MAX));
+}
+
+
+inline U16 F32_to_U16(F32 val, F32 lower, F32 upper)
+{
+	val = llclamp(val, lower, upper);
+	// make sure that the value is positive and normalized to <0, 1>
+	val -= lower;
+	val /= (upper - lower);
+
+	// return the U16
+	return (U16)(llfloor(val*U16MAX));
+}
+
+inline F32 U16_to_F32(U16 ival, F32 lower, F32 upper)
+{
+	F32 val = ival*OOU16MAX;
+	F32 delta = (upper - lower);
+	val *= delta;
+	val += lower;
+
+	F32 max_error = delta*OOU16MAX;
+
+	// make sure that zeros come through as zero
+	if (fabsf(val) < max_error)
+		val = 0.f;
+
+	return val;
+}
+
+
+inline U8 F32_to_U8_ROUND(F32 val, F32 lower, F32 upper)
+{
+	val = llclamp(val, lower, upper);
+	// make sure that the value is positive and normalized to <0, 1>
+	val -= lower;
+	val /= (upper - lower);
+
+	// return the rounded U8
+	return (U8)(llround(val*U8MAX));
+}
+
+
+inline U8 F32_to_U8(F32 val, F32 lower, F32 upper)
+{
+	val = llclamp(val, lower, upper);
+	// make sure that the value is positive and normalized to <0, 1>
+	val -= lower;
+	val /= (upper - lower);
+
+	// return the U8
+	return (U8)(llfloor(val*U8MAX));
+}
+
+inline F32 U8_to_F32(U8 ival, F32 lower, F32 upper)
+{
+	F32 val = ival*OOU8MAX;
+	F32 delta = (upper - lower);
+	val *= delta;
+	val += lower;
+
+	F32 max_error = delta*OOU8MAX;
+
+	// make sure that zeros come through as zero
+	if (fabsf(val) < max_error)
+		val = 0.f;
+
+	return val;
+}
+
+inline U8 F32_TO_STRING(F32 val, F32 lower, F32 upper)
+{
+	val = llclamp(val, lower, upper); //[lower, upper]
+	// make sure that the value is positive and normalized to <0, 1>
+	val -= lower;					//[0, upper-lower]
+	val /= (upper - lower);			//[0,1]
+	val = val * MAXSTRINGVAL;		//[0, MAXSTRINGVAL]
+	val = floor(val + 0.5f);		//[0, MAXSTRINGVAL]
+
+	U8 stringVal = (U8)(val) + FIRSTVALIDCHAR;			//[FIRSTVALIDCHAR, MAXSTRINGVAL + FIRSTVALIDCHAR]
+	return stringVal;
+}
+
+inline F32 STRING_TO_F32(U8 ival, F32 lower, F32 upper)
+{
+	// remove empty space left for NULL, newline, etc.
+	ival -= FIRSTVALIDCHAR;								//[0, MAXSTRINGVAL]
+
+	F32 val = (F32)ival * (1.f / (F32)MAXSTRINGVAL);	//[0, 1]
+	F32 delta = (upper - lower);
+	val *= delta;										//[0, upper - lower]
+	val += lower;										//[lower, upper]
+
+	return val;
+}
+
+#endif
diff --git a/indra/llmath/llquaternion.cpp b/indra/llmath/llquaternion.cpp
index fdcc19d657..efdc10e2c6 100644
--- a/indra/llmath/llquaternion.cpp
+++ b/indra/llmath/llquaternion.cpp
@@ -1,960 +1,961 @@
-/** 
- * @file llquaternion.cpp
- * @brief LLQuaternion class implementation.
- *
- * $LicenseInfo:firstyear=2000&license=viewergpl$
- * 
- * Copyright (c) 2000-2009, Linden Research, Inc.
- * 
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab.  Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- * 
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- * 
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- * 
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
-
-#include "linden_common.h"
-
-#include "llquaternion.h"
-
-#include "llmath.h"	// for F_PI
-//#include "vmath.h"
-#include "v3math.h"
-#include "v3dmath.h"
-#include "v4math.h"
-#include "m4math.h"
-#include "m3math.h"
-#include "llquantize.h"
-
-// WARNING: Don't use this for global const definitions!  using this
-// at the top of a *.cpp file might not give you what you think.
-const LLQuaternion LLQuaternion::DEFAULT;
- 
-// Constructors
-
-LLQuaternion::LLQuaternion(const LLMatrix4 &mat)
-{
-	*this = mat.quaternion();
-	normalize();
-}
-
-LLQuaternion::LLQuaternion(const LLMatrix3 &mat)
-{
-	*this = mat.quaternion();
-	normalize();
-}
-
-LLQuaternion::LLQuaternion(F32 angle, const LLVector4 &vec)
-{
-	LLVector3 v(vec.mV[VX], vec.mV[VY], vec.mV[VZ]);
-	v.normalize();
-
-	F32 c, s;
-	c = cosf(angle*0.5f);
-	s = sinf(angle*0.5f);
-
-	mQ[VX] = v.mV[VX] * s;
-	mQ[VY] = v.mV[VY] * s;
-	mQ[VZ] = v.mV[VZ] * s;
-	mQ[VW] = c;
-	normalize();
-}
-
-LLQuaternion::LLQuaternion(F32 angle, const LLVector3 &vec)
-{
-	LLVector3 v(vec);
-	v.normalize();
-
-	F32 c, s;
-	c = cosf(angle*0.5f);
-	s = sinf(angle*0.5f);
-
-	mQ[VX] = v.mV[VX] * s;
-	mQ[VY] = v.mV[VY] * s;
-	mQ[VZ] = v.mV[VZ] * s;
-	mQ[VW] = c;
-	normalize();
-}
-
-LLQuaternion::LLQuaternion(const LLVector3 &x_axis,
-						   const LLVector3 &y_axis,
-						   const LLVector3 &z_axis)
-{
-	LLMatrix3 mat;
-	mat.setRows(x_axis, y_axis, z_axis);
-	*this = mat.quaternion();
-	normalize();
-}
-
-// Quatizations
-void	LLQuaternion::quantize16(F32 lower, F32 upper)
-{
-	F32 x = mQ[VX];
-	F32 y = mQ[VY];
-	F32 z = mQ[VZ];
-	F32 s = mQ[VS];
-
-	x = U16_to_F32(F32_to_U16_ROUND(x, lower, upper), lower, upper);
-	y = U16_to_F32(F32_to_U16_ROUND(y, lower, upper), lower, upper);
-	z = U16_to_F32(F32_to_U16_ROUND(z, lower, upper), lower, upper);
-	s = U16_to_F32(F32_to_U16_ROUND(s, lower, upper), lower, upper);
-
-	mQ[VX] = x;
-	mQ[VY] = y;
-	mQ[VZ] = z;
-	mQ[VS] = s;
-
-	normalize();
-}
-
-void	LLQuaternion::quantize8(F32 lower, F32 upper)
-{
-	mQ[VX] = U8_to_F32(F32_to_U8_ROUND(mQ[VX], lower, upper), lower, upper);
-	mQ[VY] = U8_to_F32(F32_to_U8_ROUND(mQ[VY], lower, upper), lower, upper);
-	mQ[VZ] = U8_to_F32(F32_to_U8_ROUND(mQ[VZ], lower, upper), lower, upper);
-	mQ[VS] = U8_to_F32(F32_to_U8_ROUND(mQ[VS], lower, upper), lower, upper);
-
-	normalize();
-}
-
-// LLVector3 Magnitude and Normalization Functions
-
-
-// Set LLQuaternion routines
-
-const LLQuaternion&	LLQuaternion::setAngleAxis(F32 angle, F32 x, F32 y, F32 z)
-{
-	LLVector3 vec(x, y, z);
-	vec.normalize();
-
-	angle *= 0.5f;
-	F32 c, s;
-	c = cosf(angle);
-	s = sinf(angle);
-
-	mQ[VX] = vec.mV[VX]*s;
-	mQ[VY] = vec.mV[VY]*s;
-	mQ[VZ] = vec.mV[VZ]*s;
-	mQ[VW] = c;
-
-	normalize();
-	return (*this);
-}
-
-const LLQuaternion&	LLQuaternion::setAngleAxis(F32 angle, const LLVector3 &vec)
-{
-	LLVector3 v(vec);
-	v.normalize();
-
-	angle *= 0.5f;
-	F32 c, s;
-	c = cosf(angle);
-	s = sinf(angle);
-
-	mQ[VX] = v.mV[VX]*s;
-	mQ[VY] = v.mV[VY]*s;
-	mQ[VZ] = v.mV[VZ]*s;
-	mQ[VW] = c;
-
-	normalize();
-	return (*this);
-}
-
-const LLQuaternion&	LLQuaternion::setAngleAxis(F32 angle, const LLVector4 &vec)
-{
-	LLVector3 v(vec.mV[VX], vec.mV[VY], vec.mV[VZ]);
-	v.normalize();
-
-	F32 c, s;
-	c = cosf(angle*0.5f);
-	s = sinf(angle*0.5f);
-
-	mQ[VX] = v.mV[VX]*s;
-	mQ[VY] = v.mV[VY]*s;
-	mQ[VZ] = v.mV[VZ]*s;
-	mQ[VW] = c;
-
-	normalize();
-	return (*this);
-}
-
-const LLQuaternion&	LLQuaternion::setEulerAngles(F32 roll, F32 pitch, F32 yaw)
-{
-	LLMatrix3 rot_mat(roll, pitch, yaw);
-	rot_mat.orthogonalize();
-	*this = rot_mat.quaternion();
-		
-	normalize();
-	return (*this);
-}
-
-// deprecated
-const LLQuaternion&	LLQuaternion::set(const LLMatrix3 &mat)
-{
-	*this = mat.quaternion();
-	normalize();
-	return (*this);
-}
-
-// deprecated
-const LLQuaternion&	LLQuaternion::set(const LLMatrix4 &mat)
-{
-	*this = mat.quaternion();
-	normalize();
-	return (*this);
-}
-
-// deprecated
-const LLQuaternion&	LLQuaternion::setQuat(F32 angle, F32 x, F32 y, F32 z)
-{
-	LLVector3 vec(x, y, z);
-	vec.normalize();
-
-	angle *= 0.5f;
-	F32 c, s;
-	c = cosf(angle);
-	s = sinf(angle);
-
-	mQ[VX] = vec.mV[VX]*s;
-	mQ[VY] = vec.mV[VY]*s;
-	mQ[VZ] = vec.mV[VZ]*s;
-	mQ[VW] = c;
-
-	normalize();
-	return (*this);
-}
-
-// deprecated
-const LLQuaternion&	LLQuaternion::setQuat(F32 angle, const LLVector3 &vec)
-{
-	LLVector3 v(vec);
-	v.normalize();
-
-	angle *= 0.5f;
-	F32 c, s;
-	c = cosf(angle);
-	s = sinf(angle);
-
-	mQ[VX] = v.mV[VX]*s;
-	mQ[VY] = v.mV[VY]*s;
-	mQ[VZ] = v.mV[VZ]*s;
-	mQ[VW] = c;
-
-	normalize();
-	return (*this);
-}
-
-const LLQuaternion&	LLQuaternion::setQuat(F32 angle, const LLVector4 &vec)
-{
-	LLVector3 v(vec.mV[VX], vec.mV[VY], vec.mV[VZ]);
-	v.normalize();
-
-	F32 c, s;
-	c = cosf(angle*0.5f);
-	s = sinf(angle*0.5f);
-
-	mQ[VX] = v.mV[VX]*s;
-	mQ[VY] = v.mV[VY]*s;
-	mQ[VZ] = v.mV[VZ]*s;
-	mQ[VW] = c;
-
-	normalize();
-	return (*this);
-}
-
-const LLQuaternion&	LLQuaternion::setQuat(F32 roll, F32 pitch, F32 yaw)
-{
-	LLMatrix3 rot_mat(roll, pitch, yaw);
-	rot_mat.orthogonalize();
-	*this = rot_mat.quaternion();
-		
-	normalize();
-	return (*this);
-}
-
-const LLQuaternion&	LLQuaternion::setQuat(const LLMatrix3 &mat)
-{
-	*this = mat.quaternion();
-	normalize();
-	return (*this);
-}
-
-const LLQuaternion&	LLQuaternion::setQuat(const LLMatrix4 &mat)
-{
-	*this = mat.quaternion();
-	normalize();
-	return (*this);
-//#if 1
-//	// NOTE: LLQuaternion's are actually inverted with respect to
-//	// the matrices, so this code also assumes inverted quaternions
-//	// (-x, -y, -z, w). The result is that roll,pitch,yaw are applied
-//	// in reverse order (yaw,pitch,roll).
-//	F64 cosX = cos(roll);
-//    F64 cosY = cos(pitch);
-//    F64 cosZ = cos(yaw);
-//
-//    F64 sinX = sin(roll);
-//    F64 sinY = sin(pitch);
-//    F64 sinZ = sin(yaw);
-//
-//    mQ[VW] = (F32)sqrt(cosY*cosZ - sinX*sinY*sinZ + cosX*cosZ + cosX*cosY + 1.0)*.5;
-//	if (fabs(mQ[VW]) < F_APPROXIMATELY_ZERO)
-//	{
-//		// null rotation, any axis will do
-//		mQ[VX] = 0.0f;
-//		mQ[VY] = 1.0f;
-//		mQ[VZ] = 0.0f;
-//	}
-//	else
-//	{
-//		F32 inv_s = 1.0f / (4.0f * mQ[VW]);
-//		mQ[VX] = (F32)-(-sinX*cosY - cosX*sinY*sinZ - sinX*cosZ) * inv_s;
-//		mQ[VY] = (F32)-(-cosX*sinY*cosZ + sinX*sinZ - sinY) * inv_s;
-//		mQ[VZ] = (F32)-(-cosY*sinZ - sinX*sinY*cosZ - cosX*sinZ) * inv_s;		
-//	}
-//
-//#else // This only works on a certain subset of roll/pitch/yaw
-//	
-//	F64 cosX = cosf(roll/2.0);
-//    F64 cosY = cosf(pitch/2.0);
-//    F64 cosZ = cosf(yaw/2.0);
-//
-//    F64 sinX = sinf(roll/2.0);
-//    F64 sinY = sinf(pitch/2.0);
-//    F64 sinZ = sinf(yaw/2.0);
-//
-//    mQ[VW] = (F32)(cosX*cosY*cosZ + sinX*sinY*sinZ);
-//    mQ[VX] = (F32)(sinX*cosY*cosZ - cosX*sinY*sinZ);
-//    mQ[VY] = (F32)(cosX*sinY*cosZ + sinX*cosY*sinZ);
-//    mQ[VZ] = (F32)(cosX*cosY*sinZ - sinX*sinY*cosZ);
-//#endif
-//
-//	normalize();
-//	return (*this);
-}
-
-// SJB: This code is correct for a logicly stored (non-transposed) matrix;
-//		Our matrices are stored transposed, OpenGL style, so this generates the
-//		INVERSE matrix, or the CORRECT matrix form an INVERSE quaternion.
-//		Because we use similar logic in LLMatrix3::quaternion(),
-//		we are internally consistant so everything works OK :)
-LLMatrix3	LLQuaternion::getMatrix3(void) const
-{
-	LLMatrix3	mat;
-	F32		xx, xy, xz, xw, yy, yz, yw, zz, zw;
-
-    xx      = mQ[VX] * mQ[VX];
-    xy      = mQ[VX] * mQ[VY];
-    xz      = mQ[VX] * mQ[VZ];
-    xw      = mQ[VX] * mQ[VW];
-
-    yy      = mQ[VY] * mQ[VY];
-    yz      = mQ[VY] * mQ[VZ];
-    yw      = mQ[VY] * mQ[VW];
-
-    zz      = mQ[VZ] * mQ[VZ];
-    zw      = mQ[VZ] * mQ[VW];
-
-    mat.mMatrix[0][0]  = 1.f - 2.f * ( yy + zz );
-    mat.mMatrix[0][1]  =	   2.f * ( xy + zw );
-    mat.mMatrix[0][2]  =	   2.f * ( xz - yw );
-
-    mat.mMatrix[1][0]  =	   2.f * ( xy - zw );
-    mat.mMatrix[1][1]  = 1.f - 2.f * ( xx + zz );
-    mat.mMatrix[1][2]  =	   2.f * ( yz + xw );
-
-    mat.mMatrix[2][0]  =	   2.f * ( xz + yw );
-    mat.mMatrix[2][1]  =	   2.f * ( yz - xw );
-    mat.mMatrix[2][2]  = 1.f - 2.f * ( xx + yy );
-
-	return mat;
-}
-
-LLMatrix4	LLQuaternion::getMatrix4(void) const
-{
-	LLMatrix4	mat;
-	F32		xx, xy, xz, xw, yy, yz, yw, zz, zw;
-
-    xx      = mQ[VX] * mQ[VX];
-    xy      = mQ[VX] * mQ[VY];
-    xz      = mQ[VX] * mQ[VZ];
-    xw      = mQ[VX] * mQ[VW];
-
-    yy      = mQ[VY] * mQ[VY];
-    yz      = mQ[VY] * mQ[VZ];
-    yw      = mQ[VY] * mQ[VW];
-
-    zz      = mQ[VZ] * mQ[VZ];
-    zw      = mQ[VZ] * mQ[VW];
-
-    mat.mMatrix[0][0]  = 1.f - 2.f * ( yy + zz );
-    mat.mMatrix[0][1]  =	   2.f * ( xy + zw );
-    mat.mMatrix[0][2]  =	   2.f * ( xz - yw );
-
-    mat.mMatrix[1][0]  =	   2.f * ( xy - zw );
-    mat.mMatrix[1][1]  = 1.f - 2.f * ( xx + zz );
-    mat.mMatrix[1][2]  =	   2.f * ( yz + xw );
-
-    mat.mMatrix[2][0]  =	   2.f * ( xz + yw );
-    mat.mMatrix[2][1]  =	   2.f * ( yz - xw );
-    mat.mMatrix[2][2]  = 1.f - 2.f * ( xx + yy );
-
-	// TODO -- should we set the translation portion to zero?
-
-	return mat;
-}
-
-
-
-
-// Other useful methods
-
-
-// calculate the shortest rotation from a to b
-void LLQuaternion::shortestArc(const LLVector3 &a, const LLVector3 &b)
-{
-	// Make a local copy of both vectors.
-	LLVector3 vec_a = a;
-	LLVector3 vec_b = b;
-
-	// Make sure neither vector is zero length.  Also normalize
-	// the vectors while we are at it.
-	F32 vec_a_mag = vec_a.normalize();
-	F32 vec_b_mag = vec_b.normalize();
-	if (vec_a_mag < F_APPROXIMATELY_ZERO ||
-		vec_b_mag < F_APPROXIMATELY_ZERO)
-	{
-		// Can't calculate a rotation from this.
-		// Just return ZERO_ROTATION instead.
-		loadIdentity();
-		return;
-	}
-
-	// Create an axis to rotate around, and the cos of the angle to rotate.
-	LLVector3 axis = vec_a % vec_b;
-	F32 cos_theta  = vec_a * vec_b;
-
-	// Check the angle between the vectors to see if they are parallel or anti-parallel.
-	if (cos_theta > 1.0 - F_APPROXIMATELY_ZERO)
-	{
-		// a and b are parallel.  No rotation is necessary.
-		loadIdentity();
-	}
-	else if (cos_theta < -1.0 + F_APPROXIMATELY_ZERO)
-	{
-		// a and b are anti-parallel.
-		// Rotate 180 degrees around some orthogonal axis.
-		// Find the projection of the x-axis onto a, and try
-		// using the vector between the projection and the x-axis
-		// as the orthogonal axis.
-		LLVector3 proj = vec_a.mV[VX] / (vec_a * vec_a) * vec_a;
-		LLVector3 ortho_axis(1.f, 0.f, 0.f);
-		ortho_axis -= proj;
-		
-		// Turn this into an orthonormal axis.
-		F32 ortho_length = ortho_axis.normalize();
-		// If the axis' length is 0, then our guess at an orthogonal axis
-		// was wrong (a is parallel to the x-axis).
-		if (ortho_length < F_APPROXIMATELY_ZERO)
-		{
-			// Use the z-axis instead.
-			ortho_axis.setVec(0.f, 0.f, 1.f);
-		}
-
-		// Construct a quaternion from this orthonormal axis.
-		mQ[VX] = ortho_axis.mV[VX];
-		mQ[VY] = ortho_axis.mV[VY];
-		mQ[VZ] = ortho_axis.mV[VZ];
-		mQ[VW] = 0.f;
-	}
-	else
-	{
-		// a and b are NOT parallel or anti-parallel.
-		// Return the rotation between these vectors.
-		F32 theta = (F32)acos(cos_theta);
-
-		setAngleAxis(theta, axis);
-	}
-}
-
-// constrains rotation to a cone angle specified in radians
-const LLQuaternion &LLQuaternion::constrain(F32 radians)
-{
-	const F32 cos_angle_lim = cosf( radians/2 );	// mQ[VW] limit
-	const F32 sin_angle_lim = sinf( radians/2 );	// rotation axis length	limit
-
-	if (mQ[VW] < 0.f)
-	{
-		mQ[VX] *= -1.f;
-		mQ[VY] *= -1.f;
-		mQ[VZ] *= -1.f;
-		mQ[VW] *= -1.f;
-	}
-
-	// if rotation angle is greater than limit (cos is less than limit)
-	if( mQ[VW] < cos_angle_lim )
-	{
-		mQ[VW] = cos_angle_lim;
-		F32 axis_len = sqrtf( mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] ); // sin(theta/2)
-		F32 axis_mult_fact = sin_angle_lim / axis_len;
-		mQ[VX] *= axis_mult_fact;
-		mQ[VY] *= axis_mult_fact;
-		mQ[VZ] *= axis_mult_fact;
-	}
-
-	return *this;
-}
-
-// Operators
-
-std::ostream& operator<<(std::ostream &s, const LLQuaternion &a)
-{
-	s << "{ " 
-		<< a.mQ[VX] << ", " << a.mQ[VY] << ", " << a.mQ[VZ] << ", " << a.mQ[VW] 
-	<< " }";
-	return s;
-}
-
-
-// Does NOT renormalize the result
-LLQuaternion	operator*(const LLQuaternion &a, const LLQuaternion &b)
-{
-//	LLQuaternion::mMultCount++;
-
-	LLQuaternion q(
-		b.mQ[3] * a.mQ[0] + b.mQ[0] * a.mQ[3] + b.mQ[1] * a.mQ[2] - b.mQ[2] * a.mQ[1],
-		b.mQ[3] * a.mQ[1] + b.mQ[1] * a.mQ[3] + b.mQ[2] * a.mQ[0] - b.mQ[0] * a.mQ[2],
-		b.mQ[3] * a.mQ[2] + b.mQ[2] * a.mQ[3] + b.mQ[0] * a.mQ[1] - b.mQ[1] * a.mQ[0],
-		b.mQ[3] * a.mQ[3] - b.mQ[0] * a.mQ[0] - b.mQ[1] * a.mQ[1] - b.mQ[2] * a.mQ[2]
-	);
-	return q;
-}
-
-/*
-LLMatrix4	operator*(const LLMatrix4 &m, const LLQuaternion &q)
-{
-	LLMatrix4 qmat(q);
-	return (m*qmat);
-}
-*/
-
-
-
-LLVector4		operator*(const LLVector4 &a, const LLQuaternion &rot)
-{
-    F32 rw = - rot.mQ[VX] * a.mV[VX] - rot.mQ[VY] * a.mV[VY] - rot.mQ[VZ] * a.mV[VZ];
-    F32 rx =   rot.mQ[VW] * a.mV[VX] + rot.mQ[VY] * a.mV[VZ] - rot.mQ[VZ] * a.mV[VY];
-    F32 ry =   rot.mQ[VW] * a.mV[VY] + rot.mQ[VZ] * a.mV[VX] - rot.mQ[VX] * a.mV[VZ];
-    F32 rz =   rot.mQ[VW] * a.mV[VZ] + rot.mQ[VX] * a.mV[VY] - rot.mQ[VY] * a.mV[VX];
-
-    F32 nx = - rw * rot.mQ[VX] +  rx * rot.mQ[VW] - ry * rot.mQ[VZ] + rz * rot.mQ[VY];
-    F32 ny = - rw * rot.mQ[VY] +  ry * rot.mQ[VW] - rz * rot.mQ[VX] + rx * rot.mQ[VZ];
-    F32 nz = - rw * rot.mQ[VZ] +  rz * rot.mQ[VW] - rx * rot.mQ[VY] + ry * rot.mQ[VX];
-
-    return LLVector4(nx, ny, nz, a.mV[VW]);
-}
-
-LLVector3		operator*(const LLVector3 &a, const LLQuaternion &rot)
-{
-    F32 rw = - rot.mQ[VX] * a.mV[VX] - rot.mQ[VY] * a.mV[VY] - rot.mQ[VZ] * a.mV[VZ];
-    F32 rx =   rot.mQ[VW] * a.mV[VX] + rot.mQ[VY] * a.mV[VZ] - rot.mQ[VZ] * a.mV[VY];
-    F32 ry =   rot.mQ[VW] * a.mV[VY] + rot.mQ[VZ] * a.mV[VX] - rot.mQ[VX] * a.mV[VZ];
-    F32 rz =   rot.mQ[VW] * a.mV[VZ] + rot.mQ[VX] * a.mV[VY] - rot.mQ[VY] * a.mV[VX];
-
-    F32 nx = - rw * rot.mQ[VX] +  rx * rot.mQ[VW] - ry * rot.mQ[VZ] + rz * rot.mQ[VY];
-    F32 ny = - rw * rot.mQ[VY] +  ry * rot.mQ[VW] - rz * rot.mQ[VX] + rx * rot.mQ[VZ];
-    F32 nz = - rw * rot.mQ[VZ] +  rz * rot.mQ[VW] - rx * rot.mQ[VY] + ry * rot.mQ[VX];
-
-    return LLVector3(nx, ny, nz);
-}
-
-LLVector3d		operator*(const LLVector3d &a, const LLQuaternion &rot)
-{
-    F64 rw = - rot.mQ[VX] * a.mdV[VX] - rot.mQ[VY] * a.mdV[VY] - rot.mQ[VZ] * a.mdV[VZ];
-    F64 rx =   rot.mQ[VW] * a.mdV[VX] + rot.mQ[VY] * a.mdV[VZ] - rot.mQ[VZ] * a.mdV[VY];
-    F64 ry =   rot.mQ[VW] * a.mdV[VY] + rot.mQ[VZ] * a.mdV[VX] - rot.mQ[VX] * a.mdV[VZ];
-    F64 rz =   rot.mQ[VW] * a.mdV[VZ] + rot.mQ[VX] * a.mdV[VY] - rot.mQ[VY] * a.mdV[VX];
-
-    F64 nx = - rw * rot.mQ[VX] +  rx * rot.mQ[VW] - ry * rot.mQ[VZ] + rz * rot.mQ[VY];
-    F64 ny = - rw * rot.mQ[VY] +  ry * rot.mQ[VW] - rz * rot.mQ[VX] + rx * rot.mQ[VZ];
-    F64 nz = - rw * rot.mQ[VZ] +  rz * rot.mQ[VW] - rx * rot.mQ[VY] + ry * rot.mQ[VX];
-
-    return LLVector3d(nx, ny, nz);
-}
-
-F32 dot(const LLQuaternion &a, const LLQuaternion &b)
-{
-	return a.mQ[VX] * b.mQ[VX] + 
-		   a.mQ[VY] * b.mQ[VY] + 
-		   a.mQ[VZ] * b.mQ[VZ] + 
-		   a.mQ[VW] * b.mQ[VW]; 
-}
-
-// DEMO HACK: This lerp is probably inocrrect now due intermediate normalization
-// it should look more like the lerp below
-#if 0
-// linear interpolation
-LLQuaternion lerp(F32 t, const LLQuaternion &p, const LLQuaternion &q)
-{
-	LLQuaternion r;
-	r = t * (q - p) + p;
-	r.normalize();
-	return r;
-}
-#endif
-
-// lerp from identity to q
-LLQuaternion lerp(F32 t, const LLQuaternion &q)
-{
-	LLQuaternion r;
-	r.mQ[VX] = t * q.mQ[VX];
-	r.mQ[VY] = t * q.mQ[VY];
-	r.mQ[VZ] = t * q.mQ[VZ];
-	r.mQ[VW] = t * (q.mQ[VZ] - 1.f) + 1.f;
-	r.normalize();
-	return r;
-}
-
-LLQuaternion lerp(F32 t, const LLQuaternion &p, const LLQuaternion &q)
-{
-	LLQuaternion r;
-	F32 inv_t;
-
-	inv_t = 1.f - t;
-
-	r.mQ[VX] = t * q.mQ[VX] + (inv_t * p.mQ[VX]);
-	r.mQ[VY] = t * q.mQ[VY] + (inv_t * p.mQ[VY]);
-	r.mQ[VZ] = t * q.mQ[VZ] + (inv_t * p.mQ[VZ]);
-	r.mQ[VW] = t * q.mQ[VW] + (inv_t * p.mQ[VW]);
-	r.normalize();
-	return r;
-}
-
-
-// spherical linear interpolation
-LLQuaternion slerp( F32 u, const LLQuaternion &a, const LLQuaternion &b )
-{
-	// cosine theta = dot product of a and b
-	F32 cos_t = a.mQ[0]*b.mQ[0] + a.mQ[1]*b.mQ[1] + a.mQ[2]*b.mQ[2] + a.mQ[3]*b.mQ[3];
-	
-	// if b is on opposite hemisphere from a, use -a instead
-	int bflip;
- 	if (cos_t < 0.0f)
-	{
-		cos_t = -cos_t;
-		bflip = TRUE;
-	}
-	else
-		bflip = FALSE;
-
-	// if B is (within precision limits) the same as A,
-	// just linear interpolate between A and B.
-	F32 alpha;	// interpolant
-	F32 beta;		// 1 - interpolant
-	if (1.0f - cos_t < 0.00001f)
-	{
-		beta = 1.0f - u;
-		alpha = u;
- 	}
-	else
-	{
- 		F32 theta = acosf(cos_t);
- 		F32 sin_t = sinf(theta);
- 		beta = sinf(theta - u*theta) / sin_t;
- 		alpha = sinf(u*theta) / sin_t;
- 	}
-
-	if (bflip)
-		beta = -beta;
-
-	// interpolate
-	LLQuaternion ret;
-	ret.mQ[0] = beta*a.mQ[0] + alpha*b.mQ[0];
- 	ret.mQ[1] = beta*a.mQ[1] + alpha*b.mQ[1];
- 	ret.mQ[2] = beta*a.mQ[2] + alpha*b.mQ[2];
- 	ret.mQ[3] = beta*a.mQ[3] + alpha*b.mQ[3];
-
-	return ret;
-}
-
-// lerp whenever possible
-LLQuaternion nlerp(F32 t, const LLQuaternion &a, const LLQuaternion &b)
-{
-	if (dot(a, b) < 0.f)
-	{
-		return slerp(t, a, b);
-	}
-	else
-	{
-		return lerp(t, a, b);
-	}
-}
-
-LLQuaternion nlerp(F32 t, const LLQuaternion &q)
-{
-	if (q.mQ[VW] < 0.f)
-	{
-		return slerp(t, q);
-	}
-	else
-	{
-		return lerp(t, q);
-	}
-}
-
-// slerp from identity quaternion to another quaternion
-LLQuaternion slerp(F32 t, const LLQuaternion &q)
-{
-	F32 c = q.mQ[VW];
-	if (1.0f == t  ||  1.0f == c)
-	{
-		// the trivial cases
-		return q;
-	}
-
-	LLQuaternion r;
-	F32 s, angle, stq, stp;
-
-	s = (F32) sqrt(1.f - c*c);
-
-    if (c < 0.0f)
-    {
-        // when c < 0.0 then theta > PI/2 
-        // since quat and -quat are the same rotation we invert one of  
-        // p or q to reduce unecessary spins
-        // A equivalent way to do it is to convert acos(c) as if it had 
-		// been negative, and to negate stp 
-        angle   = (F32) acos(-c); 
-        stp     = -(F32) sin(angle * (1.f - t));
-        stq     = (F32) sin(angle * t);
-    }   
-    else
-    {
-		angle 	= (F32) acos(c);
-        stp     = (F32) sin(angle * (1.f - t));
-        stq     = (F32) sin(angle * t);
-    }
-
-	r.mQ[VX] = (q.mQ[VX] * stq) / s;
-	r.mQ[VY] = (q.mQ[VY] * stq) / s;
-	r.mQ[VZ] = (q.mQ[VZ] * stq) / s;
-	r.mQ[VW] = (stp + q.mQ[VW] * stq) / s;
-
-	return r;
-}
-
-LLQuaternion mayaQ(F32 xRot, F32 yRot, F32 zRot, LLQuaternion::Order order)
-{
-	LLQuaternion xQ( xRot*DEG_TO_RAD, LLVector3(1.0f, 0.0f, 0.0f) );
-	LLQuaternion yQ( yRot*DEG_TO_RAD, LLVector3(0.0f, 1.0f, 0.0f) );
-	LLQuaternion zQ( zRot*DEG_TO_RAD, LLVector3(0.0f, 0.0f, 1.0f) );
-	LLQuaternion ret;
-	switch( order )
-	{
-	case LLQuaternion::XYZ:
-		ret = xQ * yQ * zQ;
-		break;
-	case LLQuaternion::YZX:
-		ret = yQ * zQ * xQ;
-		break;
-	case LLQuaternion::ZXY:
-		ret = zQ * xQ * yQ;
-		break;
-	case LLQuaternion::XZY:
-		ret = xQ * zQ * yQ;
-		break;
-	case LLQuaternion::YXZ:
-		ret = yQ * xQ * zQ;
-		break;
-	case LLQuaternion::ZYX:
-		ret = zQ * yQ * xQ;
-		break;
-	}
-	return ret;
-}
-
-const char *OrderToString( const LLQuaternion::Order order )
-{
-	const char *p = NULL;
-	switch( order )
-	{
-	default:
-	case LLQuaternion::XYZ:
-		p = "XYZ";
-		break;
-	case LLQuaternion::YZX:
-		p = "YZX";
-		break;
-	case LLQuaternion::ZXY:
-		p = "ZXY";
-		break;
-	case LLQuaternion::XZY:
-		p = "XZY";
-		break;
-	case LLQuaternion::YXZ:
-		p = "YXZ";
-		break;
-	case LLQuaternion::ZYX:
-		p = "ZYX";
-		break;
-	}
-	return p;
-}
-
-LLQuaternion::Order StringToOrder( const char *str )
-{
-	if (strncmp(str, "XYZ", 3)==0 || strncmp(str, "xyz", 3)==0)
-		return LLQuaternion::XYZ;
-
-	if (strncmp(str, "YZX", 3)==0 || strncmp(str, "yzx", 3)==0)
-		return LLQuaternion::YZX;
-
-	if (strncmp(str, "ZXY", 3)==0 || strncmp(str, "zxy", 3)==0)
-		return LLQuaternion::ZXY;
-
-	if (strncmp(str, "XZY", 3)==0 || strncmp(str, "xzy", 3)==0)
-		return LLQuaternion::XZY;
-
-	if (strncmp(str, "YXZ", 3)==0 || strncmp(str, "yxz", 3)==0)
-		return LLQuaternion::YXZ;
-
-	if (strncmp(str, "ZYX", 3)==0 || strncmp(str, "zyx", 3)==0)
-		return LLQuaternion::ZYX;
-
-	return LLQuaternion::XYZ;
-}
-
-void LLQuaternion::getAngleAxis(F32* angle, LLVector3 &vec) const
-{
-	F32 cos_a = mQ[VW];
-	if (cos_a > 1.0f) cos_a = 1.0f;
-	if (cos_a < -1.0f) cos_a = -1.0f;
-
-    F32 sin_a = (F32) sqrt( 1.0f - cos_a * cos_a );
-
-    if ( fabs( sin_a ) < 0.0005f )
-		sin_a = 1.0f;
-	else
-		sin_a = 1.f/sin_a;
-
-    F32 temp_angle = 2.0f * (F32) acos( cos_a );
-	if (temp_angle > F_PI)
-	{
-		// The (angle,axis) pair should never have angles outside [PI, -PI]
-		// since we want the _shortest_ (angle,axis) solution.
-		// Since acos is defined for [0, PI], and we multiply by 2.0, we
-		// can push the angle outside the acceptible range.
-		// When this happens we set the angle to the other portion of a 
-		// full 2PI rotation, and negate the axis, which reverses the 
-		// direction of the rotation (by the right-hand rule).
-		*angle = 2.f * F_PI - temp_angle;
-    	vec.mV[VX] = - mQ[VX] * sin_a;
-    	vec.mV[VY] = - mQ[VY] * sin_a;
-    	vec.mV[VZ] = - mQ[VZ] * sin_a;
-	}
-	else
-	{
-		*angle = temp_angle;
-    	vec.mV[VX] = mQ[VX] * sin_a;
-    	vec.mV[VY] = mQ[VY] * sin_a;
-    	vec.mV[VZ] = mQ[VZ] * sin_a;
-	}
-}
-
-
-// quaternion does not need to be normalized
-void LLQuaternion::getEulerAngles(F32 *roll, F32 *pitch, F32 *yaw) const
-{
-	LLMatrix3 rot_mat(*this);
-	rot_mat.orthogonalize();
-	rot_mat.getEulerAngles(roll, pitch, yaw);
-
-//	// NOTE: LLQuaternion's are actually inverted with respect to
-//	// the matrices, so this code also assumes inverted quaternions
-//	// (-x, -y, -z, w). The result is that roll,pitch,yaw are applied
-//	// in reverse order (yaw,pitch,roll).
-//	F32 x = -mQ[VX], y = -mQ[VY], z = -mQ[VZ], w = mQ[VW];
-//	F64 m20 = 2.0*(x*z-y*w);
-//	if (1.0f - fabsf(m20) < F_APPROXIMATELY_ZERO)
-//	{
-//		*roll = 0.0f;
-//		*pitch = (F32)asin(m20);
-//		*yaw = (F32)atan2(2.0*(x*y-z*w), 1.0 - 2.0*(x*x+z*z));
-//	}
-//	else
-//	{
-//		*roll  = (F32)atan2(-2.0*(y*z+x*w), 1.0-2.0*(x*x+y*y));
-//		*pitch = (F32)asin(m20);
-//		*yaw   = (F32)atan2(-2.0*(x*y+z*w), 1.0-2.0*(y*y+z*z));
-//	}
-}
-
-// Saves space by using the fact that our quaternions are normalized
-LLVector3 LLQuaternion::packToVector3() const
-{
-	if( mQ[VW] >= 0 )
-	{
-		return LLVector3( mQ[VX], mQ[VY], mQ[VZ] );
-	}
-	else
-	{
-		return LLVector3( -mQ[VX], -mQ[VY], -mQ[VZ] );
-	}
-}
-
-// Saves space by using the fact that our quaternions are normalized
-void LLQuaternion::unpackFromVector3( const LLVector3& vec )
-{
-	mQ[VX] = vec.mV[VX];
-	mQ[VY] = vec.mV[VY];
-	mQ[VZ] = vec.mV[VZ];
-	F32 t = 1.f - vec.magVecSquared();
-	if( t > 0 )
-	{
-		mQ[VW] = sqrt( t );
-	}
-	else
-	{
-		// Need this to avoid trying to find the square root of a negative number due
-		// to floating point error.
-		mQ[VW] = 0;
-	}
-}
-
-BOOL LLQuaternion::parseQuat(const std::string& buf, LLQuaternion* value)
-{
-	if( buf.empty() || value == NULL)
-	{
-		return FALSE;
-	}
-
-	LLQuaternion quat;
-	S32 count = sscanf( buf.c_str(), "%f %f %f %f", quat.mQ + 0, quat.mQ + 1, quat.mQ + 2, quat.mQ + 3 );
-	if( 4 == count )
-	{
-		value->set( quat );
-		return TRUE;
-	}
-
-	return FALSE;
-}
-
-
-// End
+/** 
+ * @file llquaternion.cpp
+ * @brief LLQuaternion class implementation.
+ *
+ * $LicenseInfo:firstyear=2000&license=viewergpl$
+ * 
+ * Copyright (c) 2000-2009, Linden Research, Inc.
+ * 
+ * Second Life Viewer Source Code
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
+ * 
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at
+ * http://secondlifegrid.net/programs/open_source/licensing/flossexception
+ * 
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ * 
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ * $/LicenseInfo$
+ */
+
+#include "linden_common.h"
+
+#include "llmath.h"	// for F_PI
+
+#include "llquaternion.h"
+
+//#include "vmath.h"
+#include "v3math.h"
+#include "v3dmath.h"
+#include "v4math.h"
+#include "m4math.h"
+#include "m3math.h"
+#include "llquantize.h"
+
+// WARNING: Don't use this for global const definitions!  using this
+// at the top of a *.cpp file might not give you what you think.
+const LLQuaternion LLQuaternion::DEFAULT;
+ 
+// Constructors
+
+LLQuaternion::LLQuaternion(const LLMatrix4 &mat)
+{
+	*this = mat.quaternion();
+	normalize();
+}
+
+LLQuaternion::LLQuaternion(const LLMatrix3 &mat)
+{
+	*this = mat.quaternion();
+	normalize();
+}
+
+LLQuaternion::LLQuaternion(F32 angle, const LLVector4 &vec)
+{
+	LLVector3 v(vec.mV[VX], vec.mV[VY], vec.mV[VZ]);
+	v.normalize();
+
+	F32 c, s;
+	c = cosf(angle*0.5f);
+	s = sinf(angle*0.5f);
+
+	mQ[VX] = v.mV[VX] * s;
+	mQ[VY] = v.mV[VY] * s;
+	mQ[VZ] = v.mV[VZ] * s;
+	mQ[VW] = c;
+	normalize();
+}
+
+LLQuaternion::LLQuaternion(F32 angle, const LLVector3 &vec)
+{
+	LLVector3 v(vec);
+	v.normalize();
+
+	F32 c, s;
+	c = cosf(angle*0.5f);
+	s = sinf(angle*0.5f);
+
+	mQ[VX] = v.mV[VX] * s;
+	mQ[VY] = v.mV[VY] * s;
+	mQ[VZ] = v.mV[VZ] * s;
+	mQ[VW] = c;
+	normalize();
+}
+
+LLQuaternion::LLQuaternion(const LLVector3 &x_axis,
+						   const LLVector3 &y_axis,
+						   const LLVector3 &z_axis)
+{
+	LLMatrix3 mat;
+	mat.setRows(x_axis, y_axis, z_axis);
+	*this = mat.quaternion();
+	normalize();
+}
+
+// Quantizations
+void	LLQuaternion::quantize16(F32 lower, F32 upper)
+{
+	F32 x = mQ[VX];
+	F32 y = mQ[VY];
+	F32 z = mQ[VZ];
+	F32 s = mQ[VS];
+
+	x = U16_to_F32(F32_to_U16_ROUND(x, lower, upper), lower, upper);
+	y = U16_to_F32(F32_to_U16_ROUND(y, lower, upper), lower, upper);
+	z = U16_to_F32(F32_to_U16_ROUND(z, lower, upper), lower, upper);
+	s = U16_to_F32(F32_to_U16_ROUND(s, lower, upper), lower, upper);
+
+	mQ[VX] = x;
+	mQ[VY] = y;
+	mQ[VZ] = z;
+	mQ[VS] = s;
+
+	normalize();
+}
+
+void	LLQuaternion::quantize8(F32 lower, F32 upper)
+{
+	mQ[VX] = U8_to_F32(F32_to_U8_ROUND(mQ[VX], lower, upper), lower, upper);
+	mQ[VY] = U8_to_F32(F32_to_U8_ROUND(mQ[VY], lower, upper), lower, upper);
+	mQ[VZ] = U8_to_F32(F32_to_U8_ROUND(mQ[VZ], lower, upper), lower, upper);
+	mQ[VS] = U8_to_F32(F32_to_U8_ROUND(mQ[VS], lower, upper), lower, upper);
+
+	normalize();
+}
+
+// LLVector3 Magnitude and Normalization Functions
+
+
+// Set LLQuaternion routines
+
+const LLQuaternion&	LLQuaternion::setAngleAxis(F32 angle, F32 x, F32 y, F32 z)
+{
+	LLVector3 vec(x, y, z);
+	vec.normalize();
+
+	angle *= 0.5f;
+	F32 c, s;
+	c = cosf(angle);
+	s = sinf(angle);
+
+	mQ[VX] = vec.mV[VX]*s;
+	mQ[VY] = vec.mV[VY]*s;
+	mQ[VZ] = vec.mV[VZ]*s;
+	mQ[VW] = c;
+
+	normalize();
+	return (*this);
+}
+
+const LLQuaternion&	LLQuaternion::setAngleAxis(F32 angle, const LLVector3 &vec)
+{
+	LLVector3 v(vec);
+	v.normalize();
+
+	angle *= 0.5f;
+	F32 c, s;
+	c = cosf(angle);
+	s = sinf(angle);
+
+	mQ[VX] = v.mV[VX]*s;
+	mQ[VY] = v.mV[VY]*s;
+	mQ[VZ] = v.mV[VZ]*s;
+	mQ[VW] = c;
+
+	normalize();
+	return (*this);
+}
+
+const LLQuaternion&	LLQuaternion::setAngleAxis(F32 angle, const LLVector4 &vec)
+{
+	LLVector3 v(vec.mV[VX], vec.mV[VY], vec.mV[VZ]);
+	v.normalize();
+
+	F32 c, s;
+	c = cosf(angle*0.5f);
+	s = sinf(angle*0.5f);
+
+	mQ[VX] = v.mV[VX]*s;
+	mQ[VY] = v.mV[VY]*s;
+	mQ[VZ] = v.mV[VZ]*s;
+	mQ[VW] = c;
+
+	normalize();
+	return (*this);
+}
+
+const LLQuaternion&	LLQuaternion::setEulerAngles(F32 roll, F32 pitch, F32 yaw)
+{
+	LLMatrix3 rot_mat(roll, pitch, yaw);
+	rot_mat.orthogonalize();
+	*this = rot_mat.quaternion();
+		
+	normalize();
+	return (*this);
+}
+
+// deprecated
+const LLQuaternion&	LLQuaternion::set(const LLMatrix3 &mat)
+{
+	*this = mat.quaternion();
+	normalize();
+	return (*this);
+}
+
+// deprecated
+const LLQuaternion&	LLQuaternion::set(const LLMatrix4 &mat)
+{
+	*this = mat.quaternion();
+	normalize();
+	return (*this);
+}
+
+// deprecated
+const LLQuaternion&	LLQuaternion::setQuat(F32 angle, F32 x, F32 y, F32 z)
+{
+	LLVector3 vec(x, y, z);
+	vec.normalize();
+
+	angle *= 0.5f;
+	F32 c, s;
+	c = cosf(angle);
+	s = sinf(angle);
+
+	mQ[VX] = vec.mV[VX]*s;
+	mQ[VY] = vec.mV[VY]*s;
+	mQ[VZ] = vec.mV[VZ]*s;
+	mQ[VW] = c;
+
+	normalize();
+	return (*this);
+}
+
+// deprecated
+const LLQuaternion&	LLQuaternion::setQuat(F32 angle, const LLVector3 &vec)
+{
+	LLVector3 v(vec);
+	v.normalize();
+
+	angle *= 0.5f;
+	F32 c, s;
+	c = cosf(angle);
+	s = sinf(angle);
+
+	mQ[VX] = v.mV[VX]*s;
+	mQ[VY] = v.mV[VY]*s;
+	mQ[VZ] = v.mV[VZ]*s;
+	mQ[VW] = c;
+
+	normalize();
+	return (*this);
+}
+
+const LLQuaternion&	LLQuaternion::setQuat(F32 angle, const LLVector4 &vec)
+{
+	LLVector3 v(vec.mV[VX], vec.mV[VY], vec.mV[VZ]);
+	v.normalize();
+
+	F32 c, s;
+	c = cosf(angle*0.5f);
+	s = sinf(angle*0.5f);
+
+	mQ[VX] = v.mV[VX]*s;
+	mQ[VY] = v.mV[VY]*s;
+	mQ[VZ] = v.mV[VZ]*s;
+	mQ[VW] = c;
+
+	normalize();
+	return (*this);
+}
+
+const LLQuaternion&	LLQuaternion::setQuat(F32 roll, F32 pitch, F32 yaw)
+{
+	LLMatrix3 rot_mat(roll, pitch, yaw);
+	rot_mat.orthogonalize();
+	*this = rot_mat.quaternion();
+		
+	normalize();
+	return (*this);
+}
+
+const LLQuaternion&	LLQuaternion::setQuat(const LLMatrix3 &mat)
+{
+	*this = mat.quaternion();
+	normalize();
+	return (*this);
+}
+
+const LLQuaternion&	LLQuaternion::setQuat(const LLMatrix4 &mat)
+{
+	*this = mat.quaternion();
+	normalize();
+	return (*this);
+//#if 1
+//	// NOTE: LLQuaternion's are actually inverted with respect to
+//	// the matrices, so this code also assumes inverted quaternions
+//	// (-x, -y, -z, w). The result is that roll,pitch,yaw are applied
+//	// in reverse order (yaw,pitch,roll).
+//	F64 cosX = cos(roll);
+//    F64 cosY = cos(pitch);
+//    F64 cosZ = cos(yaw);
+//
+//    F64 sinX = sin(roll);
+//    F64 sinY = sin(pitch);
+//    F64 sinZ = sin(yaw);
+//
+//    mQ[VW] = (F32)sqrt(cosY*cosZ - sinX*sinY*sinZ + cosX*cosZ + cosX*cosY + 1.0)*.5;
+//	if (fabs(mQ[VW]) < F_APPROXIMATELY_ZERO)
+//	{
+//		// null rotation, any axis will do
+//		mQ[VX] = 0.0f;
+//		mQ[VY] = 1.0f;
+//		mQ[VZ] = 0.0f;
+//	}
+//	else
+//	{
+//		F32 inv_s = 1.0f / (4.0f * mQ[VW]);
+//		mQ[VX] = (F32)-(-sinX*cosY - cosX*sinY*sinZ - sinX*cosZ) * inv_s;
+//		mQ[VY] = (F32)-(-cosX*sinY*cosZ + sinX*sinZ - sinY) * inv_s;
+//		mQ[VZ] = (F32)-(-cosY*sinZ - sinX*sinY*cosZ - cosX*sinZ) * inv_s;		
+//	}
+//
+//#else // This only works on a certain subset of roll/pitch/yaw
+//	
+//	F64 cosX = cosf(roll/2.0);
+//    F64 cosY = cosf(pitch/2.0);
+//    F64 cosZ = cosf(yaw/2.0);
+//
+//    F64 sinX = sinf(roll/2.0);
+//    F64 sinY = sinf(pitch/2.0);
+//    F64 sinZ = sinf(yaw/2.0);
+//
+//    mQ[VW] = (F32)(cosX*cosY*cosZ + sinX*sinY*sinZ);
+//    mQ[VX] = (F32)(sinX*cosY*cosZ - cosX*sinY*sinZ);
+//    mQ[VY] = (F32)(cosX*sinY*cosZ + sinX*cosY*sinZ);
+//    mQ[VZ] = (F32)(cosX*cosY*sinZ - sinX*sinY*cosZ);
+//#endif
+//
+//	normalize();
+//	return (*this);
+}
+
+// SJB: This code is correct for a logically stored (non-transposed) matrix;
+//		Our matrices are stored transposed, OpenGL style, so this generates the
+//		INVERSE matrix, or the CORRECT matrix from an INVERSE quaternion.
+//		Because we use similar logic in LLMatrix3::quaternion(),
+//		we are internally consistent so everything works OK :)
+LLMatrix3	LLQuaternion::getMatrix3(void) const
+{
+	LLMatrix3	mat;
+	F32		xx, xy, xz, xw, yy, yz, yw, zz, zw;
+
+    xx      = mQ[VX] * mQ[VX];
+    xy      = mQ[VX] * mQ[VY];
+    xz      = mQ[VX] * mQ[VZ];
+    xw      = mQ[VX] * mQ[VW];
+
+    yy      = mQ[VY] * mQ[VY];
+    yz      = mQ[VY] * mQ[VZ];
+    yw      = mQ[VY] * mQ[VW];
+
+    zz      = mQ[VZ] * mQ[VZ];
+    zw      = mQ[VZ] * mQ[VW];
+
+    mat.mMatrix[0][0]  = 1.f - 2.f * ( yy + zz );
+    mat.mMatrix[0][1]  =	   2.f * ( xy + zw );
+    mat.mMatrix[0][2]  =	   2.f * ( xz - yw );
+
+    mat.mMatrix[1][0]  =	   2.f * ( xy - zw );
+    mat.mMatrix[1][1]  = 1.f - 2.f * ( xx + zz );
+    mat.mMatrix[1][2]  =	   2.f * ( yz + xw );
+
+    mat.mMatrix[2][0]  =	   2.f * ( xz + yw );
+    mat.mMatrix[2][1]  =	   2.f * ( yz - xw );
+    mat.mMatrix[2][2]  = 1.f - 2.f * ( xx + yy );
+
+	return mat;
+}
+
+LLMatrix4	LLQuaternion::getMatrix4(void) const
+{
+	LLMatrix4	mat;
+	F32		xx, xy, xz, xw, yy, yz, yw, zz, zw;
+
+    xx      = mQ[VX] * mQ[VX];
+    xy      = mQ[VX] * mQ[VY];
+    xz      = mQ[VX] * mQ[VZ];
+    xw      = mQ[VX] * mQ[VW];
+
+    yy      = mQ[VY] * mQ[VY];
+    yz      = mQ[VY] * mQ[VZ];
+    yw      = mQ[VY] * mQ[VW];
+
+    zz      = mQ[VZ] * mQ[VZ];
+    zw      = mQ[VZ] * mQ[VW];
+
+    mat.mMatrix[0][0]  = 1.f - 2.f * ( yy + zz );
+    mat.mMatrix[0][1]  =	   2.f * ( xy + zw );
+    mat.mMatrix[0][2]  =	   2.f * ( xz - yw );
+
+    mat.mMatrix[1][0]  =	   2.f * ( xy - zw );
+    mat.mMatrix[1][1]  = 1.f - 2.f * ( xx + zz );
+    mat.mMatrix[1][2]  =	   2.f * ( yz + xw );
+
+    mat.mMatrix[2][0]  =	   2.f * ( xz + yw );
+    mat.mMatrix[2][1]  =	   2.f * ( yz - xw );
+    mat.mMatrix[2][2]  = 1.f - 2.f * ( xx + yy );
+
+	// TODO -- should we set the translation portion to zero?
+
+	return mat;
+}
+
+
+
+
+// Other useful methods
+
+
+// calculate the shortest rotation from a to b
+void LLQuaternion::shortestArc(const LLVector3 &a, const LLVector3 &b)
+{
+	// Make a local copy of both vectors.
+	LLVector3 vec_a = a;
+	LLVector3 vec_b = b;
+
+	// Make sure neither vector is zero length.  Also normalize
+	// the vectors while we are at it.
+	F32 vec_a_mag = vec_a.normalize();
+	F32 vec_b_mag = vec_b.normalize();
+	if (vec_a_mag < F_APPROXIMATELY_ZERO ||
+		vec_b_mag < F_APPROXIMATELY_ZERO)
+	{
+		// Can't calculate a rotation from this.
+		// Just return ZERO_ROTATION instead.
+		loadIdentity();
+		return;
+	}
+
+	// Create an axis to rotate around, and the cos of the angle to rotate.
+	LLVector3 axis = vec_a % vec_b;
+	F32 cos_theta  = vec_a * vec_b;
+
+	// Check the angle between the vectors to see if they are parallel or anti-parallel.
+	if (cos_theta > 1.0 - F_APPROXIMATELY_ZERO)
+	{
+		// a and b are parallel.  No rotation is necessary.
+		loadIdentity();
+	}
+	else if (cos_theta < -1.0 + F_APPROXIMATELY_ZERO)
+	{
+		// a and b are anti-parallel.
+		// Rotate 180 degrees around some orthogonal axis.
+		// Find the projection of the x-axis onto a, and try
+		// using the vector between the projection and the x-axis
+		// as the orthogonal axis.
+		LLVector3 proj = vec_a.mV[VX] / (vec_a * vec_a) * vec_a;
+		LLVector3 ortho_axis(1.f, 0.f, 0.f);
+		ortho_axis -= proj;
+		
+		// Turn this into an orthonormal axis.
+		F32 ortho_length = ortho_axis.normalize();
+		// If the axis' length is 0, then our guess at an orthogonal axis
+		// was wrong (a is parallel to the x-axis).
+		if (ortho_length < F_APPROXIMATELY_ZERO)
+		{
+			// Use the z-axis instead.
+			ortho_axis.setVec(0.f, 0.f, 1.f);
+		}
+
+		// Construct a quaternion from this orthonormal axis.
+		mQ[VX] = ortho_axis.mV[VX];
+		mQ[VY] = ortho_axis.mV[VY];
+		mQ[VZ] = ortho_axis.mV[VZ];
+		mQ[VW] = 0.f;
+	}
+	else
+	{
+		// a and b are NOT parallel or anti-parallel.
+		// Return the rotation between these vectors.
+		F32 theta = (F32)acos(cos_theta);
+
+		setAngleAxis(theta, axis);
+	}
+}
+
+// constrains rotation to a cone angle specified in radians
+const LLQuaternion &LLQuaternion::constrain(F32 radians)
+{
+	const F32 cos_angle_lim = cosf( radians/2 );	// mQ[VW] limit
+	const F32 sin_angle_lim = sinf( radians/2 );	// rotation axis length	limit
+
+	if (mQ[VW] < 0.f)
+	{
+		mQ[VX] *= -1.f;
+		mQ[VY] *= -1.f;
+		mQ[VZ] *= -1.f;
+		mQ[VW] *= -1.f;
+	}
+
+	// if rotation angle is greater than limit (cos is less than limit)
+	if( mQ[VW] < cos_angle_lim )
+	{
+		mQ[VW] = cos_angle_lim;
+		F32 axis_len = sqrtf( mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] ); // sin(theta/2)
+		F32 axis_mult_fact = sin_angle_lim / axis_len;
+		mQ[VX] *= axis_mult_fact;
+		mQ[VY] *= axis_mult_fact;
+		mQ[VZ] *= axis_mult_fact;
+	}
+
+	return *this;
+}
+
+// Operators
+
+std::ostream& operator<<(std::ostream &s, const LLQuaternion &a)
+{
+	s << "{ " 
+		<< a.mQ[VX] << ", " << a.mQ[VY] << ", " << a.mQ[VZ] << ", " << a.mQ[VW] 
+	<< " }";
+	return s;
+}
+
+
+// Does NOT renormalize the result
+LLQuaternion	operator*(const LLQuaternion &a, const LLQuaternion &b)
+{
+//	LLQuaternion::mMultCount++;
+
+	LLQuaternion q(
+		b.mQ[3] * a.mQ[0] + b.mQ[0] * a.mQ[3] + b.mQ[1] * a.mQ[2] - b.mQ[2] * a.mQ[1],
+		b.mQ[3] * a.mQ[1] + b.mQ[1] * a.mQ[3] + b.mQ[2] * a.mQ[0] - b.mQ[0] * a.mQ[2],
+		b.mQ[3] * a.mQ[2] + b.mQ[2] * a.mQ[3] + b.mQ[0] * a.mQ[1] - b.mQ[1] * a.mQ[0],
+		b.mQ[3] * a.mQ[3] - b.mQ[0] * a.mQ[0] - b.mQ[1] * a.mQ[1] - b.mQ[2] * a.mQ[2]
+	);
+	return q;
+}
+
+/*
+LLMatrix4	operator*(const LLMatrix4 &m, const LLQuaternion &q)
+{
+	LLMatrix4 qmat(q);
+	return (m*qmat);
+}
+*/
+
+
+
+LLVector4		operator*(const LLVector4 &a, const LLQuaternion &rot)
+{
+    F32 rw = - rot.mQ[VX] * a.mV[VX] - rot.mQ[VY] * a.mV[VY] - rot.mQ[VZ] * a.mV[VZ];
+    F32 rx =   rot.mQ[VW] * a.mV[VX] + rot.mQ[VY] * a.mV[VZ] - rot.mQ[VZ] * a.mV[VY];
+    F32 ry =   rot.mQ[VW] * a.mV[VY] + rot.mQ[VZ] * a.mV[VX] - rot.mQ[VX] * a.mV[VZ];
+    F32 rz =   rot.mQ[VW] * a.mV[VZ] + rot.mQ[VX] * a.mV[VY] - rot.mQ[VY] * a.mV[VX];
+
+    F32 nx = - rw * rot.mQ[VX] +  rx * rot.mQ[VW] - ry * rot.mQ[VZ] + rz * rot.mQ[VY];
+    F32 ny = - rw * rot.mQ[VY] +  ry * rot.mQ[VW] - rz * rot.mQ[VX] + rx * rot.mQ[VZ];
+    F32 nz = - rw * rot.mQ[VZ] +  rz * rot.mQ[VW] - rx * rot.mQ[VY] + ry * rot.mQ[VX];
+
+    return LLVector4(nx, ny, nz, a.mV[VW]);
+}
+
+LLVector3		operator*(const LLVector3 &a, const LLQuaternion &rot)
+{
+    F32 rw = - rot.mQ[VX] * a.mV[VX] - rot.mQ[VY] * a.mV[VY] - rot.mQ[VZ] * a.mV[VZ];
+    F32 rx =   rot.mQ[VW] * a.mV[VX] + rot.mQ[VY] * a.mV[VZ] - rot.mQ[VZ] * a.mV[VY];
+    F32 ry =   rot.mQ[VW] * a.mV[VY] + rot.mQ[VZ] * a.mV[VX] - rot.mQ[VX] * a.mV[VZ];
+    F32 rz =   rot.mQ[VW] * a.mV[VZ] + rot.mQ[VX] * a.mV[VY] - rot.mQ[VY] * a.mV[VX];
+
+    F32 nx = - rw * rot.mQ[VX] +  rx * rot.mQ[VW] - ry * rot.mQ[VZ] + rz * rot.mQ[VY];
+    F32 ny = - rw * rot.mQ[VY] +  ry * rot.mQ[VW] - rz * rot.mQ[VX] + rx * rot.mQ[VZ];
+    F32 nz = - rw * rot.mQ[VZ] +  rz * rot.mQ[VW] - rx * rot.mQ[VY] + ry * rot.mQ[VX];
+
+    return LLVector3(nx, ny, nz);
+}
+
+LLVector3d		operator*(const LLVector3d &a, const LLQuaternion &rot)
+{
+    F64 rw = - rot.mQ[VX] * a.mdV[VX] - rot.mQ[VY] * a.mdV[VY] - rot.mQ[VZ] * a.mdV[VZ];
+    F64 rx =   rot.mQ[VW] * a.mdV[VX] + rot.mQ[VY] * a.mdV[VZ] - rot.mQ[VZ] * a.mdV[VY];
+    F64 ry =   rot.mQ[VW] * a.mdV[VY] + rot.mQ[VZ] * a.mdV[VX] - rot.mQ[VX] * a.mdV[VZ];
+    F64 rz =   rot.mQ[VW] * a.mdV[VZ] + rot.mQ[VX] * a.mdV[VY] - rot.mQ[VY] * a.mdV[VX];
+
+    F64 nx = - rw * rot.mQ[VX] +  rx * rot.mQ[VW] - ry * rot.mQ[VZ] + rz * rot.mQ[VY];
+    F64 ny = - rw * rot.mQ[VY] +  ry * rot.mQ[VW] - rz * rot.mQ[VX] + rx * rot.mQ[VZ];
+    F64 nz = - rw * rot.mQ[VZ] +  rz * rot.mQ[VW] - rx * rot.mQ[VY] + ry * rot.mQ[VX];
+
+    return LLVector3d(nx, ny, nz);
+}
+
+F32 dot(const LLQuaternion &a, const LLQuaternion &b)
+{
+	return a.mQ[VX] * b.mQ[VX] + 
+		   a.mQ[VY] * b.mQ[VY] + 
+		   a.mQ[VZ] * b.mQ[VZ] + 
+		   a.mQ[VW] * b.mQ[VW]; 
+}
+
+// DEMO HACK: This lerp is probably incorrect now due to intermediate normalization;
+// it should look more like the lerp below
+#if 0
+// linear interpolation
+LLQuaternion lerp(F32 t, const LLQuaternion &p, const LLQuaternion &q)
+{
+	LLQuaternion r;
+	r = t * (q - p) + p;
+	r.normalize();
+	return r;
+}
+#endif
+
+// lerp from identity (0, 0, 0, 1) to q by factor t, then renormalize
+LLQuaternion lerp(F32 t, const LLQuaternion &q)
+{
+	LLQuaternion r;
+	r.mQ[VX] = t * q.mQ[VX];	// identity xyz is 0, so these reduce to t * q
+	r.mQ[VY] = t * q.mQ[VY];
+	r.mQ[VZ] = t * q.mQ[VZ];
+	r.mQ[VW] = t * (q.mQ[VW] - 1.f) + 1.f;	// == (1 - t) * 1 + t * q.w; must read VW, not VZ
+	r.normalize();
+	return r;
+}
+
+LLQuaternion lerp(F32 t, const LLQuaternion &p, const LLQuaternion &q)
+{
+	LLQuaternion r;
+	F32 inv_t;
+
+	inv_t = 1.f - t;
+
+	r.mQ[VX] = t * q.mQ[VX] + (inv_t * p.mQ[VX]);
+	r.mQ[VY] = t * q.mQ[VY] + (inv_t * p.mQ[VY]);
+	r.mQ[VZ] = t * q.mQ[VZ] + (inv_t * p.mQ[VZ]);
+	r.mQ[VW] = t * q.mQ[VW] + (inv_t * p.mQ[VW]);
+	r.normalize();
+	return r;
+}
+
+
+// spherical linear interpolation
+LLQuaternion slerp( F32 u, const LLQuaternion &a, const LLQuaternion &b )
+{
+	// cosine theta = dot product of a and b
+	F32 cos_t = a.mQ[0]*b.mQ[0] + a.mQ[1]*b.mQ[1] + a.mQ[2]*b.mQ[2] + a.mQ[3]*b.mQ[3];
+	
+	// if b is on opposite hemisphere from a, use -a instead
+	int bflip;
+ 	if (cos_t < 0.0f)
+	{
+		cos_t = -cos_t;
+		bflip = TRUE;
+	}
+	else
+		bflip = FALSE;
+
+	// if B is (within precision limits) the same as A,
+	// just linear interpolate between A and B.
+	F32 alpha;	// interpolant
+	F32 beta;		// 1 - interpolant
+	if (1.0f - cos_t < 0.00001f)
+	{
+		beta = 1.0f - u;
+		alpha = u;
+ 	}
+	else
+	{
+ 		F32 theta = acosf(cos_t);
+ 		F32 sin_t = sinf(theta);
+ 		beta = sinf(theta - u*theta) / sin_t;
+ 		alpha = sinf(u*theta) / sin_t;
+ 	}
+
+	if (bflip)
+		beta = -beta;
+
+	// interpolate
+	LLQuaternion ret;
+	ret.mQ[0] = beta*a.mQ[0] + alpha*b.mQ[0];
+ 	ret.mQ[1] = beta*a.mQ[1] + alpha*b.mQ[1];
+ 	ret.mQ[2] = beta*a.mQ[2] + alpha*b.mQ[2];
+ 	ret.mQ[3] = beta*a.mQ[3] + alpha*b.mQ[3];
+
+	return ret;
+}
+
+// lerp whenever possible
+LLQuaternion nlerp(F32 t, const LLQuaternion &a, const LLQuaternion &b)
+{
+	if (dot(a, b) < 0.f)
+	{
+		return slerp(t, a, b);
+	}
+	else
+	{
+		return lerp(t, a, b);
+	}
+}
+
+LLQuaternion nlerp(F32 t, const LLQuaternion &q)
+{
+	if (q.mQ[VW] < 0.f)
+	{
+		return slerp(t, q);
+	}
+	else
+	{
+		return lerp(t, q);
+	}
+}
+
+// slerp from identity quaternion to another quaternion
+LLQuaternion slerp(F32 t, const LLQuaternion &q)
+{
+	F32 c = q.mQ[VW];	// cosine of the angle between identity and q
+	if (1.0f == t  ||  fabs(c) >= 1.0f)
+	{
+		// the trivial cases; |c| >= 1 would make s zero (or NaN) below
+		return q;
+	}
+
+	LLQuaternion r;
+	F32 s, angle, stq, stp;
+
+	s = (F32) sqrt(1.f - c*c);	// sine of that angle; non-zero because |c| < 1
+
+    if (c < 0.0f)
+    {
+        // when c < 0.0 then theta > PI/2
+        // since quat and -quat are the same rotation we invert one of
+        // p or q to reduce unnecessary spins
+        // An equivalent way to do it is to convert acos(c) as if it had
+		// been negative, and to negate stp
+        angle   = (F32) acos(-c); 
+        stp     = -(F32) sin(angle * (1.f - t));
+        stq     = (F32) sin(angle * t);
+    }   
+    else
+    {
+		angle 	= (F32) acos(c);
+        stp     = (F32) sin(angle * (1.f - t));
+        stq     = (F32) sin(angle * t);
+    }
+
+	r.mQ[VX] = (q.mQ[VX] * stq) / s;
+	r.mQ[VY] = (q.mQ[VY] * stq) / s;
+	r.mQ[VZ] = (q.mQ[VZ] * stq) / s;
+	r.mQ[VW] = (stp + q.mQ[VW] * stq) / s;
+
+	return r;
+}
+
+LLQuaternion mayaQ(F32 xRot, F32 yRot, F32 zRot, LLQuaternion::Order order)
+{
+	LLQuaternion xQ( xRot*DEG_TO_RAD, LLVector3(1.0f, 0.0f, 0.0f) );
+	LLQuaternion yQ( yRot*DEG_TO_RAD, LLVector3(0.0f, 1.0f, 0.0f) );
+	LLQuaternion zQ( zRot*DEG_TO_RAD, LLVector3(0.0f, 0.0f, 1.0f) );
+	LLQuaternion ret;
+	switch( order )
+	{
+	case LLQuaternion::XYZ:
+		ret = xQ * yQ * zQ;
+		break;
+	case LLQuaternion::YZX:
+		ret = yQ * zQ * xQ;
+		break;
+	case LLQuaternion::ZXY:
+		ret = zQ * xQ * yQ;
+		break;
+	case LLQuaternion::XZY:
+		ret = xQ * zQ * yQ;
+		break;
+	case LLQuaternion::YXZ:
+		ret = yQ * xQ * zQ;
+		break;
+	case LLQuaternion::ZYX:
+		ret = zQ * yQ * xQ;
+		break;
+	}
+	return ret;
+}
+
+const char *OrderToString( const LLQuaternion::Order order )
+{
+	const char *p = NULL;
+	switch( order )
+	{
+	default:
+	case LLQuaternion::XYZ:
+		p = "XYZ";
+		break;
+	case LLQuaternion::YZX:
+		p = "YZX";
+		break;
+	case LLQuaternion::ZXY:
+		p = "ZXY";
+		break;
+	case LLQuaternion::XZY:
+		p = "XZY";
+		break;
+	case LLQuaternion::YXZ:
+		p = "YXZ";
+		break;
+	case LLQuaternion::ZYX:
+		p = "ZYX";
+		break;
+	}
+	return p;
+}
+
+LLQuaternion::Order StringToOrder( const char *str )
+{
+	if (strncmp(str, "XYZ", 3)==0 || strncmp(str, "xyz", 3)==0)
+		return LLQuaternion::XYZ;
+
+	if (strncmp(str, "YZX", 3)==0 || strncmp(str, "yzx", 3)==0)
+		return LLQuaternion::YZX;
+
+	if (strncmp(str, "ZXY", 3)==0 || strncmp(str, "zxy", 3)==0)
+		return LLQuaternion::ZXY;
+
+	if (strncmp(str, "XZY", 3)==0 || strncmp(str, "xzy", 3)==0)
+		return LLQuaternion::XZY;
+
+	if (strncmp(str, "YXZ", 3)==0 || strncmp(str, "yxz", 3)==0)
+		return LLQuaternion::YXZ;
+
+	if (strncmp(str, "ZYX", 3)==0 || strncmp(str, "zyx", 3)==0)
+		return LLQuaternion::ZYX;
+
+	return LLQuaternion::XYZ;
+}
+
+void LLQuaternion::getAngleAxis(F32* angle, LLVector3 &vec) const
+{
+	F32 cos_a = mQ[VW];
+	if (cos_a > 1.0f) cos_a = 1.0f;
+	if (cos_a < -1.0f) cos_a = -1.0f;
+
+    F32 sin_a = (F32) sqrt( 1.0f - cos_a * cos_a );
+
+    if ( fabs( sin_a ) < 0.0005f )
+		sin_a = 1.0f;
+	else
+		sin_a = 1.f/sin_a;
+
+    F32 temp_angle = 2.0f * (F32) acos( cos_a );
+	if (temp_angle > F_PI)
+	{
+		// The (angle,axis) pair should never have angles outside [PI, -PI]
+		// since we want the _shortest_ (angle,axis) solution.
+		// Since acos is defined for [0, PI], and we multiply by 2.0, we
+		// can push the angle outside the acceptible range.
+		// When this happens we set the angle to the other portion of a 
+		// full 2PI rotation, and negate the axis, which reverses the 
+		// direction of the rotation (by the right-hand rule).
+		*angle = 2.f * F_PI - temp_angle;
+    	vec.mV[VX] = - mQ[VX] * sin_a;
+    	vec.mV[VY] = - mQ[VY] * sin_a;
+    	vec.mV[VZ] = - mQ[VZ] * sin_a;
+	}
+	else
+	{
+		*angle = temp_angle;
+    	vec.mV[VX] = mQ[VX] * sin_a;
+    	vec.mV[VY] = mQ[VY] * sin_a;
+    	vec.mV[VZ] = mQ[VZ] * sin_a;
+	}
+}
+
+
+// quaternion does not need to be normalized
+void LLQuaternion::getEulerAngles(F32 *roll, F32 *pitch, F32 *yaw) const
+{
+	LLMatrix3 rot_mat(*this);
+	rot_mat.orthogonalize();
+	rot_mat.getEulerAngles(roll, pitch, yaw);
+
+//	// NOTE: LLQuaternion's are actually inverted with respect to
+//	// the matrices, so this code also assumes inverted quaternions
+//	// (-x, -y, -z, w). The result is that roll,pitch,yaw are applied
+//	// in reverse order (yaw,pitch,roll).
+//	F32 x = -mQ[VX], y = -mQ[VY], z = -mQ[VZ], w = mQ[VW];
+//	F64 m20 = 2.0*(x*z-y*w);
+//	if (1.0f - fabsf(m20) < F_APPROXIMATELY_ZERO)
+//	{
+//		*roll = 0.0f;
+//		*pitch = (F32)asin(m20);
+//		*yaw = (F32)atan2(2.0*(x*y-z*w), 1.0 - 2.0*(x*x+z*z));
+//	}
+//	else
+//	{
+//		*roll  = (F32)atan2(-2.0*(y*z+x*w), 1.0-2.0*(x*x+y*y));
+//		*pitch = (F32)asin(m20);
+//		*yaw   = (F32)atan2(-2.0*(x*y+z*w), 1.0-2.0*(y*y+z*z));
+//	}
+}
+
+// Saves space by using the fact that our quaternions are normalized
+LLVector3 LLQuaternion::packToVector3() const
+{
+	if( mQ[VW] >= 0 )
+	{
+		return LLVector3( mQ[VX], mQ[VY], mQ[VZ] );
+	}
+	else
+	{
+		return LLVector3( -mQ[VX], -mQ[VY], -mQ[VZ] );
+	}
+}
+
+// Saves space by using the fact that our quaternions are normalized
+void LLQuaternion::unpackFromVector3( const LLVector3& vec )
+{
+	mQ[VX] = vec.mV[VX];
+	mQ[VY] = vec.mV[VY];
+	mQ[VZ] = vec.mV[VZ];
+	F32 t = 1.f - vec.magVecSquared();
+	if( t > 0 )
+	{
+		mQ[VW] = sqrt( t );
+	}
+	else
+	{
+		// Need this to avoid trying to find the square root of a negative number due
+		// to floating point error.
+		mQ[VW] = 0;
+	}
+}
+
+BOOL LLQuaternion::parseQuat(const std::string& buf, LLQuaternion* value)
+{
+	if( buf.empty() || value == NULL)
+	{
+		return FALSE;
+	}
+
+	LLQuaternion quat;
+	S32 count = sscanf( buf.c_str(), "%f %f %f %f", quat.mQ + 0, quat.mQ + 1, quat.mQ + 2, quat.mQ + 3 );
+	if( 4 == count )
+	{
+		value->set( quat );
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+
+// End
diff --git a/indra/llmath/llquaternion.h b/indra/llmath/llquaternion.h
index 0769f29f23..bbd4326483 100644
--- a/indra/llmath/llquaternion.h
+++ b/indra/llmath/llquaternion.h
@@ -1,590 +1,594 @@
-/** 
- * @file llquaternion.h
- * @brief LLQuaternion class header file.
- *
- * $LicenseInfo:firstyear=2000&license=viewergpl$
- * 
- * Copyright (c) 2000-2009, Linden Research, Inc.
- * 
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab.  Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- * 
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- * 
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- * 
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
-
-#ifndef LLQUATERNION_H
-#define LLQUATERNION_H
-
-#include "llmath.h"
-
-class LLVector4;
-class LLVector3;
-class LLVector3d;
-class LLMatrix4;
-class LLMatrix3;
-
-//	NOTA BENE: Quaternion code is written assuming Unit Quaternions!!!!
-//			   Moreover, it is written assuming that all vectors and matricies
-//			   passed as arguments are normalized and unitary respectively.
-//			   VERY VERY VERY VERY BAD THINGS will happen if these assumptions fail.
-
-static const U32 LENGTHOFQUAT = 4;
-
-class LLQuaternion
-{
-public:
-	F32 mQ[LENGTHOFQUAT];
-
-	static const LLQuaternion DEFAULT;
-
-	LLQuaternion();									// Initializes Quaternion to (0,0,0,1)
-	explicit LLQuaternion(const LLMatrix4 &mat);				// Initializes Quaternion from Matrix4
-	explicit LLQuaternion(const LLMatrix3 &mat);				// Initializes Quaternion from Matrix3
-	LLQuaternion(F32 x, F32 y, F32 z, F32 w);		// Initializes Quaternion to normalize(x, y, z, w)
-	LLQuaternion(F32 angle, const LLVector4 &vec);	// Initializes Quaternion to axis_angle2quat(angle, vec)
-	LLQuaternion(F32 angle, const LLVector3 &vec);	// Initializes Quaternion to axis_angle2quat(angle, vec)
-	LLQuaternion(const F32 *q);						// Initializes Quaternion to normalize(x, y, z, w)
-	LLQuaternion(const LLVector3 &x_axis,
-				 const LLVector3 &y_axis,
-				 const LLVector3 &z_axis);			// Initializes Quaternion from Matrix3 = [x_axis ; y_axis ; z_axis]
-
-	BOOL isIdentity() const;
-	BOOL isNotIdentity() const;
-	BOOL isFinite() const;									// checks to see if all values of LLQuaternion are finite
-	void quantize16(F32 lower, F32 upper);					// changes the vector to reflect quatization
-	void quantize8(F32 lower, F32 upper);							// changes the vector to reflect quatization
-	void loadIdentity();											// Loads the quaternion that represents the identity rotation
-
-	const LLQuaternion&	set(F32 x, F32 y, F32 z, F32 w);		// Sets Quaternion to normalize(x, y, z, w)
-	const LLQuaternion&	set(const LLQuaternion &quat);			// Copies Quaternion
-	const LLQuaternion&	set(const F32 *q);						// Sets Quaternion to normalize(quat[VX], quat[VY], quat[VZ], quat[VW])
-	const LLQuaternion&	set(const LLMatrix3 &mat);				// Sets Quaternion to mat2quat(mat)
-	const LLQuaternion&	set(const LLMatrix4 &mat);				// Sets Quaternion to mat2quat(mat)
-
-	const LLQuaternion&	setAngleAxis(F32 angle, F32 x, F32 y, F32 z);	// Sets Quaternion to axis_angle2quat(angle, x, y, z)
-	const LLQuaternion&	setAngleAxis(F32 angle, const LLVector3 &vec);	// Sets Quaternion to axis_angle2quat(angle, vec)
-	const LLQuaternion&	setAngleAxis(F32 angle, const LLVector4 &vec);	// Sets Quaternion to axis_angle2quat(angle, vec)
-	const LLQuaternion&	setEulerAngles(F32 roll, F32 pitch, F32 yaw);	// Sets Quaternion to euler2quat(pitch, yaw, roll)
-
-	const LLQuaternion&	setQuatInit(F32 x, F32 y, F32 z, F32 w);	// deprecated
-	const LLQuaternion&	setQuat(const LLQuaternion &quat);			// deprecated
-	const LLQuaternion&	setQuat(const F32 *q);						// deprecated
-	const LLQuaternion&	setQuat(const LLMatrix3 &mat);				// deprecated
-	const LLQuaternion&	setQuat(const LLMatrix4 &mat);				// deprecated
-	const LLQuaternion&	setQuat(F32 angle, F32 x, F32 y, F32 z);	// deprecated
-	const LLQuaternion&	setQuat(F32 angle, const LLVector3 &vec);	// deprecated
-	const LLQuaternion&	setQuat(F32 angle, const LLVector4 &vec);	// deprecated
-	const LLQuaternion&	setQuat(F32 roll, F32 pitch, F32 yaw);		// deprecated
-
-	LLMatrix4	getMatrix4(void) const;							// Returns the Matrix4 equivalent of Quaternion
-	LLMatrix3	getMatrix3(void) const;							// Returns the Matrix3 equivalent of Quaternion
-	void		getAngleAxis(F32* angle, F32* x, F32* y, F32* z) const;	// returns rotation in radians about axis x,y,z
-	void		getAngleAxis(F32* angle, LLVector3 &vec) const;
-	void		getEulerAngles(F32 *roll, F32* pitch, F32 *yaw) const;
-
-	F32	normalize();	// Normalizes Quaternion and returns magnitude
-	F32	normQuat();		// deprecated
-
-	const LLQuaternion&	conjugate(void);	// Conjugates Quaternion and returns result
-	const LLQuaternion&	conjQuat(void);		// deprecated
-
-	// Other useful methods
-	const LLQuaternion&	transpose();		// transpose (same as conjugate)
-	const LLQuaternion&	transQuat();		// deprecated
-
-	void			shortestArc(const LLVector3 &a, const LLVector3 &b);	// shortest rotation from a to b
-	const LLQuaternion& constrain(F32 radians);						// constrains rotation to a cone angle specified in radians
-
-	// Standard operators
-	friend std::ostream& operator<<(std::ostream &s, const LLQuaternion &a);					// Prints a
-	friend LLQuaternion operator+(const LLQuaternion &a, const LLQuaternion &b);	// Addition
-	friend LLQuaternion operator-(const LLQuaternion &a, const LLQuaternion &b);	// Subtraction
-	friend LLQuaternion operator-(const LLQuaternion &a);							// Negation
-	friend LLQuaternion operator*(F32 a, const LLQuaternion &q);					// Scale
-	friend LLQuaternion operator*(const LLQuaternion &q, F32 b);					// Scale
-	friend LLQuaternion operator*(const LLQuaternion &a, const LLQuaternion &b);	// Returns a * b
-	friend LLQuaternion operator~(const LLQuaternion &a);							// Returns a* (Conjugate of a)
-	bool operator==(const LLQuaternion &b) const;			// Returns a == b
-	bool operator!=(const LLQuaternion &b) const;			// Returns a != b
-
-	friend const LLQuaternion& operator*=(LLQuaternion &a, const LLQuaternion &b);	// Returns a * b
-
-	friend LLVector4 operator*(const LLVector4 &a, const LLQuaternion &rot);		// Rotates a by rot
-	friend LLVector3 operator*(const LLVector3 &a, const LLQuaternion &rot);		// Rotates a by rot
-	friend LLVector3d operator*(const LLVector3d &a, const LLQuaternion &rot);		// Rotates a by rot
-
-	// Non-standard operators
-	friend F32 dot(const LLQuaternion &a, const LLQuaternion &b);
-	friend LLQuaternion lerp(F32 t, const LLQuaternion &p, const LLQuaternion &q);		// linear interpolation (t = 0 to 1) from p to q
-	friend LLQuaternion lerp(F32 t, const LLQuaternion &q);								// linear interpolation (t = 0 to 1) from identity to q
-	friend LLQuaternion slerp(F32 t, const LLQuaternion &p, const LLQuaternion &q); 	// spherical linear interpolation from p to q
-	friend LLQuaternion slerp(F32 t, const LLQuaternion &q);							// spherical linear interpolation from identity to q
-	friend LLQuaternion nlerp(F32 t, const LLQuaternion &p, const LLQuaternion &q); 	// normalized linear interpolation from p to q
-	friend LLQuaternion nlerp(F32 t, const LLQuaternion &q); 							// normalized linear interpolation from p to q
-
-	LLVector3	packToVector3() const;						// Saves space by using the fact that our quaternions are normalized
-	void		unpackFromVector3(const LLVector3& vec);	// Saves space by using the fact that our quaternions are normalized
-
-	enum Order {
-		XYZ = 0,
-		YZX = 1,
-		ZXY = 2,
-		XZY = 3,
-		YXZ = 4,
-		ZYX = 5
-	};
-	// Creates a quaternions from maya's rotation representation,
-	// which is 3 rotations (in DEGREES) in the specified order
-	friend LLQuaternion mayaQ(F32 x, F32 y, F32 z, Order order);
-
-	// Conversions between Order and strings like "xyz" or "ZYX"
-	friend const char *OrderToString( const Order order );
-	friend Order StringToOrder( const char *str );
-
-	static BOOL parseQuat(const std::string& buf, LLQuaternion* value);
-
-	// For debugging, only
-	//static U32 mMultCount;
-};
-
-// checker
-inline BOOL	LLQuaternion::isFinite() const
-{
-	return (llfinite(mQ[VX]) && llfinite(mQ[VY]) && llfinite(mQ[VZ]) && llfinite(mQ[VS]));
-}
-
-inline BOOL LLQuaternion::isIdentity() const
-{
-	return 
-		( mQ[VX] == 0.f ) &&
-		( mQ[VY] == 0.f ) &&
-		( mQ[VZ] == 0.f ) &&
-		( mQ[VS] == 1.f );
-}
-
-inline BOOL LLQuaternion::isNotIdentity() const
-{
-	return 
-		( mQ[VX] != 0.f ) ||
-		( mQ[VY] != 0.f ) ||
-		( mQ[VZ] != 0.f ) ||
-		( mQ[VS] != 1.f );
-}
-
-
-
-inline LLQuaternion::LLQuaternion(void)
-{
-	mQ[VX] = 0.f;
-	mQ[VY] = 0.f;
-	mQ[VZ] = 0.f;
-	mQ[VS] = 1.f;
-}
-
-inline LLQuaternion::LLQuaternion(F32 x, F32 y, F32 z, F32 w)
-{
-	mQ[VX] = x;
-	mQ[VY] = y;
-	mQ[VZ] = z;
-	mQ[VS] = w;
-
-	//RN: don't normalize this case as its used mainly for temporaries during calculations
-	//normalize();
-	/*
-	F32 mag = sqrtf(mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] + mQ[VS]*mQ[VS]);
-	mag -= 1.f;
-	mag = fabs(mag);
-	llassert(mag < 10.f*FP_MAG_THRESHOLD);
-	*/
-}
-
-inline LLQuaternion::LLQuaternion(const F32 *q)
-{
-	mQ[VX] = q[VX];
-	mQ[VY] = q[VY];
-	mQ[VZ] = q[VZ];
-	mQ[VS] = q[VW];
-
-	normalize();
-	/*
-	F32 mag = sqrtf(mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] + mQ[VS]*mQ[VS]);
-	mag -= 1.f;
-	mag = fabs(mag);
-	llassert(mag < FP_MAG_THRESHOLD);
-	*/
-}
-
-
-inline void LLQuaternion::loadIdentity()
-{
-	mQ[VX] = 0.0f;
-	mQ[VY] = 0.0f;
-	mQ[VZ] = 0.0f;
-	mQ[VW] = 1.0f;
-}
-
-
-inline const LLQuaternion&	LLQuaternion::set(F32 x, F32 y, F32 z, F32 w)
-{
-	mQ[VX] = x;
-	mQ[VY] = y;
-	mQ[VZ] = z;
-	mQ[VS] = w;
-	normalize();
-	return (*this);
-}
-
-inline const LLQuaternion&	LLQuaternion::set(const LLQuaternion &quat)
-{
-	mQ[VX] = quat.mQ[VX];
-	mQ[VY] = quat.mQ[VY];
-	mQ[VZ] = quat.mQ[VZ];
-	mQ[VW] = quat.mQ[VW];
-	normalize();
-	return (*this);
-}
-
-inline const LLQuaternion&	LLQuaternion::set(const F32 *q)
-{
-	mQ[VX] = q[VX];
-	mQ[VY] = q[VY];
-	mQ[VZ] = q[VZ];
-	mQ[VS] = q[VW];
-	normalize();
-	return (*this);
-}
-
-
-// deprecated
-inline const LLQuaternion&	LLQuaternion::setQuatInit(F32 x, F32 y, F32 z, F32 w)
-{
-	mQ[VX] = x;
-	mQ[VY] = y;
-	mQ[VZ] = z;
-	mQ[VS] = w;
-	normalize();
-	return (*this);
-}
-
-// deprecated
-inline const LLQuaternion&	LLQuaternion::setQuat(const LLQuaternion &quat)
-{
-	mQ[VX] = quat.mQ[VX];
-	mQ[VY] = quat.mQ[VY];
-	mQ[VZ] = quat.mQ[VZ];
-	mQ[VW] = quat.mQ[VW];
-	normalize();
-	return (*this);
-}
-
-// deprecated
-inline const LLQuaternion&	LLQuaternion::setQuat(const F32 *q)
-{
-	mQ[VX] = q[VX];
-	mQ[VY] = q[VY];
-	mQ[VZ] = q[VZ];
-	mQ[VS] = q[VW];
-	normalize();
-	return (*this);
-}
-
-// There may be a cheaper way that avoids the sqrt.
-// Does sin_a = VX*VX + VY*VY + VZ*VZ?
-// Copied from Matrix and Quaternion FAQ 1.12
-inline void LLQuaternion::getAngleAxis(F32* angle, F32* x, F32* y, F32* z) const
-{
-	F32 cos_a = mQ[VW];
-	if (cos_a > 1.0f) cos_a = 1.0f;
-	if (cos_a < -1.0f) cos_a = -1.0f;
-
-    F32 sin_a = (F32) sqrt( 1.0f - cos_a * cos_a );
-
-    if ( fabs( sin_a ) < 0.0005f )
-		sin_a = 1.0f;
-	else
-		sin_a = 1.f/sin_a;
-
-    F32 temp_angle = 2.0f * (F32) acos( cos_a );
-	if (temp_angle > F_PI)
-	{
-		// The (angle,axis) pair should never have angles outside [PI, -PI]
-		// since we want the _shortest_ (angle,axis) solution.
-		// Since acos is defined for [0, PI], and we multiply by 2.0, we
-		// can push the angle outside the acceptible range.
-		// When this happens we set the angle to the other portion of a 
-		// full 2PI rotation, and negate the axis, which reverses the 
-		// direction of the rotation (by the right-hand rule).
-		*angle = 2.f * F_PI - temp_angle;
-    	*x = - mQ[VX] * sin_a;
-    	*y = - mQ[VY] * sin_a;
-    	*z = - mQ[VZ] * sin_a;
-	}
-	else
-	{
-		*angle = temp_angle;
-    	*x = mQ[VX] * sin_a;
-    	*y = mQ[VY] * sin_a;
-    	*z = mQ[VZ] * sin_a;
-	}
-}
-
-inline const LLQuaternion& LLQuaternion::conjugate()
-{
-	mQ[VX] *= -1.f;
-	mQ[VY] *= -1.f;
-	mQ[VZ] *= -1.f;
-	return (*this);
-}
-
-inline const LLQuaternion& LLQuaternion::conjQuat()
-{
-	mQ[VX] *= -1.f;
-	mQ[VY] *= -1.f;
-	mQ[VZ] *= -1.f;
-	return (*this);
-}
-
-// Transpose
-inline const LLQuaternion& LLQuaternion::transpose()
-{
-	mQ[VX] *= -1.f;
-	mQ[VY] *= -1.f;
-	mQ[VZ] *= -1.f;
-	return (*this);
-}
-
-// deprecated
-inline const LLQuaternion& LLQuaternion::transQuat()
-{
-	mQ[VX] *= -1.f;
-	mQ[VY] *= -1.f;
-	mQ[VZ] *= -1.f;
-	return (*this);
-}
-
-
-inline LLQuaternion 	operator+(const LLQuaternion &a, const LLQuaternion &b)
-{
-	return LLQuaternion( 
-		a.mQ[VX] + b.mQ[VX],
-		a.mQ[VY] + b.mQ[VY],
-		a.mQ[VZ] + b.mQ[VZ],
-		a.mQ[VW] + b.mQ[VW] );
-}
-
-
-inline LLQuaternion 	operator-(const LLQuaternion &a, const LLQuaternion &b)
-{
-	return LLQuaternion( 
-		a.mQ[VX] - b.mQ[VX],
-		a.mQ[VY] - b.mQ[VY],
-		a.mQ[VZ] - b.mQ[VZ],
-		a.mQ[VW] - b.mQ[VW] );
-}
-
-
-inline LLQuaternion 	operator-(const LLQuaternion &a)
-{
-	return LLQuaternion(
-		-a.mQ[VX],
-		-a.mQ[VY],
-		-a.mQ[VZ],
-		-a.mQ[VW] );
-}
-
-
-inline LLQuaternion 	operator*(F32 a, const LLQuaternion &q)
-{
-	return LLQuaternion(
-		a * q.mQ[VX],
-		a * q.mQ[VY],
-		a * q.mQ[VZ],
-		a * q.mQ[VW] );
-}
-
-
-inline LLQuaternion 	operator*(const LLQuaternion &q, F32 a)
-{
-	return LLQuaternion(
-		a * q.mQ[VX],
-		a * q.mQ[VY],
-		a * q.mQ[VZ],
-		a * q.mQ[VW] );
-}
-
-inline LLQuaternion	operator~(const LLQuaternion &a)
-{
-	LLQuaternion q(a);
-	q.conjQuat();
-	return q;
-}
-
-inline bool	LLQuaternion::operator==(const LLQuaternion &b) const
-{
-	return (  (mQ[VX] == b.mQ[VX])
-			&&(mQ[VY] == b.mQ[VY])
-			&&(mQ[VZ] == b.mQ[VZ])
-			&&(mQ[VS] == b.mQ[VS]));
-}
-
-inline bool	LLQuaternion::operator!=(const LLQuaternion &b) const
-{
-	return (  (mQ[VX] != b.mQ[VX])
-			||(mQ[VY] != b.mQ[VY])
-			||(mQ[VZ] != b.mQ[VZ])
-			||(mQ[VS] != b.mQ[VS]));
-}
-
-inline const LLQuaternion&	operator*=(LLQuaternion &a, const LLQuaternion &b)
-{
-#if 1
-	LLQuaternion q(
-		b.mQ[3] * a.mQ[0] + b.mQ[0] * a.mQ[3] + b.mQ[1] * a.mQ[2] - b.mQ[2] * a.mQ[1],
-		b.mQ[3] * a.mQ[1] + b.mQ[1] * a.mQ[3] + b.mQ[2] * a.mQ[0] - b.mQ[0] * a.mQ[2],
-		b.mQ[3] * a.mQ[2] + b.mQ[2] * a.mQ[3] + b.mQ[0] * a.mQ[1] - b.mQ[1] * a.mQ[0],
-		b.mQ[3] * a.mQ[3] - b.mQ[0] * a.mQ[0] - b.mQ[1] * a.mQ[1] - b.mQ[2] * a.mQ[2]
-	);
-	a = q;
-#else
-	a = a * b;
-#endif
-	return a;
-}
-
-const F32 ONE_PART_IN_A_MILLION = 0.000001f;
-
-inline F32	LLQuaternion::normalize()
-{
-	F32 mag = sqrtf(mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] + mQ[VS]*mQ[VS]);
-
-	if (mag > FP_MAG_THRESHOLD)
-	{
-		// Floating point error can prevent some quaternions from achieving
-		// exact unity length.  When trying to renormalize such quaternions we
-		// can oscillate between multiple quantized states.  To prevent such
-		// drifts we only renomalize if the length is far enough from unity.
-		if (fabs(1.f - mag) > ONE_PART_IN_A_MILLION)
-		{
-			F32 oomag = 1.f/mag;
-			mQ[VX] *= oomag;
-			mQ[VY] *= oomag;
-			mQ[VZ] *= oomag;
-			mQ[VS] *= oomag;
-		}
-	}
-	else
-	{
-		// we were given a very bad quaternion so we set it to identity
-		mQ[VX] = 0.f;
-		mQ[VY] = 0.f;
-		mQ[VZ] = 0.f;
-		mQ[VS] = 1.f;
-	}
-
-	return mag;
-}
-
-// deprecated
-inline F32	LLQuaternion::normQuat()
-{
-	F32 mag = sqrtf(mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] + mQ[VS]*mQ[VS]);
-
-	if (mag > FP_MAG_THRESHOLD)
-	{
-		if (fabs(1.f - mag) > ONE_PART_IN_A_MILLION)
-		{
-			// only renormalize if length not close enough to 1.0 already
-			F32 oomag = 1.f/mag;
-			mQ[VX] *= oomag;
-			mQ[VY] *= oomag;
-			mQ[VZ] *= oomag;
-			mQ[VS] *= oomag;
-		}
-	}
-	else
-	{
-		mQ[VX] = 0.f;
-		mQ[VY] = 0.f;
-		mQ[VZ] = 0.f;
-		mQ[VS] = 1.f;
-	}
-
-	return mag;
-}
-
-LLQuaternion::Order StringToOrder( const char *str );
-
-// Some notes about Quaternions
-
-// What is a Quaternion?
-// ---------------------
-// A quaternion is a point in 4-dimensional complex space.
-// Q = { Qx, Qy, Qz, Qw }
-// 
-//
-// Why Quaternions?
-// ----------------
-// The set of quaternions that make up the the 4-D unit sphere 
-// can be mapped to the set of all rotations in 3-D space.  Sometimes
-// it is easier to describe/manipulate rotations in quaternion space
-// than rotation-matrix space.
-//
-//
-// How Quaternions?
-// ----------------
-// In order to take advantage of quaternions we need to know how to
-// go from rotation-matricies to quaternions and back.  We also have
-// to agree what variety of rotations we're generating.
-// 
-// Consider the equation...   v' = v * R 
-//
-// There are two ways to think about rotations of vectors.
-// 1) v' is the same vector in a different reference frame
-// 2) v' is a new vector in the same reference frame
-//
-// bookmark -- which way are we using?
-// 
-// 
-// Quaternion from Angle-Axis:
-// ---------------------------
-// Suppose we wanted to represent a rotation of some angle (theta) 
-// about some axis ({Ax, Ay, Az})...
-//
-// axis of rotation = {Ax, Ay, Az} 
-// angle_of_rotation = theta
-//
-// s = sin(0.5 * theta)
-// c = cos(0.5 * theta)
-// Q = { s * Ax, s * Ay, s * Az, c }
-//
-//
-// 3x3 Matrix from Quaternion
-// --------------------------
-//
-//     |                                                                    |
-//     | 1 - 2 * (y^2 + z^2)   2 * (x * y + z * w)     2 * (y * w - x * z)  |
-//     |                                                                    |
-// M = | 2 * (x * y - z * w)   1 - 2 * (x^2 + z^2)     2 * (y * z + x * w)  |
-//     |                                                                    |
-//     | 2 * (x * z + y * w)   2 * (y * z - x * w)     1 - 2 * (x^2 + y^2)  |
-//     |                                                                    |
-
-#endif
+/** 
+ * @file llquaternion.h
+ * @brief LLQuaternion class header file.
+ *
+ * $LicenseInfo:firstyear=2000&license=viewergpl$
+ * 
+ * Copyright (c) 2000-2009, Linden Research, Inc.
+ * 
+ * Second Life Viewer Source Code
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
+ * 
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at
+ * http://secondlifegrid.net/programs/open_source/licensing/flossexception
+ * 
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ * 
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ * $/LicenseInfo$
+ */
+
+#ifndef LLQUATERNION_H
+#define LLQUATERNION_H
+
+#include <iostream>
+
+#ifndef LLMATH_H //enforce specific include order to avoid tangling inline dependencies
+#error "Please include llmath.h first."
+#endif
+
+class LLVector4;
+class LLVector3;
+class LLVector3d;
+class LLMatrix4;
+class LLMatrix3;
+
+//	NOTA BENE: Quaternion code is written assuming Unit Quaternions!!!!
+//			   Moreover, it is written assuming that all vectors and matrices
+//			   passed as arguments are normalized and unitary respectively.
+//			   VERY VERY VERY VERY BAD THINGS will happen if these assumptions fail.
+
+static const U32 LENGTHOFQUAT = 4;	// number of F32 components stored in LLQuaternion::mQ
+
+class LLQuaternion	// Quaternion held as LENGTHOFQUAT floats in mQ, indexed with VX/VY/VZ/VW (VS)
+{
+public:
+	F32 mQ[LENGTHOFQUAT];
+
+	static const LLQuaternion DEFAULT;
+
+	LLQuaternion();									// Initializes Quaternion to (0,0,0,1)
+	explicit LLQuaternion(const LLMatrix4 &mat);				// Initializes Quaternion from Matrix4
+	explicit LLQuaternion(const LLMatrix3 &mat);				// Initializes Quaternion from Matrix3
+	LLQuaternion(F32 x, F32 y, F32 z, F32 w);		// Initializes Quaternion to (x, y, z, w) as given -- NOT normalized
+	LLQuaternion(F32 angle, const LLVector4 &vec);	// Initializes Quaternion to axis_angle2quat(angle, vec)
+	LLQuaternion(F32 angle, const LLVector3 &vec);	// Initializes Quaternion to axis_angle2quat(angle, vec)
+	LLQuaternion(const F32 *q);						// Initializes Quaternion to normalize(x, y, z, w)
+	LLQuaternion(const LLVector3 &x_axis,
+				 const LLVector3 &y_axis,
+				 const LLVector3 &z_axis);			// Initializes Quaternion from Matrix3 = [x_axis ; y_axis ; z_axis]
+
+	BOOL isIdentity() const;	// exact comparison against (0,0,0,1)
+	BOOL isNotIdentity() const;	// logical negation of isIdentity()
+	BOOL isFinite() const;									// checks to see if all values of LLQuaternion are finite
+	void quantize16(F32 lower, F32 upper);					// changes the vector to reflect quantization
+	void quantize8(F32 lower, F32 upper);							// changes the vector to reflect quantization
+	void loadIdentity();											// Loads the quaternion that represents the identity rotation
+
+	const LLQuaternion&	set(F32 x, F32 y, F32 z, F32 w);		// Sets Quaternion to normalize(x, y, z, w)
+	const LLQuaternion&	set(const LLQuaternion &quat);			// Copies Quaternion, then normalizes
+	const LLQuaternion&	set(const F32 *q);						// Sets Quaternion to normalize(quat[VX], quat[VY], quat[VZ], quat[VW])
+	const LLQuaternion&	set(const LLMatrix3 &mat);				// Sets Quaternion to mat2quat(mat)
+	const LLQuaternion&	set(const LLMatrix4 &mat);				// Sets Quaternion to mat2quat(mat)
+
+	const LLQuaternion&	setAngleAxis(F32 angle, F32 x, F32 y, F32 z);	// Sets Quaternion to axis_angle2quat(angle, x, y, z)
+	const LLQuaternion&	setAngleAxis(F32 angle, const LLVector3 &vec);	// Sets Quaternion to axis_angle2quat(angle, vec)
+	const LLQuaternion&	setAngleAxis(F32 angle, const LLVector4 &vec);	// Sets Quaternion to axis_angle2quat(angle, vec)
+	const LLQuaternion&	setEulerAngles(F32 roll, F32 pitch, F32 yaw);	// Sets Quaternion to euler2quat(pitch, yaw, roll)
+
+	const LLQuaternion&	setQuatInit(F32 x, F32 y, F32 z, F32 w);	// deprecated
+	const LLQuaternion&	setQuat(const LLQuaternion &quat);			// deprecated
+	const LLQuaternion&	setQuat(const F32 *q);						// deprecated
+	const LLQuaternion&	setQuat(const LLMatrix3 &mat);				// deprecated
+	const LLQuaternion&	setQuat(const LLMatrix4 &mat);				// deprecated
+	const LLQuaternion&	setQuat(F32 angle, F32 x, F32 y, F32 z);	// deprecated
+	const LLQuaternion&	setQuat(F32 angle, const LLVector3 &vec);	// deprecated
+	const LLQuaternion&	setQuat(F32 angle, const LLVector4 &vec);	// deprecated
+	const LLQuaternion&	setQuat(F32 roll, F32 pitch, F32 yaw);		// deprecated
+
+	LLMatrix4	getMatrix4(void) const;							// Returns the Matrix4 equivalent of Quaternion
+	LLMatrix3	getMatrix3(void) const;							// Returns the Matrix3 equivalent of Quaternion
+	void		getAngleAxis(F32* angle, F32* x, F32* y, F32* z) const;	// returns rotation in radians about axis x,y,z
+	void		getAngleAxis(F32* angle, LLVector3 &vec) const;
+	void		getEulerAngles(F32 *roll, F32* pitch, F32 *yaw) const;
+
+	F32	normalize();	// Normalizes Quaternion and returns magnitude
+	F32	normQuat();		// deprecated
+
+	const LLQuaternion&	conjugate(void);	// Conjugates Quaternion and returns result
+	const LLQuaternion&	conjQuat(void);		// deprecated
+
+	// Other useful methods
+	const LLQuaternion&	transpose();		// transpose (same as conjugate)
+	const LLQuaternion&	transQuat();		// deprecated
+
+	void			shortestArc(const LLVector3 &a, const LLVector3 &b);	// shortest rotation from a to b
+	const LLQuaternion& constrain(F32 radians);						// constrains rotation to a cone angle specified in radians
+
+	// Standard operators
+	friend std::ostream& operator<<(std::ostream &s, const LLQuaternion &a);					// Prints a
+	friend LLQuaternion operator+(const LLQuaternion &a, const LLQuaternion &b);	// Addition
+	friend LLQuaternion operator-(const LLQuaternion &a, const LLQuaternion &b);	// Subtraction
+	friend LLQuaternion operator-(const LLQuaternion &a);							// Negation
+	friend LLQuaternion operator*(F32 a, const LLQuaternion &q);					// Scale
+	friend LLQuaternion operator*(const LLQuaternion &q, F32 b);					// Scale
+	friend LLQuaternion operator*(const LLQuaternion &a, const LLQuaternion &b);	// Returns a * b
+	friend LLQuaternion operator~(const LLQuaternion &a);							// Returns a* (Conjugate of a)
+	bool operator==(const LLQuaternion &b) const;			// Returns a == b
+	bool operator!=(const LLQuaternion &b) const;			// Returns a != b
+
+	friend const LLQuaternion& operator*=(LLQuaternion &a, const LLQuaternion &b);	// Returns a * b
+
+	friend LLVector4 operator*(const LLVector4 &a, const LLQuaternion &rot);		// Rotates a by rot
+	friend LLVector3 operator*(const LLVector3 &a, const LLQuaternion &rot);		// Rotates a by rot
+	friend LLVector3d operator*(const LLVector3d &a, const LLQuaternion &rot);		// Rotates a by rot
+
+	// Non-standard operators
+	friend F32 dot(const LLQuaternion &a, const LLQuaternion &b);
+	friend LLQuaternion lerp(F32 t, const LLQuaternion &p, const LLQuaternion &q);		// linear interpolation (t = 0 to 1) from p to q
+	friend LLQuaternion lerp(F32 t, const LLQuaternion &q);								// linear interpolation (t = 0 to 1) from identity to q
+	friend LLQuaternion slerp(F32 t, const LLQuaternion &p, const LLQuaternion &q); 	// spherical linear interpolation from p to q
+	friend LLQuaternion slerp(F32 t, const LLQuaternion &q);							// spherical linear interpolation from identity to q
+	friend LLQuaternion nlerp(F32 t, const LLQuaternion &p, const LLQuaternion &q); 	// normalized linear interpolation from p to q
+	friend LLQuaternion nlerp(F32 t, const LLQuaternion &q); 							// normalized linear interpolation to q (presumably from identity, like lerp/slerp above)
+
+	LLVector3	packToVector3() const;						// Saves space by using the fact that our quaternions are normalized
+	void		unpackFromVector3(const LLVector3& vec);	// Saves space by using the fact that our quaternions are normalized
+
+	enum Order {
+		XYZ = 0,
+		YZX = 1,
+		ZXY = 2,
+		XZY = 3,
+		YXZ = 4,
+		ZYX = 5
+	};
+	// Creates a quaternion from maya's rotation representation,
+	// which is 3 rotations (in DEGREES) in the specified order
+	friend LLQuaternion mayaQ(F32 x, F32 y, F32 z, Order order);
+
+	// Conversions between Order and strings like "xyz" or "ZYX"
+	friend const char *OrderToString( const Order order );
+	friend Order StringToOrder( const char *str );
+
+	static BOOL parseQuat(const std::string& buf, LLQuaternion* value);	// parses four floats from buf into *value; FALSE on failure
+
+	// For debugging, only
+	//static U32 mMultCount;
+};
+
+// Validity check: TRUE only when llfinite() accepts every one of the four components.
+inline BOOL	LLQuaternion::isFinite() const
+{
+	return (llfinite(mQ[VX]) && llfinite(mQ[VY]) && llfinite(mQ[VZ]) && llfinite(mQ[VS]));
+}
+
+inline BOOL LLQuaternion::isIdentity() const	// TRUE only when the components equal (0,0,0,1) exactly; no tolerance is applied
+{
+	return 
+		( mQ[VX] == 0.f ) &&
+		( mQ[VY] == 0.f ) &&
+		( mQ[VZ] == 0.f ) &&
+		( mQ[VS] == 1.f );
+}
+
+inline BOOL LLQuaternion::isNotIdentity() const	// TRUE when any component differs from (0,0,0,1); exact comparison, no tolerance
+{
+	return 
+		( mQ[VX] != 0.f ) ||
+		( mQ[VY] != 0.f ) ||
+		( mQ[VZ] != 0.f ) ||
+		( mQ[VS] != 1.f );
+}
+
+
+
+inline LLQuaternion::LLQuaternion(void)	// default constructor: starts as (0,0,0,1)
+{
+	mQ[VX] = 0.f;
+	mQ[VY] = 0.f;
+	mQ[VZ] = 0.f;
+	mQ[VS] = 1.f;
+}
+
+inline LLQuaternion::LLQuaternion(F32 x, F32 y, F32 z, F32 w)	// stores the four values verbatim; deliberately does NOT normalize
+{
+	mQ[VX] = x;
+	mQ[VY] = y;
+	mQ[VZ] = z;
+	mQ[VS] = w;
+
+	//RN: don't normalize this case as it's used mainly for temporaries during calculations
+	//normalize();
+	/*
+	F32 mag = sqrtf(mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] + mQ[VS]*mQ[VS]);
+	mag -= 1.f;
+	mag = fabs(mag);
+	llassert(mag < 10.f*FP_MAG_THRESHOLD);
+	*/
+}
+
+inline LLQuaternion::LLQuaternion(const F32 *q)	// reads q[VX], q[VY], q[VZ], q[VW] and normalizes the result
+{
+	mQ[VX] = q[VX];
+	mQ[VY] = q[VY];
+	mQ[VZ] = q[VZ];
+	mQ[VS] = q[VW];
+
+	normalize();	// unlike the (x, y, z, w) constructor, this one always normalizes
+	/*
+	F32 mag = sqrtf(mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] + mQ[VS]*mQ[VS]);
+	mag -= 1.f;
+	mag = fabs(mag);
+	llassert(mag < FP_MAG_THRESHOLD);
+	*/
+}
+
+
+inline void LLQuaternion::loadIdentity()	// overwrites all four components with (0,0,0,1)
+{
+	mQ[VX] = 0.0f;
+	mQ[VY] = 0.0f;
+	mQ[VZ] = 0.0f;
+	mQ[VW] = 1.0f;
+}
+
+
+inline const LLQuaternion&	LLQuaternion::set(F32 x, F32 y, F32 z, F32 w)	// assigns the four components, normalizes, returns *this
+{
+	mQ[VX] = x;
+	mQ[VY] = y;
+	mQ[VZ] = z;
+	mQ[VS] = w;
+	normalize();	// differs from the (x, y, z, w) constructor, which skips this step
+	return (*this);
+}
+
+inline const LLQuaternion&	LLQuaternion::set(const LLQuaternion &quat)	// copies quat's components, normalizes, returns *this
+{
+	mQ[VX] = quat.mQ[VX];
+	mQ[VY] = quat.mQ[VY];
+	mQ[VZ] = quat.mQ[VZ];
+	mQ[VW] = quat.mQ[VW];
+	normalize();	// so the stored value is not necessarily a bit-for-bit copy of quat
+	return (*this);
+}
+
+inline const LLQuaternion&	LLQuaternion::set(const F32 *q)	// reads q[VX], q[VY], q[VZ], q[VW], normalizes, returns *this
+{
+	mQ[VX] = q[VX];
+	mQ[VY] = q[VY];
+	mQ[VZ] = q[VZ];
+	mQ[VS] = q[VW];
+	normalize();
+	return (*this);
+}
+
+
+// deprecated: same body as set(F32 x, F32 y, F32 z, F32 w) -- assign, normalize, return *this
+inline const LLQuaternion&	LLQuaternion::setQuatInit(F32 x, F32 y, F32 z, F32 w)
+{
+	mQ[VX] = x;
+	mQ[VY] = y;
+	mQ[VZ] = z;
+	mQ[VS] = w;
+	normalize();
+	return (*this);
+}
+
+// deprecated: same body as set(const LLQuaternion &quat) -- copy, normalize, return *this
+inline const LLQuaternion&	LLQuaternion::setQuat(const LLQuaternion &quat)
+{
+	mQ[VX] = quat.mQ[VX];
+	mQ[VY] = quat.mQ[VY];
+	mQ[VZ] = quat.mQ[VZ];
+	mQ[VW] = quat.mQ[VW];
+	normalize();
+	return (*this);
+}
+
+// deprecated: same body as set(const F32 *q) -- read four floats, normalize, return *this
+inline const LLQuaternion&	LLQuaternion::setQuat(const F32 *q)
+{
+	mQ[VX] = q[VX];
+	mQ[VY] = q[VY];
+	mQ[VZ] = q[VZ];
+	mQ[VS] = q[VW];
+	normalize();
+	return (*this);
+}
+
+// Writes the rotation angle (radians, in [0, PI]) to *angle and the axis components to *x, *y, *z.
+// Original author's open question: since sin_a^2 == 1 - VW*VW, could VX*VX + VY*VY + VZ*VZ replace it for a unit quaternion?
+// Copied from Matrix and Quaternion FAQ 1.12
+inline void LLQuaternion::getAngleAxis(F32* angle, F32* x, F32* y, F32* z) const
+{
+	F32 cos_a = mQ[VW];
+	if (cos_a > 1.0f) cos_a = 1.0f;	// clamp into [-1, 1] so the sqrt and acos arguments below stay in range
+	if (cos_a < -1.0f) cos_a = -1.0f;
+
+    F32 sin_a = (F32) sqrt( 1.0f - cos_a * cos_a );
+
+    if ( fabs( sin_a ) < 0.0005f )
+		sin_a = 1.0f;	// near-zero sine: leave x,y,z unscaled instead of dividing by ~0
+	else
+		sin_a = 1.f/sin_a;	// from here on sin_a holds the reciprocal used to scale the axis
+
+    F32 temp_angle = 2.0f * (F32) acos( cos_a );
+	if (temp_angle > F_PI)
+	{
+		// The (angle,axis) pair should never have angles outside [-PI, PI]
+		// since we want the _shortest_ (angle,axis) solution.
+		// Since acos returns values in [0, PI], and we multiply by 2.0, we
+		// can push the angle outside the acceptable range.
+		// When this happens we set the angle to the other portion of a
+		// full 2PI rotation, and negate the axis, which reverses the
+		// direction of the rotation (by the right-hand rule).
+		*angle = 2.f * F_PI - temp_angle;
+    	*x = - mQ[VX] * sin_a;
+    	*y = - mQ[VY] * sin_a;
+    	*z = - mQ[VZ] * sin_a;
+	}
+	else
+	{
+		*angle = temp_angle;
+    	*x = mQ[VX] * sin_a;
+    	*y = mQ[VY] * sin_a;
+    	*z = mQ[VZ] * sin_a;
+	}
+}
+
+inline const LLQuaternion& LLQuaternion::conjugate()	// negates x,y,z in place, leaves w untouched, returns *this
+{
+	mQ[VX] *= -1.f;
+	mQ[VY] *= -1.f;
+	mQ[VZ] *= -1.f;
+	return (*this);
+}
+
+inline const LLQuaternion& LLQuaternion::conjQuat()	// deprecated per the class declaration; same body as conjugate()
+{
+	mQ[VX] *= -1.f;
+	mQ[VY] *= -1.f;
+	mQ[VZ] *= -1.f;
+	return (*this);
+}
+
+// Transpose: same body as conjugate() -- negates x,y,z in place and returns *this
+inline const LLQuaternion& LLQuaternion::transpose()
+{
+	mQ[VX] *= -1.f;
+	mQ[VY] *= -1.f;
+	mQ[VZ] *= -1.f;
+	return (*this);
+}
+
+// deprecated: same body as transpose() / conjugate() -- negates x,y,z in place and returns *this
+inline const LLQuaternion& LLQuaternion::transQuat()
+{
+	mQ[VX] *= -1.f;
+	mQ[VY] *= -1.f;
+	mQ[VZ] *= -1.f;
+	return (*this);
+}
+
+
+inline LLQuaternion 	operator+(const LLQuaternion &a, const LLQuaternion &b)	// component-wise sum; built with the non-normalizing constructor
+{
+	return LLQuaternion( 
+		a.mQ[VX] + b.mQ[VX],
+		a.mQ[VY] + b.mQ[VY],
+		a.mQ[VZ] + b.mQ[VZ],
+		a.mQ[VW] + b.mQ[VW] );
+}
+
+
+inline LLQuaternion 	operator-(const LLQuaternion &a, const LLQuaternion &b)	// component-wise difference a - b; result is not normalized
+{
+	return LLQuaternion( 
+		a.mQ[VX] - b.mQ[VX],
+		a.mQ[VY] - b.mQ[VY],
+		a.mQ[VZ] - b.mQ[VZ],
+		a.mQ[VW] - b.mQ[VW] );
+}
+
+
+inline LLQuaternion 	operator-(const LLQuaternion &a)
+{
+	// Negates every component, the scalar (W) term included.
+	const F32 neg_x = -a.mQ[VX];
+	const F32 neg_y = -a.mQ[VY];
+	const F32 neg_z = -a.mQ[VZ];
+	return LLQuaternion(neg_x, neg_y, neg_z, -a.mQ[VW]);
+}
+
+
+inline LLQuaternion 	operator*(F32 a, const LLQuaternion &q)
+{
+	// Scales all four components (scalar term included) by 'a'.
+	const F32 scaled_x = a * q.mQ[VX];
+	const F32 scaled_y = a * q.mQ[VY];
+	const F32 scaled_z = a * q.mQ[VZ];
+	return LLQuaternion(scaled_x, scaled_y, scaled_z, a * q.mQ[VW]);
+}
+
+
+inline LLQuaternion 	operator*(const LLQuaternion &q, F32 a)
+{
+	// Scalar-on-the-right form.  The scalar-on-the-left overload
+	// evaluates the same four products (a * component) with the same
+	// operand order, so forwarding to it yields an identical result
+	// without repeating the component list here.
+	return a * q;
+}
+
+inline LLQuaternion	operator~(const LLQuaternion &a)
+{
+	// Conjugate a copy so the operand itself is left unmodified.
+	LLQuaternion conj(a);
+	return conj.conjQuat();
+}
+
+inline bool	LLQuaternion::operator==(const LLQuaternion &b) const
+{
+	// Equal only when no component differs; this is an exact float compare.
+	const bool differs = (mQ[VX] != b.mQ[VX]) || (mQ[VY] != b.mQ[VY])
+					  || (mQ[VZ] != b.mQ[VZ]) || (mQ[VS] != b.mQ[VS]);
+	return !differs;
+}
+
+inline bool	LLQuaternion::operator!=(const LLQuaternion &b) const
+{
+	// Exact logical complement of operator==: true as soon as any of the
+	// four components compares unequal (so a NaN component always counts
+	// as a difference).
+	return !(*this == b);
+}
+
+inline const LLQuaternion&	operator*=(LLQuaternion &a, const LLQuaternion &b)
+{
+	// Quaternion product written out term by term.  Each result component
+	// is computed into a local first so that every term reads the
+	// components 'a' had on entry; 'a' is only overwritten at the end.
+	// Indices 0-2 feed the x/y/z constructor arguments and index 3 the w
+	// argument.
+	const F32 x = b.mQ[3] * a.mQ[0] + b.mQ[0] * a.mQ[3] + b.mQ[1] * a.mQ[2] - b.mQ[2] * a.mQ[1];
+	const F32 y = b.mQ[3] * a.mQ[1] + b.mQ[1] * a.mQ[3] + b.mQ[2] * a.mQ[0] - b.mQ[0] * a.mQ[2];
+	const F32 z = b.mQ[3] * a.mQ[2] + b.mQ[2] * a.mQ[3] + b.mQ[0] * a.mQ[1] - b.mQ[1] * a.mQ[0];
+	const F32 w = b.mQ[3] * a.mQ[3] - b.mQ[0] * a.mQ[0] - b.mQ[1] * a.mQ[1] - b.mQ[2] * a.mQ[2];
+
+	a = LLQuaternion(x, y, z, w);
+	return a;
+}
+
+const F32 ONE_PART_IN_A_MILLION = 0.000001f;	// tolerance on |1 - length| that normalize() and normQuat() test before rescaling
+
+inline F32	LLQuaternion::normalize()
+{
+	const F32 length = sqrtf(mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] + mQ[VS]*mQ[VS]);
+
+	if (!(length > FP_MAG_THRESHOLD))
+	{
+		// Too short (or not a number) to rescale: this is a very bad
+		// quaternion, so replace it with identity.
+		mQ[VX] = 0.f;
+		mQ[VY] = 0.f;
+		mQ[VZ] = 0.f;
+		mQ[VS] = 1.f;
+		return length;
+	}
+
+	// Floating point error can keep some quaternions from ever reaching
+	// exactly unit length, and renormalizing those over and over can make
+	// them oscillate between quantized states.  To prevent that drift the
+	// rescale is skipped unless the length is far enough from 1.0.
+	if (fabs(1.f - length) > ONE_PART_IN_A_MILLION)
+	{
+		const F32 inv_length = 1.f/length;
+		mQ[VX] *= inv_length;
+		mQ[VY] *= inv_length;
+		mQ[VZ] *= inv_length;
+		mQ[VS] *= inv_length;
+	}
+
+	return length;
+}
+
+// deprecated -- performs the same steps as normalize()
+inline F32	LLQuaternion::normQuat()
+{
+	const F32 length = sqrtf(mQ[VX]*mQ[VX] + mQ[VY]*mQ[VY] + mQ[VZ]*mQ[VZ] + mQ[VS]*mQ[VS]);
+
+	if (!(length > FP_MAG_THRESHOLD))
+	{
+		// Length too small (or NaN) to divide by: reset to identity.
+		mQ[VX] = 0.f;
+		mQ[VY] = 0.f;
+		mQ[VZ] = 0.f;
+		mQ[VS] = 1.f;
+	}
+	else if (fabs(1.f - length) > ONE_PART_IN_A_MILLION)
+	{
+		// Only rescale when the length is not close enough to 1.0
+		// already; near-unit quaternions are left exactly as they are.
+		const F32 inv_length = 1.f/length;
+		mQ[VX] *= inv_length;
+		mQ[VY] *= inv_length;
+		mQ[VZ] *= inv_length;
+		mQ[VS] *= inv_length;
+	}
+
+	// Callers get the length measured before any rescale or reset.
+	return length;
+}
+
+LLQuaternion::Order StringToOrder( const char *str );
+
+// Some notes about Quaternions
+
+// What is a Quaternion?
+// ---------------------
+// A quaternion is a point in 4-dimensional complex space.
+// Q = { Qx, Qy, Qz, Qw }
+// 
+//
+// Why Quaternions?
+// ----------------
+// The set of quaternions that make up the 4-D unit sphere 
+// can be mapped to the set of all rotations in 3-D space.  Sometimes
+// it is easier to describe/manipulate rotations in quaternion space
+// than rotation-matrix space.
+//
+//
+// How Quaternions?
+// ----------------
+// In order to take advantage of quaternions we need to know how to
+// go from rotation matrices to quaternions and back.  We also have
+// to agree what variety of rotations we're generating.
+// 
+// Consider the equation...   v' = v * R 
+//
+// There are two ways to think about rotations of vectors.
+// 1) v' is the same vector in a different reference frame
+// 2) v' is a new vector in the same reference frame
+//
+// bookmark -- which way are we using?
+// 
+// 
+// Quaternion from Angle-Axis:
+// ---------------------------
+// Suppose we wanted to represent a rotation of some angle (theta) 
+// about some axis ({Ax, Ay, Az})...
+//
+// axis of rotation = {Ax, Ay, Az} 
+// angle_of_rotation = theta
+//
+// s = sin(0.5 * theta)
+// c = cos(0.5 * theta)
+// Q = { s * Ax, s * Ay, s * Az, c }
+//
+//
+// 3x3 Matrix from Quaternion
+// --------------------------
+//
+//     |                                                                    |
+//     | 1 - 2 * (y^2 + z^2)   2 * (x * y + z * w)     2 * (y * w - x * z)  |
+//     |                                                                    |
+// M = | 2 * (x * y - z * w)   1 - 2 * (x^2 + z^2)     2 * (y * z + x * w)  |
+//     |                                                                    |
+//     | 2 * (x * z + y * w)   2 * (y * z - x * w)     1 - 2 * (x^2 + y^2)  |
+//     |                                                                    |
+
+#endif
diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp
index bba0a6d089..ab9f8c4c24 100644
--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@@ -45,7 +45,7 @@
 #include "v4math.h"
 #include "m4math.h"
 #include "m3math.h"
-#include "llmatrix4a.h"
+#include "llmatrix3a.h"
 #include "lloctree.h"
 #include "lldarray.h"
 #include "llvolume.h"
@@ -53,6 +53,7 @@
 #include "llstl.h"
 #include "llsdserialize.h"
 #include "llvector4a.h"
+#include "llmatrix4a.h"
 
 #define DEBUG_SILHOUETTE_BINORMALS 0
 #define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette
@@ -161,7 +162,7 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co
 	LLVector4a det;
 	det.setAllDot3(edge1, pvec);
 	
-	if (det.greaterEqual4(LLVector4a::getApproximatelyZero()).getComparisonMask() & 0x7)
+	if (det.greaterEqual(LLVector4a::getEpsilon()).getGatheredBits() & 0x7)
 	{
 		/* calculate distance from vert0 to ray origin */
 		LLVector4a tvec;
@@ -171,8 +172,8 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co
 		LLVector4a u;
 		u.setAllDot3(tvec,pvec);
 
-		if ((u.greaterEqual4(LLVector4a::getZero()).getComparisonMask() & 0x7) &&
-			(u.lessEqual4(det).getComparisonMask() & 0x7))
+		if ((u.greaterEqual(LLVector4a::getZero()).getGatheredBits() & 0x7) &&
+			(u.lessEqual(det).getGatheredBits() & 0x7))
 		{
 			/* prepare to test V parameter */
 			LLVector4a qvec;
@@ -188,8 +189,8 @@ BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, co
 			LLVector4a sum_uv;
 			sum_uv.setAdd(u, v);
 
-			S32 v_gequal = v.greaterEqual4(LLVector4a::getZero()).getComparisonMask() & 0x7;
-			S32 sum_lequal = sum_uv.lessEqual4(det).getComparisonMask() & 0x7;
+			S32 v_gequal = v.greaterEqual(LLVector4a::getZero()).getGatheredBits() & 0x7;
+			S32 sum_lequal = sum_uv.lessEqual(det).getGatheredBits() & 0x7;
 
 			if (v_gequal  && sum_lequal)
 			{
@@ -230,7 +231,7 @@ BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& v
 	pvec.setCross3(dir, edge2);
 
 	/* if determinant is near zero, ray lies in plane of triangle */
-	F32 det = edge1.dot3(pvec);
+	F32 det = edge1.dot3(pvec).getF32();
 
 	
 	if (det > -F_APPROXIMATELY_ZERO && det < F_APPROXIMATELY_ZERO)
@@ -245,7 +246,7 @@ BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& v
 	tvec.setSub(orig, vert0);
 	
 	/* calculate U parameter and test bounds */
-	u = (tvec.dot3(pvec)) * inv_det;
+	u = (tvec.dot3(pvec).getF32()) * inv_det;
 	if (u < 0.f || u > 1.f)
 	{
 		return FALSE;
@@ -255,7 +256,7 @@ BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& v
 	tvec.sub(edge1);
 		
 	/* calculate V parameter and test bounds */
-	v = (dir.dot3(tvec)) * inv_det;
+	v = (dir.dot3(tvec).getF32()) * inv_det;
 	
 	if (v < 0.f || u + v > 1.f)
 	{
@@ -263,7 +264,7 @@ BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& v
 	}
 
 	/* calculate t, ray intersects triangle */
-	t = (edge2.dot3(tvec)) * inv_det;
+	t = (edge2.dot3(tvec).getF32()) * inv_det;
 	
 	intersection_a = u;
 	intersection_b = v;
@@ -326,20 +327,20 @@ public:
 				//stretch by triangles in node
 				tri = *iter;
 				
-				min.setMin(*tri->mV[0]);
-				min.setMin(*tri->mV[1]);
-				min.setMin(*tri->mV[2]);
+				min.setMin(min, *tri->mV[0]);
+				min.setMin(min, *tri->mV[1]);
+				min.setMin(min, *tri->mV[2]);
 
-				max.setMax(*tri->mV[0]);
-				max.setMax(*tri->mV[1]);
-				max.setMax(*tri->mV[2]);
+				max.setMax(max, *tri->mV[0]);
+				max.setMax(max, *tri->mV[1]);
+				max.setMax(max, *tri->mV[2]);
 			}
 
 			for (S32 i = 0; i < branch->getChildCount(); ++i)
 			{  //stretch by child extents
 				LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0);
-				min.setMin(child->mExtents[0]);
-				max.setMax(child->mExtents[1]);
+				min.setMin(min, child->mExtents[0]);
+				max.setMax(max, child->mExtents[1]);	// accumulate from the running max; seeding from min would discard it
 			}
 		}
 		else if (branch->getChildCount() != 0)
@@ -352,8 +353,8 @@ public:
 			for (S32 i = 1; i < branch->getChildCount(); ++i)
 			{  //stretch by child extents
 				child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0);
-				min.setMin(child->mExtents[0]);
-				max.setMax(child->mExtents[1]);
+				min.setMin(min, child->mExtents[0]);
+				max.setMax(max, child->mExtents[1]);
 			}
 		}
 		else
@@ -2011,7 +2012,7 @@ const LLVolumeFace::VertexData& LLVolumeFace::VertexData::operator=(const LLVolu
 	if (this != &rhs)
 	{
 		init();
-		LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 8);
+		LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 8*sizeof(F32));
 		mTexCoord = rhs.mTexCoord;
 	}
 	return *this;
@@ -2055,8 +2056,8 @@ void LLVolumeFace::VertexData::setNormal(const LLVector4a& norm)
 
 bool LLVolumeFace::VertexData::operator<(const LLVolumeFace::VertexData& rhs)const
 {
-	const F32* lp = this->getPosition().getF32();
-	const F32* rp = rhs.getPosition().getF32();
+	const F32* lp = this->getPosition().getF32ptr();
+	const F32* rp = rhs.getPosition().getF32ptr();
 
 	if (lp[0] != rp[0])
 	{
@@ -2073,8 +2074,8 @@ bool LLVolumeFace::VertexData::operator<(const LLVolumeFace::VertexData& rhs)con
 		return lp[2] < rp[2];
 	}
 
-	lp = getNormal().getF32();
-	rp = rhs.getNormal().getF32();
+	lp = getNormal().getF32ptr();
+	rp = rhs.getNormal().getF32ptr();
 
 	if (lp[0] != rp[0])
 	{
@@ -2101,23 +2102,23 @@ bool LLVolumeFace::VertexData::operator<(const LLVolumeFace::VertexData& rhs)con
 
 bool LLVolumeFace::VertexData::operator==(const LLVolumeFace::VertexData& rhs)const
 {
-	return mData[POSITION].equal3(rhs.getPosition()) &&
-			mData[NORMAL].equal3(rhs.getNormal()) &&
+	return mData[POSITION].equals3(rhs.getPosition()) &&
+			mData[NORMAL].equals3(rhs.getNormal()) &&
 			mTexCoord == rhs.mTexCoord;
 }
 
 bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs, F32 angle_cutoff) const
 {
 	bool retval = false;
-	if (rhs.mData[POSITION].equal3(mData[POSITION]) && rhs.mTexCoord == mTexCoord)
+	if (rhs.mData[POSITION].equals3(mData[POSITION]) && rhs.mTexCoord == mTexCoord)
 	{
 		if (angle_cutoff > 1.f)
 		{
-			retval = (mData[NORMAL].equal3(rhs.mData[NORMAL]));
+			retval = (mData[NORMAL].equals3(rhs.mData[NORMAL]));
 		}
 		else
 		{
-			F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]);
+			F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]).getF32();
 			retval = cur_angle > angle_cutoff;
 		}
 	}
@@ -2331,8 +2332,8 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 				}
 				else
 				{
-					min.setMin(*pos_out);
-					max.setMax(*pos_out);
+					min.setMin(min, *pos_out);
+					max.setMax(max, *pos_out);
 				}
 
 				pos_out++;
@@ -2944,7 +2945,7 @@ void sculpt_calc_mesh_resolution(U16 width, U16 height, U8 type, F32 detail, S32
 		ratio = (F32) width / (F32) height;
 
 	
-	s = (S32)fsqrtf(((F32)vertices / ratio));
+	s = (S32)(F32) sqrt(((F32)vertices / ratio));
 
 	s = llmax(s, 4);              // no degenerate sizes, please
 	t = vertices / s;
@@ -5280,16 +5281,15 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 
 	freeData();
 	
-	LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 12);
+	LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 12*sizeof(F32));
 
 	resizeVertices(src.mNumVertices);
 	resizeIndices(src.mNumIndices);
 
 	if (mNumVertices)
 	{
-		S32 vert_size = mNumVertices*4;
+		S32 vert_size = mNumVertices*4*sizeof(F32);
 		S32 tc_size = (mNumVertices*8+0xF) & ~0xF;
-		tc_size /= 4;
 			
 		LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size);
 		LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size);
@@ -5322,8 +5322,7 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
 	if (mNumIndices)
 	{
 		S32 idx_size = (mNumIndices*2+0xF) & ~0xF;
-		idx_size /= 4;
-
+		
 		LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size);
 	}
 	
@@ -5388,9 +5387,9 @@ void LLVolumeFace::getVertexData(U16 index, LLVolumeFace::VertexData& cv)
 
 bool LLVolumeFace::VertexMapData::operator==(const LLVolumeFace::VertexData& rhs) const
 {
-	return getPosition().equal3(rhs.getPosition()) &&
+	return getPosition().equals3(rhs.getPosition()) &&
 		mTexCoord == rhs.mTexCoord &&
-		getNormal().equal3(rhs.getNormal());
+		getNormal().equals3(rhs.getNormal());
 }
 
 bool LLVolumeFace::VertexMapData::ComparePosition::operator()(const LLVector3& a, const LLVector3& b) const
@@ -5423,7 +5422,7 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
 		getVertexData(index, cv);
 		
 		BOOL found = FALSE;
-		VertexMapData::PointMap::iterator point_iter = point_map.find(LLVector3(cv.getPosition().getF32()));
+		VertexMapData::PointMap::iterator point_iter = point_map.find(LLVector3(cv.getPosition().getF32ptr()));
 		if (point_iter != point_map.end())
 		{ //duplicate point might exist
 			for (U32 j = 0; j < point_iter->second.size(); ++j)
@@ -5455,7 +5454,7 @@ void LLVolumeFace::optimize(F32 angle_cutoff)
 			}
 			else
 			{
-				point_map[LLVector3(d.getPosition().getF32())].push_back(d);
+				point_map[LLVector3(d.getPosition().getF32ptr())].push_back(d);
 			}
 		}
 	}
@@ -5491,12 +5490,12 @@ void LLVolumeFace::createOctree()
 		tri->mIndex[2] = mIndices[i+2];
 
 		LLVector4a min = v0;
-		min.setMin(v1);
-		min.setMin(v2);
+		min.setMin(min, v1);
+		min.setMin(min, v2);
 
 		LLVector4a max = v0;
-		max.setMax(v1);
-		max.setMax(v2);
+		max.setMax(max, v1);
+		max.setMax(max, v2);
 
 		LLVector4a center;
 		center.setAdd(min, max);
@@ -5507,7 +5506,7 @@ void LLVolumeFace::createOctree()
 		LLVector4a size;
 		size.setSub(max,min);
 		
-		tri->mRadius = size.length3() * 0.5f;
+		tri->mRadius = size.getLength3().getF32() * 0.5f;
 		
 		mOctree->insert(tri);
 	}
@@ -5655,12 +5654,13 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build)
 
 				if (gx == 0 && gy == 0)
 				{
-					min = max = newVert.getPosition();
+					min = newVert.getPosition();
+					max = min;
 				}
 				else
 				{
-					min.setMin(newVert.getPosition());
-					max.setMax(newVert.getPosition());
+					min.setMin(min, newVert.getPosition());
+					max.setMax(max, newVert.getPosition());
 				}
 			}
 		}
@@ -5795,7 +5795,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 		
 		if (i == 0)
 		{
-			min = max = pos[i];
+			max = pos[i];
+			min = max;
 			min_uv = max_uv = tc[i];
 		}
 		else
@@ -5848,8 +5849,8 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build)
 		
 	for (S32 i = 0; i < num_vertices; i++)
 	{
-		binorm[i].load4a((F32*) &binormal.mQ);
-		norm[i].load4a((F32*) &normal.mQ);
+		binorm[i].load4a(binormal.getF32ptr());
+		norm[i].load4a(normal.getF32ptr());
 	}
 
 	if (partial_build)
@@ -6186,7 +6187,7 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 	LLVector4a* dst = (LLVector4a*) ll_aligned_malloc_16(new_size);
 	if (mPositions)
 	{
-		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, old_size/4);
+		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mPositions, old_size);
 		ll_aligned_free_16(mPositions);
 	}
 	mPositions = dst;
@@ -6195,7 +6196,7 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 	dst = (LLVector4a*) ll_aligned_malloc_16(new_size);
 	if (mNormals)
 	{
-		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, old_size/4);
+		LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mNormals, old_size);
 		ll_aligned_free_16(mNormals);
 	}
 	mNormals = dst;
@@ -6209,7 +6210,7 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con
 		LLVector2* dst = (LLVector2*) ll_aligned_malloc_16(new_size);
 		if (mTexCoords)
 		{
-			LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, old_size/4);
+			LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mTexCoords, old_size);
 			ll_aligned_free_16(mTexCoords);
 		}
 	}
@@ -6268,7 +6269,7 @@ void LLVolumeFace::pushIndex(const U16& idx)
 		U16* dst = (U16*) ll_aligned_malloc_16(new_size);
 		if (mIndices)
 		{
-			LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, old_size/4);
+			LLVector4a::memcpyNonAliased16((F32*) dst, (F32*) mIndices, old_size);
 			ll_aligned_free_16(mIndices);
 		}
 		mIndices = dst;
@@ -6319,9 +6320,9 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 
 	if (mNumVertices > 0)
 	{ //copy old buffers
-		LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, mNumVertices*4);
-		LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, mNumVertices*4);
-		LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, mNumVertices*2);
+		LLVector4a::memcpyNonAliased16((F32*) new_pos, (F32*) mPositions, mNumVertices*4*sizeof(F32));
+		LLVector4a::memcpyNonAliased16((F32*) new_norm, (F32*) mNormals, mNumVertices*4*sizeof(F32));
+		LLVector4a::memcpyNonAliased16((F32*) new_tc, (F32*) mTexCoords, mNumVertices*2*sizeof(F32));
 	}
 
 	//free old buffer space
@@ -6382,7 +6383,7 @@ void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMat
 	if (mNumIndices > 0)
 	{ //copy old index buffer
 		S32 old_size = (mNumIndices*2+0xF) & ~0xF;
-		LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, old_size/4);
+		LLVector4a::memcpyNonAliased16((F32*) new_indices, (F32*) mIndices, old_size);
 	}
 
 	//free old index buffer
diff --git a/indra/llmath/tests/v2math_test.cpp b/indra/llmath/tests/v2math_test.cpp
index 4660fcb955..c745b9989e 100644
--- a/indra/llmath/tests/v2math_test.cpp
+++ b/indra/llmath/tests/v2math_test.cpp
@@ -91,7 +91,7 @@ namespace tut
 		F32 x = 2.2345f, y = 3.5678f ;
 		LLVector2 vec2(x,y);
 		ensure("magVecSquared:Fail ", is_approx_equal(vec2.magVecSquared(), (x*x + y*y)));
-		ensure("magVec:Fail ", is_approx_equal(vec2.magVec(), fsqrtf(x*x + y*y)));
+		ensure("magVec:Fail ", is_approx_equal(vec2.magVec(), (F32) sqrt(x*x + y*y)));
 	}
 
 	template<> template<>
@@ -413,7 +413,7 @@ namespace tut
 		ensure_equals("dist_vec_squared values are not equal",val2, val1);
 
 		val1 = 	dist_vec(vec2, vec3);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2));
 		ensure_equals("dist_vec values are not equal",val2, val1);
 	}
 
@@ -437,7 +437,7 @@ namespace tut
 		LLVector2 vec2(x1, y1);
 
 		F32 vecMag = vec2.normVec();
-		F32 mag = fsqrtf(x1*x1 + y1*y1);
+		F32 mag = (F32) sqrt(x1*x1 + y1*y1);
 
 		F32 oomag = 1.f / mag;
 		val1 = x1 * oomag;
diff --git a/indra/llmath/tests/v3color_test.cpp b/indra/llmath/tests/v3color_test.cpp
index 316b6e392f..0efba8e9f3 100644
--- a/indra/llmath/tests/v3color_test.cpp
+++ b/indra/llmath/tests/v3color_test.cpp
@@ -99,7 +99,7 @@ namespace tut
 		F32 r = 2.3436212f, g = 1231.f, b = 4.7849321232f;
 		LLColor3 llcolor3(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor3.magVecSquared(), (r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor3.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor3.magVec(), (F32) sqrt(r*r + g*g + b*b)));
 	}
 
 	template<> template<>
@@ -109,7 +109,7 @@ namespace tut
 		F32 val1, val2,val3;
 		LLColor3 llcolor3(r,g,b);
 		F32 vecMag = llcolor3.normVec();
-		F32 mag = fsqrtf(r*r + g*g + b*b);
+		F32 mag = (F32) sqrt(r*r + g*g + b*b);
 		F32 oomag = 1.f / mag;
 		val1 = r * oomag;
 		val2 = g * oomag;
@@ -292,7 +292,7 @@ namespace tut
 		F32 r1 =1.f, g1 = 2.f,b1 = 1.2f, r2 = -2.3f, g2 = 1.11f, b2 = 1234.234f;
 		LLColor3 llcolor3(r1,g1,b1),llcolor3a(r2,g2,b2);
 		F32 val = distVec(llcolor3,llcolor3a);
-		ensure("distVec failed ", is_approx_equal(fsqrtf((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val));
+		ensure("distVec failed ", is_approx_equal((F32) sqrt((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val));
 		
 		F32 val1 = distVec_squared(llcolor3,llcolor3a);
 		ensure("distVec_squared failed ", is_approx_equal(((r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2)) ,val1));
diff --git a/indra/llmath/tests/v3dmath_test.cpp b/indra/llmath/tests/v3dmath_test.cpp
index e7c949186c..894b6200f5 100644
--- a/indra/llmath/tests/v3dmath_test.cpp
+++ b/indra/llmath/tests/v3dmath_test.cpp
@@ -409,7 +409,7 @@ namespace tut
 		LLVector3d vec3D(x,y,z);
 		F64 res = (x*x + y*y + z*z) - vec3D.magVecSquared();
 		ensure("1:magVecSquared:Fail ", ((-F_APPROXIMATELY_ZERO <= res)&& (res <=F_APPROXIMATELY_ZERO)));
-		res = fsqrtf(x*x + y*y + z*z) - vec3D.magVec();
+		res = (F32) sqrt(x*x + y*y + z*z) - vec3D.magVec();
 		ensure("2:magVec: Fail ", ((-F_APPROXIMATELY_ZERO <= res)&& (res <=F_APPROXIMATELY_ZERO)));	
 	}
 
diff --git a/indra/llmath/tests/v3math_test.cpp b/indra/llmath/tests/v3math_test.cpp
index 7faf076243..d5c8dd2f9c 100644
--- a/indra/llmath/tests/v3math_test.cpp
+++ b/indra/llmath/tests/v3math_test.cpp
@@ -155,7 +155,7 @@ namespace tut
 		F32 x = 2.32f, y = 1.212f, z = -.12f;
 		LLVector3 vec3(x,y,z);		
 		ensure("1:magVecSquared:Fail ", is_approx_equal(vec3.magVecSquared(), (x*x + y*y + z*z)));
-		ensure("2:magVec:Fail ", is_approx_equal(vec3.magVec(), fsqrtf(x*x + y*y + z*z)));
+		ensure("2:magVec:Fail ", is_approx_equal(vec3.magVec(), (F32) sqrt(x*x + y*y + z*z)));
 	}
 
 	template<> template<>
@@ -515,7 +515,7 @@ namespace tut
 		F32 val1,val2;
 		LLVector3 vec3(x1,y1,z1),vec3a(x2,y2,z2);
 		val1 = dist_vec(vec3,vec3a);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
 		ensure_equals("1:dist_vec: Fail ",val2, val1);
 		val1 = dist_vec_squared(vec3,vec3a);
 		val2 =((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
diff --git a/indra/llmath/tests/v4color_test.cpp b/indra/llmath/tests/v4color_test.cpp
index 33921e0f0f..636446027a 100644
--- a/indra/llmath/tests/v4color_test.cpp
+++ b/indra/llmath/tests/v4color_test.cpp
@@ -161,7 +161,7 @@ namespace tut
 		F32 r = 0x20, g = 0xFFFF, b = 0xFF;
 		LLColor4 llcolor4(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor4.magVecSquared(), (r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor4.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor4.magVec(), (F32) sqrt(r*r + g*g + b*b)));
 	}
 
 	template<> template<>
@@ -170,7 +170,7 @@ namespace tut
 		F32 r = 0x20, g = 0xFFFF, b = 0xFF;
 		LLColor4 llcolor4(r,g,b);
 		F32 vecMag = llcolor4.normVec();
-		F32 mag = fsqrtf(r*r + g*g + b*b);
+		F32 mag = (F32) sqrt(r*r + g*g + b*b);
 		F32 oomag = 1.f / mag;
 		F32 val1 = r * oomag, val2 = g * oomag,	val3 = b * oomag;
 		ensure("1:normVec failed ", (is_approx_equal(val1, llcolor4.mV[0]) && is_approx_equal(val2, llcolor4.mV[1]) && is_approx_equal(val3, llcolor4.mV[2]) && is_approx_equal(vecMag, mag)));
diff --git a/indra/llmath/tests/v4coloru_test.cpp b/indra/llmath/tests/v4coloru_test.cpp
index 9f71cfc8cc..b3dbfece34 100644
--- a/indra/llmath/tests/v4coloru_test.cpp
+++ b/indra/llmath/tests/v4coloru_test.cpp
@@ -141,7 +141,7 @@ namespace tut
 		U8 r = 0x12, g = 0xFF, b = 0xAF;
 		LLColor4U llcolor4u(r,g,b);
 		ensure("magVecSquared:Fail ", is_approx_equal(llcolor4u.magVecSquared(), (F32)(r*r + g*g + b*b)));
-		ensure("magVec:Fail ", is_approx_equal(llcolor4u.magVec(), fsqrtf(r*r + g*g + b*b)));
+		ensure("magVec:Fail ", is_approx_equal(llcolor4u.magVec(), (F32) sqrt((F32) (r*r + g*g + b*b))));
 	}
 
 	template<> template<>
diff --git a/indra/llmath/tests/v4math_test.cpp b/indra/llmath/tests/v4math_test.cpp
index fe051c27e9..e919c90efa 100644
--- a/indra/llmath/tests/v4math_test.cpp
+++ b/indra/llmath/tests/v4math_test.cpp
@@ -102,7 +102,7 @@ namespace tut
 	{
 		F32 x = 10.f, y = -2.3f, z = -.023f;
 		LLVector4 vec4(x,y,z);
-		ensure("magVec:Fail ", is_approx_equal(vec4.magVec(), fsqrtf(x*x + y*y + z*z)));
+		ensure("magVec:Fail ", is_approx_equal(vec4.magVec(), (F32) sqrt(x*x + y*y + z*z)));
 		ensure("magVecSquared:Fail ", is_approx_equal(vec4.magVecSquared(), (x*x + y*y + z*z)));
 	}
 
@@ -343,7 +343,7 @@ namespace tut
 		F32 val1,val2;
 		LLVector4 vec4(x1,y1,z1),vec4a(x2,y2,z2);
 		val1 = dist_vec(vec4,vec4a);
-		val2 = fsqrtf((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
+		val2 = (F32) sqrt((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
 		ensure_equals("dist_vec: Fail ",val2, val1);
 		val1 = dist_vec_squared(vec4,vec4a);
 		val2 =((x1 - x2)*(x1 - x2) + (y1 - y2)* (y1 - y2) + (z1 - z2)* (z1 -z2));
diff --git a/indra/llmath/v2math.cpp b/indra/llmath/v2math.cpp
index 220336e0c2..2603127f75 100644
--- a/indra/llmath/v2math.cpp
+++ b/indra/llmath/v2math.cpp
@@ -92,7 +92,7 @@ F32	dist_vec(const LLVector2 &a, const LLVector2 &b)
 {
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
-	return fsqrtf( x*x + y*y );
+	return (F32) sqrt( x*x + y*y );
 }
 
 F32	dist_vec_squared(const LLVector2 &a, const LLVector2 &b)
diff --git a/indra/llmath/v2math.h b/indra/llmath/v2math.h
index ae26c85ce4..35fd1b6048 100644
--- a/indra/llmath/v2math.h
+++ b/indra/llmath/v2math.h
@@ -225,7 +225,7 @@ inline void	LLVector2::setVec(const F32 *vec)
 
 inline F32 LLVector2::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 }
 
 inline F32 LLVector2::lengthSquared(void) const
@@ -235,7 +235,7 @@ inline F32 LLVector2::lengthSquared(void) const
 
 inline F32		LLVector2::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -262,7 +262,7 @@ inline bool LLVector2::isFinite() const
 // deprecated
 inline F32		LLVector2::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 }
 
 // deprecated
@@ -274,7 +274,7 @@ inline F32		LLVector2::magVecSquared(void) const
 // deprecated
 inline F32		LLVector2::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
diff --git a/indra/llmath/v3color.h b/indra/llmath/v3color.h
index 1915d80502..95a3de8b62 100644
--- a/indra/llmath/v3color.h
+++ b/indra/llmath/v3color.h
@@ -284,7 +284,7 @@ inline F32		LLColor3::brightness(void) const
 
 inline F32		LLColor3::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }
 
 inline F32		LLColor3::lengthSquared(void) const
@@ -294,7 +294,7 @@ inline F32		LLColor3::lengthSquared(void) const
 
 inline F32		LLColor3::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;
 
 	if (mag)
@@ -310,7 +310,7 @@ inline F32		LLColor3::normalize(void)
 // deprecated
 inline F32		LLColor3::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }
 
 // deprecated
@@ -322,7 +322,7 @@ inline F32		LLColor3::magVecSquared(void) const
 // deprecated
 inline F32		LLColor3::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;
 
 	if (mag)
@@ -444,7 +444,7 @@ inline F32		distVec(const LLColor3 &a, const LLColor3 &b)
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
 	F32 z = a.mV[2] - b.mV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return (F32) sqrt( x*x + y*y + z*z );
 }
 
 inline F32		distVec_squared(const LLColor3 &a, const LLColor3 &b)
diff --git a/indra/llmath/v3dmath.h b/indra/llmath/v3dmath.h
index 6ab31e8a41..ab253de064 100644
--- a/indra/llmath/v3dmath.h
+++ b/indra/llmath/v3dmath.h
@@ -240,7 +240,7 @@ inline const LLVector3d&	LLVector3d::setVec(const F64 *vec)
 
 inline F64 LLVector3d::normVec(void)
 {
-	F64 mag = fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	F64 mag = (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
 	F64 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -262,7 +262,7 @@ inline F64 LLVector3d::normVec(void)
 
 inline F64 LLVector3d::normalize(void)
 {
-	F64 mag = fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	F64 mag = (F32) sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
 	F64 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -286,7 +286,7 @@ inline F64 LLVector3d::normalize(void)
 
 inline F64	LLVector3d::magVec(void) const
 {
-	return fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	return sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);	// F64 in, F64 out: no narrowing through F32
 }
 
 inline F64	LLVector3d::magVecSquared(void) const
@@ -296,7 +296,7 @@ inline F64	LLVector3d::magVecSquared(void) const
 
 inline F64	LLVector3d::length(void) const
 {
-	return fsqrtf(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);
+	return sqrt(mdV[0]*mdV[0] + mdV[1]*mdV[1] + mdV[2]*mdV[2]);	// F64 in, F64 out: no narrowing through F32
 }
 
 inline F64	LLVector3d::lengthSquared(void) const
@@ -406,7 +406,7 @@ inline F64	dist_vec(const LLVector3d &a, const LLVector3d &b)
 	F64 x = a.mdV[0] - b.mdV[0];
 	F64 y = a.mdV[1] - b.mdV[1];
 	F64 z = a.mdV[2] - b.mdV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return (F32) sqrt( x*x + y*y + z*z );
 }
 
 inline F64	dist_vec_squared(const LLVector3d &a, const LLVector3d &b)
diff --git a/indra/llmath/v3math.h b/indra/llmath/v3math.h
index 75c860a91e..5d483a8753 100644
--- a/indra/llmath/v3math.h
+++ b/indra/llmath/v3math.h
@@ -282,7 +282,7 @@ inline void	LLVector3::setVec(const F32 *vec)
 
 inline F32 LLVector3::normalize(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -305,7 +305,7 @@ inline F32 LLVector3::normalize(void)
 // deprecated
 inline F32 LLVector3::normVec(void)
 {
-	F32 mag = fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	F32 mag = (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -329,7 +329,7 @@ inline F32 LLVector3::normVec(void)
 
 inline F32	LLVector3::length(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }
 
 inline F32	LLVector3::lengthSquared(void) const
@@ -339,7 +339,7 @@ inline F32	LLVector3::lengthSquared(void) const
 
 inline F32	LLVector3::magVec(void) const
 {
-	return fsqrtf(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
+	return (F32) sqrt(mV[0]*mV[0] + mV[1]*mV[1] + mV[2]*mV[2]);
 }
 
 inline F32	LLVector3::magVecSquared(void) const
@@ -473,7 +473,7 @@ inline F32	dist_vec(const LLVector3 &a, const LLVector3 &b)
 	F32 x = a.mV[0] - b.mV[0];
 	F32 y = a.mV[1] - b.mV[1];
 	F32 z = a.mV[2] - b.mV[2];
-	return fsqrtf( x*x + y*y + z*z );
+	return (F32) sqrt( x*x + y*y + z*z );
 }
 
 inline F32	dist_vec_squared(const LLVector3 &a, const LLVector3 &b)
diff --git a/indra/llmath/v4color.h b/indra/llmath/v4color.h
index 6b63b976b0..dd92e1cc63 100644
--- a/indra/llmath/v4color.h
+++ b/indra/llmath/v4color.h
@@ -392,7 +392,7 @@ inline const LLColor4&	LLColor4::setAlpha(F32 a)
 
 inline F32		LLColor4::length(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 }
 
 inline F32		LLColor4::lengthSquared(void) const
@@ -402,7 +402,7 @@ inline F32		LLColor4::lengthSquared(void) const
 
 inline F32		LLColor4::normalize(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;
 
 	if (mag)
@@ -418,7 +418,7 @@ inline F32		LLColor4::normalize(void)
 // deprecated
 inline F32		LLColor4::magVec(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 }
 
 // deprecated
@@ -430,7 +430,7 @@ inline F32		LLColor4::magVecSquared(void) const
 // deprecated
 inline F32		LLColor4::normVec(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;
 
 	if (mag)
diff --git a/indra/llmath/v4coloru.h b/indra/llmath/v4coloru.h
index 4ec5a345eb..08245403a1 100644
--- a/indra/llmath/v4coloru.h
+++ b/indra/llmath/v4coloru.h
@@ -300,7 +300,7 @@ inline const LLColor4U&	LLColor4U::setAlpha(U8 a)
 
 inline F32		LLColor4U::length(void) const
 {
-	return fsqrtf( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
+	return (F32) sqrt( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
 }
 
 inline F32		LLColor4U::lengthSquared(void) const
@@ -311,7 +311,7 @@ inline F32		LLColor4U::lengthSquared(void) const
 // deprecated
 inline F32		LLColor4U::magVec(void) const
 {
-	return fsqrtf( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
+	return (F32) sqrt( ((F32)mV[VX]) * mV[VX] + ((F32)mV[VY]) * mV[VY] + ((F32)mV[VZ]) * mV[VZ] );
 }
 
 // deprecated
diff --git a/indra/llmath/v4math.h b/indra/llmath/v4math.h
index 4c82e6b629..72a477ed20 100644
--- a/indra/llmath/v4math.h
+++ b/indra/llmath/v4math.h
@@ -321,7 +321,7 @@ inline void	LLVector4::setVec(const F32 *vec)
 
 inline F32		LLVector4::length(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 }
 
 inline F32		LLVector4::lengthSquared(void) const
@@ -331,7 +331,7 @@ inline F32		LLVector4::lengthSquared(void) const
 
 inline F32		LLVector4::magVec(void) const
 {
-	return fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	return (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 }
 
 inline F32		LLVector4::magVecSquared(void) const
@@ -463,7 +463,7 @@ inline LLVector4 lerp(const LLVector4 &a, const LLVector4 &b, F32 u)
 
 inline F32		LLVector4::normalize(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
@@ -486,7 +486,7 @@ inline F32		LLVector4::normalize(void)
 // deprecated
 inline F32		LLVector4::normVec(void)
 {
-	F32 mag = fsqrtf(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
+	F32 mag = (F32) sqrt(mV[VX]*mV[VX] + mV[VY]*mV[VY] + mV[VZ]*mV[VZ]);
 	F32 oomag;
 
 	if (mag > FP_MAG_THRESHOLD)
diff --git a/indra/llmessage/llsdmessagebuilder.cpp b/indra/llmessage/llsdmessagebuilder.cpp
index 6e41b03895..2680dd0b77 100755
--- a/indra/llmessage/llsdmessagebuilder.cpp
+++ b/indra/llmessage/llsdmessagebuilder.cpp
@@ -35,6 +35,7 @@
 #include "llsdmessagebuilder.h"
 
 #include "llmessagetemplate.h"
+#include "llmath.h"
 #include "llquaternion.h"
 #include "llsdutil.h"
 #include "llsdutil_math.h"
diff --git a/indra/llmessage/lltemplatemessagebuilder.cpp b/indra/llmessage/lltemplatemessagebuilder.cpp
index fa02456d90..6f9707ed57 100644
--- a/indra/llmessage/lltemplatemessagebuilder.cpp
+++ b/indra/llmessage/lltemplatemessagebuilder.cpp
@@ -35,6 +35,7 @@
 #include "lltemplatemessagebuilder.h"
 
 #include "llmessagetemplate.h"
+#include "llmath.h"
 #include "llquaternion.h"
 #include "u64.h"
 #include "v3dmath.h"
diff --git a/indra/llmessage/lltemplatemessagereader.cpp b/indra/llmessage/lltemplatemessagereader.cpp
index 8f56cf2521..2b67e9f6fa 100644
--- a/indra/llmessage/lltemplatemessagereader.cpp
+++ b/indra/llmessage/lltemplatemessagereader.cpp
@@ -36,6 +36,7 @@
 #include "llfasttimer.h"
 #include "llmessagebuilder.h"
 #include "llmessagetemplate.h"
+#include "llmath.h"
 #include "llquaternion.h"
 #include "message.h"
 #include "u64.h"
diff --git a/indra/llrender/llrender.cpp b/indra/llrender/llrender.cpp
index 70601663e6..2b1a9bbf94 100644
--- a/indra/llrender/llrender.cpp
+++ b/indra/llrender/llrender.cpp
@@ -924,7 +924,7 @@ LLVector3 LLRender::getUITranslation()
 	{
 		llerrs << "UI offset stack empty." << llendl;
 	}
-	return LLVector3(mUIOffset[mUIStackDepth-1].getF32());
+	return LLVector3(mUIOffset[mUIStackDepth-1].getF32ptr());
 }
 
 LLVector3 LLRender::getUIScale()
@@ -933,7 +933,7 @@ LLVector3 LLRender::getUIScale()
 	{
 		llerrs << "UI scale stack empty." << llendl;
 	}
-	return LLVector3(mUIScale[mUIStackDepth-1].getF32());
+	return LLVector3(mUIScale[mUIStackDepth-1].getF32ptr());
 }
 
 
diff --git a/indra/newview/llagent.cpp b/indra/newview/llagent.cpp
index 72d51540ef..5fae5b893f 100644
--- a/indra/newview/llagent.cpp
+++ b/indra/newview/llagent.cpp
@@ -1210,7 +1210,7 @@ void LLAgent::startAutoPilotGlobal(const LLVector3d &target_global, const std::s
 	else
 	{
 		// Guess at a reasonable stop distance.
-		mAutoPilotStopDistance = fsqrtf( distance );
+		mAutoPilotStopDistance = (F32) sqrt( distance );
 		if (mAutoPilotStopDistance < 0.5f) 
 		{
 			mAutoPilotStopDistance = 0.5f;
diff --git a/indra/newview/lldrawable.cpp b/indra/newview/lldrawable.cpp
index 8d0a06378f..68f52e04bc 100644
--- a/indra/newview/lldrawable.cpp
+++ b/indra/newview/lldrawable.cpp
@@ -743,7 +743,7 @@ void LLDrawable::updateDistance(LLCamera& camera, bool force_update)
 		}
 		else
 		{
-			pos = LLVector3(getPositionGroup().getF32());
+			pos = LLVector3(getPositionGroup().getF32ptr());
 		}
 
 		pos -= camera.getOrigin();	
@@ -824,7 +824,7 @@ void LLDrawable::shiftPos(const LLVector4a &shift_vector)
 		for (S32 i = 0; i < getNumFaces(); i++)
 		{
 			LLFace *facep = getFace(i);
-			facep->mCenterAgent += LLVector3(shift_vector.getF32());
+			facep->mCenterAgent += LLVector3(shift_vector.getF32ptr());
 			facep->mExtents[0].add(shift_vector);
 			facep->mExtents[1].add(shift_vector);
 			
@@ -1153,13 +1153,13 @@ void LLSpatialBridge::updateSpatialExtents()
 		LLVector4a max;
 		max.setAdd(center, delta);
 
-		newMin.setMin(min);
-		newMax.setMax(max);
+		newMin.setMin(newMin, min);
+		newMax.setMax(newMax, max);
 	}
 	
 	LLVector4a diagonal;
 	diagonal.setSub(newMax, newMin);
-	mRadius = diagonal.length3() * 0.5f;
+	mRadius = diagonal.getLength3().getF32() * 0.5f;
 	
 	mPositionGroup->setAdd(newMin,newMax);
 	mPositionGroup->mul(0.5f);
diff --git a/indra/newview/lldrawpoolavatar.cpp b/indra/newview/lldrawpoolavatar.cpp
index af082eea3e..d698624c15 100644
--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@@ -37,6 +37,7 @@
 
 #include "llvoavatar.h"
 #include "m3math.h"
+#include "llmatrix4a.h"
 
 #include "llagent.h" //for gAgent.needsRenderAvatar()
 #include "lldrawable.h"
@@ -495,7 +496,7 @@ S32 LLDrawPoolAvatar::getNumPasses()
 	}
 	else
 	{
-		return 3;
+		return 4;
 	}
 #else
 	if (LLPipeline::sImpostorRender)
@@ -814,26 +815,46 @@ void LLDrawPoolAvatar::endSkinned()
 #if LL_MESH_ENABLED
 void LLDrawPoolAvatar::beginRiggedSimple()
 {
-	if (LLPipeline::sUnderWaterRender)
+	if (sShaderLevel > 0)
 	{
-		sVertexProgram = &gSkinnedObjectSimpleWaterProgram;
+		if (LLPipeline::sUnderWaterRender)
+		{
+			sVertexProgram = &gSkinnedObjectSimpleWaterProgram;
+		}
+		else
+		{
+			sVertexProgram = &gSkinnedObjectSimpleProgram;
+		}
 	}
 	else
 	{
-		sVertexProgram = &gSkinnedObjectSimpleProgram;
+		if (LLPipeline::sUnderWaterRender)
+		{
+			sVertexProgram = &gObjectSimpleWaterProgram;
+		}
+		else
+		{
+			sVertexProgram = &gObjectSimpleProgram;
+		}
 	}
 
-	sDiffuseChannel = 0;
-	sVertexProgram->bind();
-	LLVertexBuffer::sWeight4Loc = sVertexProgram->getAttribLocation(LLViewerShaderMgr::OBJECT_WEIGHT);
+	if (sShaderLevel > 0 || gPipeline.canUseVertexShaders())
+	{
+		sDiffuseChannel = 0;
+		sVertexProgram->bind();
+		LLVertexBuffer::sWeight4Loc = sVertexProgram->getAttribLocation(LLViewerShaderMgr::OBJECT_WEIGHT);
+	}
 }
 
 void LLDrawPoolAvatar::endRiggedSimple()
 {
 	LLVertexBuffer::unbind();
-	sVertexProgram->unbind();
-	sVertexProgram = NULL;
-	LLVertexBuffer::sWeight4Loc = -1;
+	if (sShaderLevel > 0 || gPipeline.canUseVertexShaders())
+	{
+		sVertexProgram->unbind();
+		sVertexProgram = NULL;
+		LLVertexBuffer::sWeight4Loc = -1;
+	}
 }
 
 void LLDrawPoolAvatar::beginRiggedAlpha()
@@ -1281,8 +1302,10 @@ void LLDrawPoolAvatar::renderAvatars(LLVOAvatar* single_avatar, S32 pass)
 }
 
 #if LL_MESH_ENABLED
-void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLFace* face, const LLMeshSkinInfo* skin, LLVolume* volume, const LLVolumeFace& vol_face)
+void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLVOAvatar* avatar, LLFace* face, const LLMeshSkinInfo* skin, LLVolume* volume, const LLVolumeFace& vol_face)
 {
+	LLVertexBuffer* buffer = face->mVertexBuffer;
+
 	U32 data_mask = 0;
 	for (U32 i = 0; i < face->mRiggedIndex.size(); ++i)
 	{
@@ -1292,17 +1315,23 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLFace* face, const LLMeshSk
 		}
 	}
 
-	LLVertexBuffer* buff = face->mVertexBuffer;
-
-	if (!buff || 
-		buff->getTypeMask() != data_mask ||
-		buff->getRequestedVerts() != vol_face.mNumVertices)
+	if (!buffer || 
+		buffer->getTypeMask() != data_mask ||
+		buffer->getRequestedVerts() != vol_face.mNumVertices)
 	{
 		face->setGeomIndex(0);
 		face->setIndicesIndex(0);
 		face->setSize(vol_face.mNumVertices, vol_face.mNumIndices, true);
 
-		face->mVertexBuffer = new LLVertexBuffer(data_mask, 0);
+		if (sShaderLevel > 0)
+		{
+			face->mVertexBuffer = new LLVertexBuffer(data_mask, GL_DYNAMIC_DRAW_ARB);
+		}
+		else
+		{
+			face->mVertexBuffer = new LLVertexBuffer(data_mask, GL_STREAM_DRAW_ARB);
+		}
+
 		face->mVertexBuffer->allocateBuffer(face->getGeomCount(), face->getIndicesCount(), true);
 
 		U16 offset = 0;
@@ -1319,7 +1348,83 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(LLFace* face, const LLMeshSk
 		LLMatrix3 mat_normal(mat3);				
 
 		face->getGeometryVolume(*volume, face->getTEOffset(), mat_vert, mat_normal, offset, true);
-		buff = face->mVertexBuffer;
+		buffer = face->mVertexBuffer;
+	}
+
+	if (sShaderLevel <= 0 && face->mLastSkinTime < avatar->getLastSkinTime())
+	{ //perform software vertex skinning for this face
+		LLStrider<LLVector3> position;
+		LLStrider<LLVector3> normal;
+
+		buffer->getVertexStrider(position);
+		buffer->getNormalStrider(normal);
+
+		LLVector4a* pos = (LLVector4a*) position.get();
+		LLVector4a* norm = (LLVector4a*) normal.get();
+		
+		//build matrix palette
+		LLMatrix4a mp[64];
+		LLMatrix4* mat = (LLMatrix4*) mp;
+		const U32 joint_count = skin->mJointNames.size() < 64 ? (U32) skin->mJointNames.size() : 64; //never write past mp[64]
+		for (U32 j = 0; j < joint_count; ++j)
+		{
+			LLJoint* joint = avatar->getJoint(skin->mJointNames[j]);
+			if (joint)
+			{
+				mat[j] = skin->mInvBindMatrix[j];
+				mat[j] *= joint->getWorldMatrix();
+			}
+		}
+
+		LLVector4a* weight = vol_face.mWeights;
+
+		LLMatrix4a bind_shape_matrix;
+		bind_shape_matrix.loadu(skin->mBindShapeMatrix);
+
+		for (U32 j = 0; j < buffer->getRequestedVerts(); ++j)
+		{
+			LLMatrix4a final_mat;
+			final_mat.clear();
+
+			S32 idx[4];
+
+			LLVector4 wght;
+
+			F32 scale = 0.f;
+			for (U32 k = 0; k < 4; k++)
+			{
+				F32 w = weight[j][k];
+				//integer part selects the palette entry, fractional part is the blend weight
+				idx[k] = (w >= 0.f && w < (F32) joint_count) ? (S32) floorf(w) : 0; //out-of-palette indices fall back to entry 0
+				wght[k] = w - floorf(w);
+				scale += wght[k];
+			}
+			//normalize the four blend weights so they sum to one
+			if (scale > 0.f) { wght *= 1.f/scale; } //all-zero weights would otherwise divide by zero
+
+			for (U32 k = 0; k < 4; k++)
+			{
+				F32 w = wght[k];
+
+				LLMatrix4a src;
+				src.setMul(mp[idx[k]], w);
+
+				final_mat.add(src);
+			}
+
+			
+			LLVector4a& v = vol_face.mPositions[j];
+			LLVector4a t;
+			LLVector4a dst;
+			bind_shape_matrix.affineTransform(v, t);
+			final_mat.affineTransform(t, dst);
+			pos[j] = dst;
+
+			LLVector4a& n = vol_face.mNormals[j];
+			bind_shape_matrix.rotate(n, t);
+			final_mat.rotate(t, dst);
+			norm[j] = dst;
+		}
 	}
 }
 
@@ -1371,7 +1476,7 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 		stop_glerror();
 
 		const LLVolumeFace& vol_face = volume->getVolumeFace(te);
-		updateRiggedFaceVertexBuffer(face, skin, volume, vol_face);
+		updateRiggedFaceVertexBuffer(avatar, face, skin, volume, vol_face);
 		
 		stop_glerror();
 
@@ -1381,30 +1486,37 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
 
 		if (buff)
 		{
-			LLMatrix4 mat[64];
+			if (sShaderLevel > 0)
+			{ //upload matrix palette to shader
+				LLMatrix4 mat[64];
 
-			for (U32 i = 0; i < skin->mJointNames.size(); ++i)
-			{
-				LLJoint* joint = avatar->getJoint(skin->mJointNames[i]);
-				if (joint)
+				for (U32 i = 0; i < skin->mJointNames.size(); ++i)
 				{
-					mat[i] = skin->mInvBindMatrix[i];
-					mat[i] *= joint->getWorldMatrix();
+					LLJoint* joint = avatar->getJoint(skin->mJointNames[i]);
+					if (joint)
+					{
+						mat[i] = skin->mInvBindMatrix[i];
+						mat[i] *= joint->getWorldMatrix();
+					}
 				}
-			}
-			
-			stop_glerror();
+				
+				stop_glerror();
 
-			LLDrawPoolAvatar::sVertexProgram->uniformMatrix4fv("matrixPalette", 
-				skin->mJointNames.size(),
-				FALSE,
-				(GLfloat*) mat[0].mMatrix);
-			LLDrawPoolAvatar::sVertexProgram->uniformMatrix4fv("matrixPalette[0]", 
-				skin->mJointNames.size(),
-				FALSE,
-				(GLfloat*) mat[0].mMatrix);
+				LLDrawPoolAvatar::sVertexProgram->uniformMatrix4fv("matrixPalette", 
+					skin->mJointNames.size(),
+					FALSE,
+					(GLfloat*) mat[0].mMatrix);
+				LLDrawPoolAvatar::sVertexProgram->uniformMatrix4fv("matrixPalette[0]", 
+					skin->mJointNames.size(),
+					FALSE,
+					(GLfloat*) mat[0].mMatrix);
 
-			stop_glerror();
+				stop_glerror();
+			}
+			else
+			{
+				data_mask &= ~LLVertexBuffer::MAP_WEIGHT4;
+			}
 
 			buff->setBuffer(data_mask);
 
diff --git a/indra/newview/lldrawpoolavatar.h b/indra/newview/lldrawpoolavatar.h
index b01394534b..d08ae04516 100644
--- a/indra/newview/lldrawpoolavatar.h
+++ b/indra/newview/lldrawpoolavatar.h
@@ -138,7 +138,8 @@ public:
 	void endDeferredRiggedSimple();
 	void endDeferredRiggedBump();
 		
-	void updateRiggedFaceVertexBuffer(LLFace* facep, 
+	void updateRiggedFaceVertexBuffer(LLVOAvatar* avatar,
+									  LLFace* facep, 
 									  const LLMeshSkinInfo* skin, 
 									  LLVolume* volume,
 									  const LLVolumeFace& vol_face);
diff --git a/indra/newview/llface.cpp b/indra/newview/llface.cpp
index 8533f9710c..5d7d3387a4 100644
--- a/indra/newview/llface.cpp
+++ b/indra/newview/llface.cpp
@@ -106,8 +106,8 @@ void planarProjection(LLVector2 &tc, const LLVector4a& normal,
 	LLVector4a tangent;
 	tangent.setCross3(binormal,normal);
 
-	tc.mV[1] = -((tangent.dot3(vec))*2 - 0.5f);
-	tc.mV[0] = 1.0f+((binormal.dot3(vec))*2 - 0.5f);
+	tc.mV[1] = -((tangent.dot3(vec).getF32())*2 - 0.5f);
+	tc.mV[0] = 1.0f+((binormal.dot3(vec).getF32())*2 - 0.5f);
 }
 
 void sphericalProjection(LLVector2 &tc, const LLVector4a& normal,
@@ -156,6 +156,7 @@ void LLFace::init(LLDrawable* drawablep, LLViewerObject* objp)
 
 	mLastUpdateTime = gFrameTimeSeconds;
 	mLastMoveTime = 0.f;
+	mLastSkinTime = gFrameTimeSeconds;
 	mVSize = 0.f;
 	mPixelArea = 16.f;
 	mState      = GLOBAL;
@@ -828,8 +829,8 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f,
 			LLVector4a max;
 			max.setAdd(center, delta);
 
-			newMin.setMin(min);
-			newMax.setMax(max);
+			newMin.setMin(newMin,min);
+			newMax.setMax(newMax,max);
 		}
 
 		if (!mDrawablep->isActive())
@@ -844,11 +845,11 @@ BOOL LLFace::genVolumeBBoxes(const LLVolume &volume, S32 f,
 		t.mul(0.5f);
 
 		//VECTORIZE THIS
-		mCenterLocal.set(t.getF32());
+		mCenterLocal.set(t.getF32ptr());
 		
 		t.setSub(newMax,newMin);
 		t.mul(0.5f);
-		mBoundingSphereRadius = t.length3();
+		mBoundingSphereRadius = t.getLength3().getF32();
 
 		updateCenterAgent();
 	}
@@ -1313,7 +1314,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 				{
 					if (!do_xform)
 					{
-						LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2);
+						LLVector4a::memcpyNonAliased16((F32*) tex_coords.get(), (F32*) vf.mTexCoords, num_vertices*2*sizeof(F32));
 					}
 					else
 					{
@@ -1529,13 +1530,13 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 					if (mDrawablep->isActive())
 					{
 						LLVector3 t;
-						t.set(binormal.getF32());
+						t.set(binormal.getF32ptr());
 						t *= bump_quat;
 						binormal.load3(t.mV);
 					}
 
 					binormal.normalize3fast();
-					tc += LLVector2( bump_s_primary_light_ray.dot3(tangent), bump_t_primary_light_ray.dot3(binormal) );
+					tc += LLVector2( bump_s_primary_light_ray.dot3(tangent).getF32(), bump_t_primary_light_ray.dot3(binormal).getF32() );
 					
 					*tex_coords2++ = tc;
 				}	
@@ -1583,7 +1584,7 @@ BOOL LLFace::getGeometryVolume(const LLVolume& volume,
 	
 	if (rebuild_weights && vf.mWeights)
 	{
-		LLVector4a::memcpyNonAliased16((F32*) weights, (F32*) vf.mWeights, num_vertices*4);
+		LLVector4a::memcpyNonAliased16((F32*) weights, (F32*) vf.mWeights, num_vertices*4*sizeof(F32));
 	}
 
 	if (rebuild_color)
@@ -1705,21 +1706,21 @@ BOOL LLFace::calcPixelArea(F32& cos_angle_to_view_dir, F32& radius)
 
 	LLViewerCamera* camera = LLViewerCamera::getInstance();
 
-	F32 size_squared = size.dot3(size);
+	F32 size_squared = size.dot3(size).getF32();
 	LLVector4a lookAt;
 	LLVector4a t;
 	t.load3(camera->getOrigin().mV);
 	lookAt.setSub(center, t);
-	F32 dist = lookAt.length3();
+	F32 dist = lookAt.getLength3().getF32();
 	lookAt.normalize3fast() ;	
 
 	//get area of circle around node
-	F32 app_angle = atanf(fsqrtf(size_squared) / dist);
+	F32 app_angle = atanf((F32) sqrt(size_squared) / dist);
 	radius = app_angle*LLDrawable::sCurPixelAngle;
 	mPixelArea = radius*radius * 3.14159f;
 	LLVector4a x_axis;
 	x_axis.load3(camera->getXAxis().mV);
-	cos_angle_to_view_dir = lookAt.dot3(x_axis);
+	cos_angle_to_view_dir = lookAt.dot3(x_axis).getF32();
 
 	//if has media, check if the face is out of the view frustum.	
 	if(hasMedia())
diff --git a/indra/newview/llface.h b/indra/newview/llface.h
index b6a67c7fc4..a7d7889350 100644
--- a/indra/newview/llface.h
+++ b/indra/newview/llface.h
@@ -240,6 +240,7 @@ public:
 	LLPointer<LLVertexBuffer> mVertexBuffer;
 	LLPointer<LLVertexBuffer> mLastVertexBuffer;
 	F32			mLastUpdateTime;
+	F32			mLastSkinTime;
 	F32			mLastMoveTime;
 	LLMatrix4*	mTextureMatrix;
 	LLDrawInfo* mDrawInfo;
@@ -274,7 +275,7 @@ private:
 
 	S32			mReferenceIndex;
 	std::vector<S32> mRiggedIndex;
-	 
+	
 	F32			mVSize;
 	F32			mPixelArea;
 
diff --git a/indra/newview/llflexibleobject.cpp b/indra/newview/llflexibleobject.cpp
index 8be4e34748..62336b03bf 100644
--- a/indra/newview/llflexibleobject.cpp
+++ b/indra/newview/llflexibleobject.cpp
@@ -100,7 +100,7 @@ void LLVolumeImplFlexible::onParameterChanged(U16 param_type, LLNetworkData *dat
 void LLVolumeImplFlexible::onShift(const LLVector4a &shift_vector)
 {	
 	//VECTORIZE THIS
-	LLVector3 shift(shift_vector.getF32());
+	LLVector3 shift(shift_vector.getF32ptr());
 	for (int section = 0; section < (1<<FLEXIBLE_OBJECT_MAX_SECTIONS)+1; ++section)
 	{
 		mSection[section].mPosition += shift;	
diff --git a/indra/newview/llhudicon.cpp b/indra/newview/llhudicon.cpp
index 63040904df..e5da871d03 100644
--- a/indra/newview/llhudicon.cpp
+++ b/indra/newview/llhudicon.cpp
@@ -302,7 +302,7 @@ BOOL LLHUDIcon::lineSegmentIntersect(const LLVector3& start, const LLVector3& en
 		{
 			dir.mul(t);
 			starta.add(dir);
-			*intersection = LLVector3((F32*) &starta.mQ);
+			*intersection = LLVector3(starta.getF32ptr());
 		}
 		return TRUE;
 	}
diff --git a/indra/newview/llpanelnearbymedia.cpp b/indra/newview/llpanelnearbymedia.cpp
index 7f4609b83e..adeb401b77 100644
--- a/indra/newview/llpanelnearbymedia.cpp
+++ b/indra/newview/llpanelnearbymedia.cpp
@@ -362,7 +362,7 @@ void LLPanelNearByMedia::updateListItem(LLScrollListItem* item, LLViewerMediaImp
 		debug_str += llformat("%g/", (float)impl->getInterest());
 		
 		// proximity distance is actually distance squared -- display it as straight distance.
-		debug_str += llformat("%g/", fsqrtf(impl->getProximityDistance()));
+		debug_str += llformat("%g/", (F32) sqrt(impl->getProximityDistance()));
 		
 		//			s += llformat("%g/", (float)impl->getCPUUsage());
 		//			s += llformat("%g/", (float)impl->getApproximateTextureInterest());
diff --git a/indra/newview/llpanelprimmediacontrols.cpp b/indra/newview/llpanelprimmediacontrols.cpp
index 98fbebbc5d..d84ac2e4c8 100644
--- a/indra/newview/llpanelprimmediacontrols.cpp
+++ b/indra/newview/llpanelprimmediacontrols.cpp
@@ -576,8 +576,8 @@ void LLPanelPrimMediaControls::updateShape()
 			const LLVolumeFace& vf = volume->getVolumeFace(mTargetObjectFace);
 			
 			LLVector3 ext[2];
-			ext[0].set(vf.mExtents[0].getF32());
-			ext[1].set(vf.mExtents[1].getF32());
+			ext[0].set(vf.mExtents[0].getF32ptr());
+			ext[1].set(vf.mExtents[1].getF32ptr());
 			
 			LLVector3 center = (ext[0]+ext[1])*0.5f;
 			LLVector3 size = (ext[1]-ext[0])*0.5f;
diff --git a/indra/newview/llselectmgr.cpp b/indra/newview/llselectmgr.cpp
index 42f09f7396..92903a6aa9 100644
--- a/indra/newview/llselectmgr.cpp
+++ b/indra/newview/llselectmgr.cpp
@@ -1124,13 +1124,13 @@ void LLSelectMgr::getGrid(LLVector3& origin, LLQuaternion &rotation, LLVector3 &
 			size.setSub(max_extents, min_extents);
 			size.mul(0.5f);
 
-			mGridOrigin.set(center.getF32());
+			mGridOrigin.set(center.getF32ptr());
 			LLDrawable* drawable = first_grid_object->mDrawable;
 			if (drawable && drawable->isActive())
 			{
 				mGridOrigin = mGridOrigin * first_grid_object->getRenderMatrix();
 			}
-			mGridScale.set(size.getF32());
+			mGridScale.set(size.getF32ptr());
 		}
 	}
 	else // GRID_MODE_WORLD or just plain default
@@ -3543,7 +3543,7 @@ void LLSelectMgr::deselectAllIfTooFar()
 		{
 			if (mDebugSelectMgr)
 			{
-				llinfos << "Selection manager: auto-deselecting, select_dist = " << fsqrtf(select_dist_sq) << llendl;
+				llinfos << "Selection manager: auto-deselecting, select_dist = " << (F32) sqrt(select_dist_sq) << llendl;
 				llinfos << "agent pos global = " << gAgent.getPositionGlobal() << llendl;
 				llinfos << "selection pos global = " << selectionCenter << llendl;
 			}
diff --git a/indra/newview/llspatialpartition.cpp b/indra/newview/llspatialpartition.cpp
index 2335c7bb8e..e55ec03356 100644
--- a/indra/newview/llspatialpartition.cpp
+++ b/indra/newview/llspatialpartition.cpp
@@ -231,7 +231,7 @@ U8* get_box_fan_indices(LLCamera* camera, const LLVector4a& center)
 	LLVector4a origin;
 	origin.load3(camera->getOrigin().mV);
 
-	S32 cypher = center.greaterThan4(origin).getComparisonMask() & 0x7;
+	S32 cypher = center.greaterThan(origin).getGatheredBits() & 0x7;
 	
 	return sOcclusionIndices+cypher*8;
 }
@@ -253,7 +253,7 @@ void LLSpatialGroup::buildOcclusion()
 	r2.splat(0.25f);
 	r2.add(mBounds[1]);
 
-	r.setMin(r2);
+	r.setMin(r, r2);
 
 	LLVector4a* v = mOcclusionVerts;
 	const LLVector4a& c = mBounds[0];
@@ -775,8 +775,8 @@ BOOL LLSpatialGroup::boundObjects(BOOL empty, LLVector4a& minOut, LLVector4a& ma
 	}
 	else
 	{
-		minOut.setMin(newMin);
-		maxOut.setMax(newMax);
+		minOut.setMin(minOut, newMin);
+		maxOut.setMax(maxOut, newMax);
 	}
 		
 	return TRUE;
@@ -1220,8 +1220,8 @@ void LLSpatialGroup::updateDistance(LLCamera &camera)
 #endif
 	if (!getData().empty())
 	{
-		mRadius = mSpatialPartition->mRenderByGroup ? mObjectBounds[1].length3() :
-						(F32) mOctreeNode->getSize().length3();
+		mRadius = mSpatialPartition->mRenderByGroup ? mObjectBounds[1].getLength3().getF32() :
+						(F32) mOctreeNode->getSize().getLength3().getF32();
 		mDistance = mSpatialPartition->calcDistance(this, camera);
 		mPixelArea = mSpatialPartition->calcPixelArea(this, camera);
 	}
@@ -1241,7 +1241,7 @@ F32 LLSpatialPartition::calcDistance(LLSpatialGroup* group, LLCamera& camera)
 	{
 		LLVector4a v = eye;
 
-		dist = eye.length3();
+		dist = eye.getLength3().getF32();
 		eye.normalize3fast();
 
 		if (!group->isState(LLSpatialGroup::ALPHA_DIRTY))
@@ -1253,7 +1253,7 @@ F32 LLSpatialPartition::calcDistance(LLSpatialGroup* group, LLCamera& camera)
 				LLVector4a diff;
 				diff.setSub(view_angle, *group->mLastUpdateViewAngle);
 
-				if (diff.length3() > 0.64f)
+				if (diff.getLength3().getF32() > 0.64f)
 				{
 					*group->mViewAngle = view_angle;
 					*group->mLastUpdateViewAngle = view_angle;
@@ -1279,11 +1279,11 @@ F32 LLSpatialPartition::calcDistance(LLSpatialGroup* group, LLCamera& camera)
 		t.mul(group->mObjectBounds[1]);
 		v.sub(t);
 		
-		group->mDepth = v.dot3(ata);
+		group->mDepth = v.dot3(ata).getF32();
 	}
 	else
 	{
-		dist = eye.length3();
+		dist = eye.getLength3().getF32();
 	}
 
 	if (dist < 16.f)
@@ -1497,8 +1497,8 @@ BOOL LLSpatialGroup::rebound()
 			const LLVector4a& max = group->mExtents[1];
 			const LLVector4a& min = group->mExtents[0];
 
-			newMax.setMax(max);
-			newMin.setMin(min);
+			newMax.setMax(newMax, max);
+			newMin.setMin(newMin, min);
 		}
 
 		boundObjects(FALSE, newMin, newMax);
@@ -2196,8 +2196,8 @@ BOOL LLSpatialPartition::getVisibleExtents(LLCamera& camera, LLVector3& visMin,
 	LLOctreeCullVisExtents vis(&camera, visMina, visMaxa);
 	vis.traverse(mOctree);
 
-	visMin.set(visMina.getF32());
-	visMax.set(visMaxa.getF32());
+	visMin.set(visMina.getF32ptr());
+	visMax.set(visMaxa.getF32ptr());
 	return vis.mEmpty;
 }
 
@@ -2280,13 +2280,13 @@ BOOL earlyFail(LLCamera* camera, LLSpatialGroup* group)
 	LLVector4a max;
 	max.setAdd(c,r);
 	
-	S32 lt = e.lessThan4(min).getComparisonMask() & 0x7;
+	S32 lt = e.lessThan(min).getGatheredBits() & 0x7;
 	if (lt)
 	{
 		return FALSE;
 	}
 
-	S32 gt = e.greaterThan4(max).getComparisonMask() & 0x7;
+	S32 gt = e.greaterThan(max).getGatheredBits() & 0x7;
 	if (gt)
 	{
 		return FALSE;
@@ -2745,8 +2745,8 @@ void renderNormals(LLDrawable* drawablep)
 				p.setAdd(face.mPositions[j], n);
 				
 				gGL.color4f(1,1,1,1);
-				gGL.vertex3fv(face.mPositions[j].getF32());
-				gGL.vertex3fv(p.getF32());
+				gGL.vertex3fv(face.mPositions[j].getF32ptr());
+				gGL.vertex3fv(p.getF32ptr());
 				
 				if (face.mBinormals)
 				{
@@ -2754,8 +2754,8 @@ void renderNormals(LLDrawable* drawablep)
 					p.setAdd(face.mPositions[j], n);
 				
 					gGL.color4f(0,1,1,1);
-					gGL.vertex3fv(face.mPositions[j].getF32());
-					gGL.vertex3fv(p.getF32());
+					gGL.vertex3fv(face.mPositions[j].getF32ptr());
+					gGL.vertex3fv(p.getF32ptr());
 				}	
 			}
 
@@ -3024,8 +3024,8 @@ public:
 		LLVolumeOctreeListener* vl = (LLVolumeOctreeListener*) branch->getListener(0);
 
 		LLVector3 center, size;
-		center.set(vl->mBounds[0].getF32());
-		size.set(vl->mBounds[1].getF32());
+		center.set(vl->mBounds[0].getF32ptr());
+		size.set(vl->mBounds[1].getF32ptr());
 
 		drawBoxOutline(center, size);
 	}
diff --git a/indra/newview/llviewercamera.cpp b/indra/newview/llviewercamera.cpp
index cef7c4abbb..917185ed04 100644
--- a/indra/newview/llviewercamera.cpp
+++ b/indra/newview/llviewercamera.cpp
@@ -810,7 +810,7 @@ BOOL LLViewerCamera::areVertsVisible(LLViewerObject* volumep, BOOL all_verts)
 				render_mata.affineTransform(t, vec);
 			}
 
-			BOOL in_frustum = pointInFrustum(LLVector3(vec.getF32())) > 0;
+			BOOL in_frustum = pointInFrustum(LLVector3(vec.getF32ptr())) > 0;
 
 			if (( !in_frustum && all_verts) ||
 				 (in_frustum && !all_verts))
diff --git a/indra/newview/llviewerjointmesh.cpp b/indra/newview/llviewerjointmesh.cpp
index 9e35b5cc51..0a181e09bb 100644
--- a/indra/newview/llviewerjointmesh.cpp
+++ b/indra/newview/llviewerjointmesh.cpp
@@ -726,8 +726,8 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 			
 			U32 words = num_verts*4;
 
-			LLVector4a::memcpyNonAliased16(v, (F32*) mMesh->getCoords(), words);
-			LLVector4a::memcpyNonAliased16(n, (F32*) mMesh->getNormals(), words);
+			LLVector4a::memcpyNonAliased16(v, (F32*) mMesh->getCoords(), words*sizeof(F32));
+			LLVector4a::memcpyNonAliased16(n, (F32*) mMesh->getNormals(), words*sizeof(F32));
 						
 			
 			if (!terse_update)
@@ -740,9 +740,9 @@ void LLViewerJointMesh::updateFaceData(LLFace *face, F32 pixel_area, BOOL damp_w
 				F32* vw = (F32*) vertex_weightsp.get();
 				F32* cw = (F32*) clothing_weightsp.get();	
 
-				LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2);
-				LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts);	
-				LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4);	
+				LLVector4a::memcpyNonAliased16(tc, (F32*) mMesh->getTexCoords(), num_verts*2*sizeof(F32));
+				LLVector4a::memcpyNonAliased16(vw, (F32*) mMesh->getWeights(), num_verts*sizeof(F32));	
+				LLVector4a::memcpyNonAliased16(cw, (F32*) mMesh->getClothingWeights(), num_verts*4*sizeof(F32));	
 			}
 
 			const U32 idx_count = mMesh->getNumFaces()*3;
diff --git a/indra/newview/llviewerjoystick.cpp b/indra/newview/llviewerjoystick.cpp
index 79d8fc7df9..16a6022b86 100644
--- a/indra/newview/llviewerjoystick.cpp
+++ b/indra/newview/llviewerjoystick.cpp
@@ -764,7 +764,7 @@ void LLViewerJoystick::moveAvatar(bool reset)
 	sDelta[RX_I] += (cur_delta[RX_I] - sDelta[RX_I]) * time * feather;
 	sDelta[RY_I] += (cur_delta[RY_I] - sDelta[RY_I]) * time * feather;
 	
-	handleRun(fsqrtf(sDelta[Z_I]*sDelta[Z_I] + sDelta[X_I]*sDelta[X_I]));
+	handleRun((F32) sqrt(sDelta[Z_I]*sDelta[Z_I] + sDelta[X_I]*sDelta[X_I]));
 	
 	// Allow forward/backward movement some priority
 	if (dom_axis == Z_I)
diff --git a/indra/newview/llviewermedia.cpp b/indra/newview/llviewermedia.cpp
index 178d928f57..303f339f7d 100644
--- a/indra/newview/llviewermedia.cpp
+++ b/indra/newview/llviewermedia.cpp
@@ -865,7 +865,7 @@ void LLViewerMedia::updateMedia(void *dummy_arg)
 				
 				// Set the low priority size for downsampling to approximately the size the texture is displayed at.
 				{
-					F32 approximate_interest_dimension = fsqrtf(pimpl->getInterest());
+					F32 approximate_interest_dimension = (F32) sqrt(pimpl->getInterest());
 					
 					pimpl->setLowPrioritySizeLimit(llround(approximate_interest_dimension));
 				}
diff --git a/indra/newview/llviewerobject.cpp b/indra/newview/llviewerobject.cpp
index 2b89deaa53..9587fbafb1 100644
--- a/indra/newview/llviewerobject.cpp
+++ b/indra/newview/llviewerobject.cpp
@@ -2973,7 +2973,7 @@ F32 LLViewerObject::getBinRadius()
 		const LLVector4a* ext = mDrawable->getSpatialExtents();
 		LLVector4a diff;
 		diff.setSub(ext[1], ext[0]);
-		return diff.length3();
+		return diff.getLength3().getF32();
 	}
 	
 	return getScale().magVec();
diff --git a/indra/newview/llviewerpartsim.cpp b/indra/newview/llviewerpartsim.cpp
index 41848e8b7a..4759454ee7 100644
--- a/indra/newview/llviewerpartsim.cpp
+++ b/indra/newview/llviewerpartsim.cpp
@@ -161,8 +161,8 @@ LLViewerPartGroup::LLViewerPartGroup(const LLVector3 &center_agent, const F32 bo
 
 	if (group != NULL)
 	{
-		LLVector3 center(group->mOctreeNode->getCenter().getF32());
-		LLVector3 size(group->mOctreeNode->getSize().getF32());
+		LLVector3 center(group->mOctreeNode->getCenter().getF32ptr());
+		LLVector3 size(group->mOctreeNode->getSize().getF32ptr());
 		size += LLVector3(0.01f, 0.01f, 0.01f);
 		mMinObjPos = center - size;
 		mMaxObjPos = center + size;
diff --git a/indra/newview/llviewertexture.cpp b/indra/newview/llviewertexture.cpp
index 719c5b0da5..d50efe89dd 100644
--- a/indra/newview/llviewertexture.cpp
+++ b/indra/newview/llviewertexture.cpp
@@ -1580,7 +1580,7 @@ F32 LLViewerFetchedTexture::calcDecodePriority()
 
 	S32 cur_discard = getCurrentDiscardLevelForFetching();
 	bool have_all_data = (cur_discard >= 0 && (cur_discard <= mDesiredDiscardLevel));
-	F32 pixel_priority = fsqrtf(mMaxVirtualSize);
+	F32 pixel_priority = (F32) sqrt(mMaxVirtualSize);
 
 	F32 priority = 0.f;
 
diff --git a/indra/newview/llvoavatar.cpp b/indra/newview/llvoavatar.cpp
index abf22b5e5a..e50d0fce49 100644
--- a/indra/newview/llvoavatar.cpp
+++ b/indra/newview/llvoavatar.cpp
@@ -675,6 +675,7 @@ LLVOAvatar::LLVOAvatar(const LLUUID& id,
 	mTexHairColor( NULL ),
 	mTexEyeColor( NULL ),
 	mNeedsSkin(FALSE),
+	mLastSkinTime(0.f),
 	mUpdatePeriod(1),
 	mFullyLoaded(FALSE),
 	mPreviousFullyLoaded(FALSE),
@@ -1356,7 +1357,7 @@ void LLVOAvatar::updateSpatialExtents(LLVector4a& newMin, LLVector4a &newMax)
 	if (isImpostor() && !needsImpostorUpdate())
 	{
 		LLVector3 delta = getRenderPosition() -
-			((LLVector3(mDrawable->getPositionGroup().getF32())-mImpostorOffset));
+			((LLVector3(mDrawable->getPositionGroup().getF32ptr())-mImpostorOffset));
 		
 		newMin.load3( (mLastAnimExtents[0] + delta).mV);
 		newMax.load3( (mLastAnimExtents[1] + delta).mV);
@@ -1364,12 +1365,12 @@ void LLVOAvatar::updateSpatialExtents(LLVector4a& newMin, LLVector4a &newMax)
 	else
 	{
 		getSpatialExtents(newMin,newMax);
-		mLastAnimExtents[0].set(newMin.getF32());
-		mLastAnimExtents[1].set(newMax.getF32());
+		mLastAnimExtents[0].set(newMin.getF32ptr());
+		mLastAnimExtents[1].set(newMax.getF32ptr());
 		LLVector4a pos_group;
 		pos_group.setAdd(newMin,newMax);
 		pos_group.mul(0.5f);
-		mImpostorOffset = LLVector3(pos_group.getF32())-getRenderPosition();
+		mImpostorOffset = LLVector3(pos_group.getF32ptr())-getRenderPosition();
 		mDrawable->setPositionGroup(pos_group);
 	}
 }
@@ -1435,7 +1436,7 @@ void LLVOAvatar::getSpatialExtents(LLVector4a& newMin, LLVector4a& newMax)
 						distance.setSub(ext[1], ext[0]);
 						LLVector4a max_span(max_attachment_span);
 
-						S32 lt = distance.lessThan4(max_span).getComparisonMask() & 0x7;
+						S32 lt = distance.lessThan(max_span).getGatheredBits() & 0x7;
 						
 						// Only add the prim to spatial extents calculations if it isn't a megaprim.
 						// max_attachment_span calculated at the start of the function 
@@ -2533,14 +2534,14 @@ void LLVOAvatar::idleUpdateMisc(bool detailed_update)
 				getSpatialExtents(ext[0], ext[1]);
 				LLVector4a diff;
 				diff.setSub(ext[1], mImpostorExtents[1]);
-				if (diff.length3() > 0.05f)
+				if (diff.getLength3().getF32() > 0.05f)
 				{
 					mNeedsImpostorUpdate = TRUE;
 				}
 				else
 				{
 					diff.setSub(ext[0], mImpostorExtents[0]);
-					if (diff.length3() > 0.05f)
+					if (diff.getLength3().getF32() > 0.05f)
 					{
 						mNeedsImpostorUpdate = TRUE;
 					}
@@ -3887,7 +3888,8 @@ U32 LLVOAvatar::renderSkinned(EAvatarRenderPass pass)
 				mMeshLOD[MESH_ID_HAIR]->updateJointGeometry();
 			}
 			mNeedsSkin = FALSE;
-			
+			mLastSkinTime = gFrameTimeSeconds;
+
 			LLVertexBuffer* vb = mDrawable->getFace(0)->mVertexBuffer;
 			if (vb)
 			{
@@ -4231,7 +4233,7 @@ void LLVOAvatar::updateTextures()
 
 	if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_AREA))
 	{
-		setDebugText(llformat("%4.0f:%4.0f", fsqrtf(mMinPixelArea),fsqrtf(mMaxPixelArea)));
+		setDebugText(llformat("%4.0f:%4.0f", (F32) sqrt(mMinPixelArea),(F32) sqrt(mMaxPixelArea)));
 	}	
 }
 
@@ -5443,7 +5445,7 @@ void LLVOAvatar::setPixelAreaAndAngle(LLAgent &agent)
 	}
 	else
 	{
-		F32 radius = size.length3();
+		F32 radius = size.getLength3().getF32();
 		mAppAngle = (F32) atan2( radius, range) * RAD_TO_DEG;
 	}
 
diff --git a/indra/newview/llvoavatar.h b/indra/newview/llvoavatar.h
index 95b0665f7d..94b564fc8f 100644
--- a/indra/newview/llvoavatar.h
+++ b/indra/newview/llvoavatar.h
@@ -345,6 +345,7 @@ public:
 	U32 		renderImpostor(LLColor4U color = LLColor4U(255,255,255,255), S32 diffuse_channel = 0);
 	U32 		renderRigid();
 	U32 		renderSkinned(EAvatarRenderPass pass);
+	F32			getLastSkinTime() const { return mLastSkinTime; } // value of gFrameTimeSeconds stored by renderSkinned() at the last skin update
 	U32			renderSkinnedAttachments();
 	U32 		renderTransparent(BOOL first_pass);
 	void 		renderCollisionVolumes();
@@ -357,6 +358,8 @@ private:
 	bool		shouldAlphaMask();
 
 	BOOL 		mNeedsSkin; // avatar has been animated and verts have not been updated
+	F32			mLastSkinTime; //value of gFrameTimeSeconds at last skin update
+
 	S32	 		mUpdatePeriod;
 	S32  		mNumInitFaces; //number of faces generated when creating the avatar drawable, does not inculde splitted faces due to long vertex buffer.
 
diff --git a/indra/newview/llvograss.cpp b/indra/newview/llvograss.cpp
index fe1e36cbe8..65829b213e 100644
--- a/indra/newview/llvograss.cpp
+++ b/indra/newview/llvograss.cpp
@@ -328,7 +328,7 @@ void LLVOGrass::updateTextures()
 	{
 		if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_AREA))
 		{
-			setDebugText(llformat("%4.0f", fsqrtf(mPixelArea)));
+			setDebugText(llformat("%4.0f", (F32) sqrt(mPixelArea)));
 		}
 		getTEImage(0)->addTextureStats(mPixelArea);
 	}
diff --git a/indra/newview/llvosurfacepatch.cpp b/indra/newview/llvosurfacepatch.cpp
index 63f662c5a1..c047758a59 100644
--- a/indra/newview/llvosurfacepatch.cpp
+++ b/indra/newview/llvosurfacepatch.cpp
@@ -999,8 +999,8 @@ BOOL LLVOSurfacePatch::lineSegmentIntersect(const LLVector3& start, const LLVect
 	const LLVector4a* exta = mDrawable->getSpatialExtents();
 
 	LLVector3 ext[2];
-	ext[0].set(exta[0].getF32());
-	ext[1].set(exta[1].getF32());
+	ext[0].set(exta[0].getF32ptr());
+	ext[1].set(exta[1].getF32ptr());
 
 	F32 rad = (delta*tdelta).magVecSquared();
 
diff --git a/indra/newview/llvotextbubble.cpp b/indra/newview/llvotextbubble.cpp
index 339da3c0bf..e790373d02 100644
--- a/indra/newview/llvotextbubble.cpp
+++ b/indra/newview/llvotextbubble.cpp
@@ -254,8 +254,8 @@ void LLVOTextBubble::getGeometry(S32 idx,
 	LLVector2* dst_tc = (LLVector2*) texcoordsp.get();
 	LLVector2* src_tc = (LLVector2*) face.mTexCoords;
 
-	LLVector4a::memcpyNonAliased16((F32*) dst_norm, (F32*) src_norm, face.mNumVertices*4);
-	LLVector4a::memcpyNonAliased16((F32*) dst_tc, (F32*) src_tc, face.mNumVertices*2);
+	LLVector4a::memcpyNonAliased16((F32*) dst_norm, (F32*) src_norm, face.mNumVertices*4*sizeof(F32));
+	LLVector4a::memcpyNonAliased16((F32*) dst_tc, (F32*) src_tc, face.mNumVertices*2*sizeof(F32));
 	
 	
 	for (U32 i = 0; i < face.mNumVertices; i++)
diff --git a/indra/newview/llvotree.cpp b/indra/newview/llvotree.cpp
index eb790b04cc..e3b4efb9dd 100644
--- a/indra/newview/llvotree.cpp
+++ b/indra/newview/llvotree.cpp
@@ -472,7 +472,7 @@ void LLVOTree::updateTextures()
 	{
 		if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_AREA))
 		{
-			setDebugText(llformat("%4.0f", fsqrtf(mPixelArea)));
+			setDebugText(llformat("%4.0f", (F32) sqrt(mPixelArea)));
 		}
 		mTreeImagep->addTextureStats(mPixelArea);
 	}
@@ -1278,8 +1278,8 @@ BOOL LLVOTree::lineSegmentIntersect(const LLVector3& start, const LLVector3& end
 
 	//VECTORIZE THIS
 	LLVector3 ext[2];
-	ext[0].set(exta[0].getF32());
-	ext[1].set(exta[1].getF32());
+	ext[0].set(exta[0].getF32ptr());
+	ext[1].set(exta[1].getF32ptr());
 	
 	LLVector3 center = (ext[1]+ext[0])*0.5f;
 	LLVector3 size = (ext[1]-ext[0]);
diff --git a/indra/newview/llvovolume.cpp b/indra/newview/llvovolume.cpp
index 1397b64623..128fd15142 100644
--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@@ -696,7 +696,7 @@ void LLVOVolume::updateTextureVirtualSize()
 		const LLTextureEntry *te = face->getTextureEntry();
 		LLViewerTexture *imagep = face->getTexture();
 		if (!imagep || !te ||			
-			face->mExtents[0].equal3(face->mExtents[1]))
+			face->mExtents[0].equals3(face->mExtents[1]))
 		{
 			continue;
 		}
@@ -820,15 +820,15 @@ void LLVOVolume::updateTextureVirtualSize()
 	
 	if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_AREA))
 	{
-		setDebugText(llformat("%.0f:%.0f", fsqrtf(min_vsize),fsqrtf(max_vsize)));
+		setDebugText(llformat("%.0f:%.0f", (F32) sqrt(min_vsize),(F32) sqrt(max_vsize)));
 	}
  	else if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_TEXTURE_PRIORITY))
  	{
- 		setDebugText(llformat("%.0f:%.0f", fsqrtf(min_vsize),fsqrtf(max_vsize)));
+ 		setDebugText(llformat("%.0f:%.0f", (F32) sqrt(min_vsize),(F32) sqrt(max_vsize)));
  	}
 	else if (gPipeline.hasRenderDebugMask(LLPipeline::RENDER_DEBUG_FACE_AREA))
 	{
-		setDebugText(llformat("%.0f:%.0f", fsqrtf(min_vsize),fsqrtf(max_vsize)));
+		setDebugText(llformat("%.0f:%.0f", (F32) sqrt(min_vsize),(F32) sqrt(max_vsize)));
 	}
 
 	if (mPixelArea == 0)
@@ -1355,8 +1355,8 @@ BOOL LLVOVolume::genBBoxes(BOOL force_global)
 			}
 			else
 			{
-				min.setMin(face->mExtents[0]);
-				max.setMax(face->mExtents[1]);
+				min.setMin(min, face->mExtents[0]);
+				max.setMax(max, face->mExtents[1]);
 			}
 		}
 	}
@@ -1864,7 +1864,7 @@ LLVector3 LLVOVolume::getApproximateFaceNormal(U8 face_id)
 			result.add(face.mNormals[i]);
 		}
 
-		LLVector3 ret((F32*) &result.mQ);
+		LLVector3 ret(result.getF32ptr());
 		ret = volumeDirectionToAgent(ret);
 		ret.normVec();
 	}
@@ -3075,7 +3075,7 @@ F32 LLVOVolume::getBinRadius()
 		LLVector4a rad;
 		rad.setSub(ext[1], ext[0]);
 		
-		radius = rad.length3()*0.5f;
+		radius = rad.getLength3().getF32()*0.5f;
 	}
 	else if (mDrawable->isStatic())
 	{
diff --git a/indra/newview/llworld.cpp b/indra/newview/llworld.cpp
index a1e4df8a66..53eca0d08e 100644
--- a/indra/newview/llworld.cpp
+++ b/indra/newview/llworld.cpp
@@ -591,7 +591,7 @@ void LLWorld::updateVisibilities()
 		region_list_t::iterator curiter = iter++;
 		LLViewerRegion* regionp = *curiter;
 		F32 height = regionp->getLand().getMaxZ() - regionp->getLand().getMinZ();
-		F32 radius = 0.5f*fsqrtf(height * height + diagonal_squared);
+		F32 radius = 0.5f*(F32) sqrt(height * height + diagonal_squared);
 		if (!regionp->getLand().hasZData()
 			|| LLViewerCamera::getInstance()->sphereInFrustum(regionp->getCenterAgent(), radius))
 		{
@@ -612,7 +612,7 @@ void LLWorld::updateVisibilities()
 		}
 
 		F32 height = regionp->getLand().getMaxZ() - regionp->getLand().getMinZ();
-		F32 radius = 0.5f*fsqrtf(height * height + diagonal_squared);
+		F32 radius = 0.5f*(F32) sqrt(height * height + diagonal_squared);
 		if (LLViewerCamera::getInstance()->sphereInFrustum(regionp->getCenterAgent(), radius))
 		{
 			regionp->calculateCameraDistance();
diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp
index fec7da1dd0..b0a7b1ce83 100644
--- a/indra/newview/pipeline.cpp
+++ b/indra/newview/pipeline.cpp
@@ -1522,7 +1522,7 @@ F32 LLPipeline::calcPixelArea(const LLVector4a& center, const LLVector4a& size,
 
 	LLVector4a lookAt;
 	lookAt.setSub(center, origin);
-	F32 dist = lookAt.length3();
+	F32 dist = lookAt.getLength3().getF32();
 
 	//ramp down distance for nearby objects
 	//shrink dist by dist/16.
@@ -1534,7 +1534,7 @@ F32 LLPipeline::calcPixelArea(const LLVector4a& center, const LLVector4a& size,
 	}
 
 	//get area of circle around node
-	F32 app_angle = atanf(size.length3()/dist);
+	F32 app_angle = atanf(size.getLength3().getF32()/dist);
 	F32 radius = app_angle*LLDrawable::sCurPixelAngle;
 	return radius*radius * F_PI;
 }
@@ -4671,7 +4671,7 @@ static F32 calc_light_dist(LLVOVolume* light, const LLVector3& cam_pos, F32 max_
 	{
 		return max_dist;
 	}
-	F32 dist = fsqrtf(dist2);
+	F32 dist = (F32) sqrt(dist2);
 	dist *= 1.f / inten;
 	dist -= radius;
 	if (selected)
@@ -6980,7 +6980,7 @@ void LLPipeline::renderDeferredLighting()
 
 					LLVector4a center;
 					center.load3(drawablep->getPositionAgent().mV);
-					const F32* c = center.getF32();
+					const F32* c = center.getF32ptr();
 					F32 s = volume->getLightRadius()*1.5f;
 
 					LLColor3 col = volume->getLightColor();
@@ -7078,7 +7078,7 @@ void LLPipeline::renderDeferredLighting()
 
 					LLVector4a center;
 					center.load3(drawablep->getPositionAgent().mV);
-					const F32* c = center.getF32();
+					const F32* c = center.getF32ptr();
 					F32 s = volume->getLightRadius()*1.5f;
 
 					sVisibleLightCount++;
@@ -9184,8 +9184,8 @@ void LLPipeline::generateImpostor(LLVOAvatar* avatar)
 	up.mul(up);
 	up.normalize3fast();
 
-	tdim.mV[0] = fabsf(half_height.dot3(left));
-	tdim.mV[1] = fabsf(half_height.dot3(up));
+	tdim.mV[0] = fabsf(half_height.dot3(left).getF32());
+	tdim.mV[1] = fabsf(half_height.dot3(up).getF32());
 
 	glMatrixMode(GL_PROJECTION);
 	glPushMatrix();
author	Dave Parks <davep@lindenlab.com>	2010-08-19 12:25:15 -0500
committer	Dave Parks <davep@lindenlab.com>	2010-08-19 12:25:15 -0500
commit	2fea1d5d33ec1b41a3cfa4307a1bfa58d8014f88 (patch)
tree	0438f2363b2a91a5ffe970a8130faa118f260e7e
parent	bd0b3a2ddeafaf0d1669ede7ab5aee22d8da9af7 (diff)