summaryrefslogtreecommitdiff
path: root/indra/llmath/llmatrix3a.cpp
diff options
context:
space:
mode:
authorAnsariel <ansariel.hiller@phoenixviewer.com>2024-05-22 19:04:52 +0200
committerAnsariel <ansariel.hiller@phoenixviewer.com>2024-05-22 19:04:52 +0200
commit1b67dd855c41f5a0cda7ec2a68d98071986ca703 (patch)
treeab243607f74f78200787bba5b9b88f07ef1b966f /indra/llmath/llmatrix3a.cpp
parent6d6eabca44d08d5b97bfe3e941d2b9687c2246ea (diff)
parente1623bb276f83a43ce7a197e388720c05bdefe61 (diff)
Merge remote-tracking branch 'origin/main' into DRTVWR-600-maint-A
# Conflicts: # autobuild.xml # indra/cmake/CMakeLists.txt # indra/cmake/GoogleMock.cmake # indra/llaudio/llaudioengine_fmodstudio.cpp # indra/llaudio/llaudioengine_fmodstudio.h # indra/llaudio/lllistener_fmodstudio.cpp # indra/llaudio/lllistener_fmodstudio.h # indra/llaudio/llstreamingaudio_fmodstudio.cpp # indra/llaudio/llstreamingaudio_fmodstudio.h # indra/llcharacter/llmultigesture.cpp # indra/llcharacter/llmultigesture.h # indra/llimage/llimage.cpp # indra/llimage/llimagepng.cpp # indra/llimage/llimageworker.cpp # indra/llimage/tests/llimageworker_test.cpp # indra/llmessage/tests/llmockhttpclient.h # indra/llprimitive/llgltfmaterial.h # indra/llrender/llfontfreetype.cpp # indra/llui/llcombobox.cpp # indra/llui/llfolderview.cpp # indra/llui/llfolderviewmodel.h # indra/llui/lllineeditor.cpp # indra/llui/lllineeditor.h # indra/llui/lltextbase.cpp # indra/llui/lltextbase.h # indra/llui/lltexteditor.cpp # indra/llui/lltextvalidate.cpp # indra/llui/lltextvalidate.h # indra/llui/lluictrl.h # indra/llui/llview.cpp # indra/llwindow/llwindowmacosx.cpp # indra/newview/app_settings/settings.xml # indra/newview/llappearancemgr.cpp # indra/newview/llappearancemgr.h # indra/newview/llavatarpropertiesprocessor.cpp # indra/newview/llavatarpropertiesprocessor.h # indra/newview/llbreadcrumbview.cpp # indra/newview/llbreadcrumbview.h # indra/newview/llbreastmotion.cpp # indra/newview/llbreastmotion.h # indra/newview/llconversationmodel.h # indra/newview/lldensityctrl.cpp # indra/newview/lldensityctrl.h # indra/newview/llface.inl # indra/newview/llfloatereditsky.cpp # indra/newview/llfloatereditwater.cpp # indra/newview/llfloateremojipicker.h # indra/newview/llfloaterimsessiontab.cpp # indra/newview/llfloaterprofiletexture.cpp # indra/newview/llfloaterprofiletexture.h # indra/newview/llgesturemgr.cpp # indra/newview/llgesturemgr.h # indra/newview/llimpanel.cpp # indra/newview/llimpanel.h # indra/newview/llinventorybridge.cpp # indra/newview/llinventorybridge.h # indra/newview/llinventoryclipboard.cpp # indra/newview/llinventoryclipboard.h # indra/newview/llinventoryfunctions.cpp # indra/newview/llinventoryfunctions.h # indra/newview/llinventorygallery.cpp # indra/newview/lllistbrowser.cpp # indra/newview/lllistbrowser.h # indra/newview/llpanelobjectinventory.cpp # indra/newview/llpanelprofile.cpp # indra/newview/llpanelprofile.h # indra/newview/llpreviewgesture.cpp # indra/newview/llsavedsettingsglue.cpp # indra/newview/llsavedsettingsglue.h # indra/newview/lltooldraganddrop.cpp # indra/newview/llurllineeditorctrl.cpp # indra/newview/llvectorperfoptions.cpp # indra/newview/llvectorperfoptions.h # indra/newview/llviewerparceloverlay.cpp # indra/newview/llviewertexlayer.cpp # indra/newview/llviewertexturelist.cpp # indra/newview/macmain.h # indra/test/test.cpp
Diffstat (limited to 'indra/llmath/llmatrix3a.cpp')
-rw-r--r--indra/llmath/llmatrix3a.cpp202
1 files changed, 101 insertions, 101 deletions
diff --git a/indra/llmath/llmatrix3a.cpp b/indra/llmath/llmatrix3a.cpp
index ab077abcb0..48a72e71e1 100644
--- a/indra/llmath/llmatrix3a.cpp
+++ b/indra/llmath/llmatrix3a.cpp
@@ -1,134 +1,134 @@
-/**
+/**
* @file llvector4a.cpp
* @brief SIMD vector implementation
*
* $LicenseInfo:firstyear=2010&license=viewerlgpl$
* Second Life Viewer Source Code
* Copyright (C) 2010, Linden Research, Inc.
- *
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License only.
- *
+ *
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
- *
+ *
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
+ *
* Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
* $/LicenseInfo$
*/
#include "llmath.h"
-static LL_ALIGN_16(const F32 M_IDENT_3A[12]) =
- { 1.f, 0.f, 0.f, 0.f, // Column 1
- 0.f, 1.f, 0.f, 0.f, // Column 2
- 0.f, 0.f, 1.f, 0.f }; // Column 3
+static LL_ALIGN_16(const F32 M_IDENT_3A[12]) =
+ { 1.f, 0.f, 0.f, 0.f, // Column 1
+ 0.f, 1.f, 0.f, 0.f, // Column 2
+ 0.f, 0.f, 1.f, 0.f }; // Column 3
extern const LLMatrix3a LL_M3A_IDENTITY = *reinterpret_cast<const LLMatrix3a*> (M_IDENT_3A);
void LLMatrix3a::setMul( const LLMatrix3a& lhs, const LLMatrix3a& rhs )
{
- const LLVector4a col0 = lhs.getColumn(0);
- const LLVector4a col1 = lhs.getColumn(1);
- const LLVector4a col2 = lhs.getColumn(2);
-
- for ( int i = 0; i < 3; i++ )
- {
- LLVector4a xxxx = _mm_load_ss( rhs.mColumns[i].getF32ptr() );
- xxxx.splat<0>( xxxx );
- xxxx.mul( col0 );
-
- {
- LLVector4a yyyy = _mm_load_ss( rhs.mColumns[i].getF32ptr() + 1 );
- yyyy.splat<0>( yyyy );
- yyyy.mul( col1 );
- xxxx.add( yyyy );
- }
-
- {
- LLVector4a zzzz = _mm_load_ss( rhs.mColumns[i].getF32ptr() + 2 );
- zzzz.splat<0>( zzzz );
- zzzz.mul( col2 );
- xxxx.add( zzzz );
- }
-
- xxxx.store4a( mColumns[i].getF32ptr() );
- }
-
+ const LLVector4a col0 = lhs.getColumn(0);
+ const LLVector4a col1 = lhs.getColumn(1);
+ const LLVector4a col2 = lhs.getColumn(2);
+
+ for ( int i = 0; i < 3; i++ )
+ {
+ LLVector4a xxxx = _mm_load_ss( rhs.mColumns[i].getF32ptr() );
+ xxxx.splat<0>( xxxx );
+ xxxx.mul( col0 );
+
+ {
+ LLVector4a yyyy = _mm_load_ss( rhs.mColumns[i].getF32ptr() + 1 );
+ yyyy.splat<0>( yyyy );
+ yyyy.mul( col1 );
+ xxxx.add( yyyy );
+ }
+
+ {
+ LLVector4a zzzz = _mm_load_ss( rhs.mColumns[i].getF32ptr() + 2 );
+ zzzz.splat<0>( zzzz );
+ zzzz.mul( col2 );
+ xxxx.add( zzzz );
+ }
+
+ xxxx.store4a( mColumns[i].getF32ptr() );
+ }
+
}
/*static */void LLMatrix3a::batchTransform( const LLMatrix3a& xform, const LLVector4a* src, int numVectors, LLVector4a* dst )
{
- const LLVector4a col0 = xform.getColumn(0);
- const LLVector4a col1 = xform.getColumn(1);
- const LLVector4a col2 = xform.getColumn(2);
- const LLVector4a* maxAddr = src + numVectors;
-
- if ( numVectors & 0x1 )
- {
- LLVector4a xxxx = _mm_load_ss( (const F32*)src );
- LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 );
- LLVector4a zzzz = _mm_load_ss( (const F32*)src + 2 );
- xxxx.splat<0>( xxxx );
- yyyy.splat<0>( yyyy );
- zzzz.splat<0>( zzzz );
- xxxx.mul( col0 );
- yyyy.mul( col1 );
- zzzz.mul( col2 );
- xxxx.add( yyyy );
- xxxx.add( zzzz );
- xxxx.store4a( (F32*)dst );
- src++;
- dst++;
- }
-
-
- numVectors >>= 1;
- while ( src < maxAddr )
- {
- _mm_prefetch( (const char*)(src + 32 ), _MM_HINT_NTA );
- _mm_prefetch( (const char*)(dst + 32), _MM_HINT_NTA );
- LLVector4a xxxx = _mm_load_ss( (const F32*)src );
- LLVector4a xxxx1= _mm_load_ss( (const F32*)(src + 1) );
-
- xxxx.splat<0>( xxxx );
- xxxx1.splat<0>( xxxx1 );
- xxxx.mul( col0 );
- xxxx1.mul( col0 );
-
- {
- LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 );
- LLVector4a yyyy1 = _mm_load_ss( (const F32*)(src + 1) + 1);
- yyyy.splat<0>( yyyy );
- yyyy1.splat<0>( yyyy1 );
- yyyy.mul( col1 );
- yyyy1.mul( col1 );
- xxxx.add( yyyy );
- xxxx1.add( yyyy1 );
- }
-
- {
- LLVector4a zzzz = _mm_load_ss( (const F32*)(src) + 2 );
- LLVector4a zzzz1 = _mm_load_ss( (const F32*)(++src) + 2 );
- zzzz.splat<0>( zzzz );
- zzzz1.splat<0>( zzzz1 );
- zzzz.mul( col2 );
- zzzz1.mul( col2 );
- xxxx.add( zzzz );
- xxxx1.add( zzzz1 );
- }
-
- xxxx.store4a(dst->getF32ptr());
- src++;
- dst++;
-
- xxxx1.store4a((F32*)dst++);
- }
+ const LLVector4a col0 = xform.getColumn(0);
+ const LLVector4a col1 = xform.getColumn(1);
+ const LLVector4a col2 = xform.getColumn(2);
+ const LLVector4a* maxAddr = src + numVectors;
+
+ if ( numVectors & 0x1 )
+ {
+ LLVector4a xxxx = _mm_load_ss( (const F32*)src );
+ LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 );
+ LLVector4a zzzz = _mm_load_ss( (const F32*)src + 2 );
+ xxxx.splat<0>( xxxx );
+ yyyy.splat<0>( yyyy );
+ zzzz.splat<0>( zzzz );
+ xxxx.mul( col0 );
+ yyyy.mul( col1 );
+ zzzz.mul( col2 );
+ xxxx.add( yyyy );
+ xxxx.add( zzzz );
+ xxxx.store4a( (F32*)dst );
+ src++;
+ dst++;
+ }
+
+
+ numVectors >>= 1;
+ while ( src < maxAddr )
+ {
+ _mm_prefetch( (const char*)(src + 32 ), _MM_HINT_NTA );
+ _mm_prefetch( (const char*)(dst + 32), _MM_HINT_NTA );
+ LLVector4a xxxx = _mm_load_ss( (const F32*)src );
+ LLVector4a xxxx1= _mm_load_ss( (const F32*)(src + 1) );
+
+ xxxx.splat<0>( xxxx );
+ xxxx1.splat<0>( xxxx1 );
+ xxxx.mul( col0 );
+ xxxx1.mul( col0 );
+
+ {
+ LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 );
+ LLVector4a yyyy1 = _mm_load_ss( (const F32*)(src + 1) + 1);
+ yyyy.splat<0>( yyyy );
+ yyyy1.splat<0>( yyyy1 );
+ yyyy.mul( col1 );
+ yyyy1.mul( col1 );
+ xxxx.add( yyyy );
+ xxxx1.add( yyyy1 );
+ }
+
+ {
+ LLVector4a zzzz = _mm_load_ss( (const F32*)(src) + 2 );
+ LLVector4a zzzz1 = _mm_load_ss( (const F32*)(++src) + 2 );
+ zzzz.splat<0>( zzzz );
+ zzzz1.splat<0>( zzzz1 );
+ zzzz.mul( col2 );
+ zzzz1.mul( col2 );
+ xxxx.add( zzzz );
+ xxxx1.add( zzzz1 );
+ }
+
+ xxxx.store4a(dst->getF32ptr());
+ src++;
+ dst++;
+
+ xxxx1.store4a((F32*)dst++);
+ }
}