diff options
521 files changed, 38697 insertions, 6285 deletions
@@ -9,6 +9,7 @@ syntax: glob .*.swp #OSX image cache file *.DS_Store +*.orig LICENSES indra/.distcc build-linux-* @@ -8,6 +8,7 @@ bb38ff1a763738609e1b3cada6d15fa61e5e84b9 2.1.1-release 1e2b517adc2ecb342cd3c865f2a6ccf82a3cf8d7 2-1-beta-3 3469d90a115b900f8f250e137bbd9b684130f5d2 beta-4 3e4b947f79d88c385e8218cbc0731cef0e42cfc4 2-1-beta-1 +434973a76ab2755f98ab55e1afc193e16692d5c5 2-1-1-beta-2 46002088d9a4489e323b8d56131c680eaa21258c viewer-2-1-0-start 4f777ffb99fefdc6497c61385c22688ff149c659 viewer-2-0-0 52d96ad3d39be29147c5b2181b3bb46af6164f0e alpha-3 @@ -16,6 +17,7 @@ bb38ff1a763738609e1b3cada6d15fa61e5e84b9 2.1.1-release 7f16e79826d377f5f9f5b33dc721ab56d0d7dc8f alpha-4 7f16e79826d377f5f9f5b33dc721ab56d0d7dc8f fork to viewer-20qa 80bc6cff515118a36108967af49d3f8105c95bc9 viewer-2-0-2-start +87bfaf8c76f9b22d9c65d4b315358861be87c863 2-1-1-release b03065d018b8a2e28b7de85b293a4c992cb4c12d 2-1-release b8419565906e4feb434426d8d9b17dd1458e24b2 alpha-6 bb38ff1a763738609e1b3cada6d15fa61e5e84b9 2-1-1-release diff --git a/BuildParams b/BuildParams index a602be2fe2..0a193803da 100644 --- a/BuildParams +++ b/BuildParams @@ -11,8 +11,11 @@ Linux.symbolfiles = "newview/secondlife-symbols-linux.tar.bz2" # Use Public Upload Locations public_build = true +# skip windows debug build until we can get a fix in. +build_CYGWIN_Debug = false + # Update Public Inworld Build Status Indicators -email_status_this_is_os = true +email_status_this_is_os = false # Limit extent of codeticket updates to revisions after... codeticket_since = 2.2.0-release @@ -90,6 +93,33 @@ brad-parabuild.build_server = false brad-parabuild.build_server_tests = false # ======================================== +# mesh-development +# ======================================== +mesh-development.viewer_channel = "Project Viewer - Mesh" +mesh-development.login_channel = "Project Viewer - Mesh" +mesh-development.viewer_grid = aditi +mesh-development.build_debug_release_separately = true +mesh-development.build_CYGWIN_Debug = false +mesh-development.build_viewer_update_version_manager = false + +# ======================================== +# viewer-mesh +# ======================================== + +viewer-mesh.build_viewer = true +viewer-mesh.build_server = false +viewer-mesh.build_Linux = true +viewer-mesh.build_hg_bundle = true +viewer-mesh.build_viewer_update_version_manager = false +viewer-mesh.build_Debug = false +viewer-mesh.build_RelWithDebInfo = false +viewer-mesh.viewer_channel = "Project Viewer - Mesh" +viewer-mesh.login_channel = "Project Viewer - Mesh" +viewer-mesh.viewer_grid = aditi +viewer-mesh.email = shining@lists.lindenlab.com + + +# ======================================== # CG # ======================================== diff --git a/autobuild.xml b/autobuild.xml index cbaaae5c16..0e4db1bdca 100644 --- a/autobuild.xml +++ b/autobuild.xml @@ -3,14 +3,14 @@ <map> <key>installables</key> <map> - <key>GL</key> + <key>GLOD</key> <map> <key>license</key> - <string>GL</string> + <string>GLOD</string> <key>license_file</key> - <string>LICENSES/GL.txt</string> + <string>LICENSES/glod.txt</string> <key>name</key> - <string>GL</string> + <string>GLOD</string> <key>platforms</key> <map> <key>darwin</key> @@ -18,9 +18,9 @@ <key>archive</key> <map> <key>hash</key> - <string>0b7c1d43dc2b39301fef6c05948fb826</string> + <string>7c546f54f6ed654f713c778af3925dd4</string> <key>url</key> - <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/GL-darwin-20101004.tar.bz2</string> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-glod/rev/226566/arch/Darwin/installer/glod-1.0pre4-darwin-20110413.tar.bz2</string> </map> <key>name</key> <string>darwin</string> @@ -30,9 +30,9 @@ <key>archive</key> <map> <key>hash</key> - <string>d85d88088360aee5b67105ad93b5ad16</string> + <string>18c708163d2a669bc3c030b05b4ebe61</string> <key>url</key> - <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/GL-linux-20100929.tar.bz2</string> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-glod/rev/226657/arch/Linux/installer/glod-1.0pre4-linux-20110414.tar.bz2</string> </map> <key>name</key> <string>linux</string> @@ -42,9 +42,9 @@ <key>archive</key> <map> <key>hash</key> - <string>f8d98cbe78d5aafbc7aaabf840325aaf</string> + <string>c4ae6cddc04e0b2908a301726a53922c</string> <key>url</key> - <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/GL-0.0.0-windows-20110303.tar.bz2</string> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-glod/rev/226194/arch/CYGWIN/installer/glod-1.0pre4-windows-20110408.tar.bz2</string> </map> <key>name</key> <string>windows</string> @@ -219,6 +219,54 @@ </map> </map> </map> + <key>colladadom</key> + <map> + <key>license</key> + <string>scea</string> + <key>license_file</key> + <string>LICENSES/collada.txt</string> + <key>name</key> + <string>colladadom</string> + <key>platforms</key> + <map> + <key>darwin</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>480b27a0cb39a4adfcdeabef895de3e1</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-colladadom/rev/227230/arch/Darwin/installer/colladadom-2.2-darwin-20110420.tar.bz2</string> + </map> + <key>name</key> + <string>darwin</string> + </map> + <key>linux</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>d05be8fc196e9ce7b6636b931cf13dff</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-colladadom/rev/226716/arch/Linux/installer/colladadom-2.2-linux-20110415.tar.bz2</string> + </map> + <key>name</key> + <string>linux</string> + </map> + <key>windows</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>a9f548eb6f9aaf292508a8b09c7f2f73</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-colladadom/rev/226584/arch/CYGWIN/installer/colladadom-2.2-windows-20110413.tar.bz2</string> + </map> + <key>name</key> + <string>windows</string> + </map> + </map> + </map> <key>curl</key> <map> <key>license</key> @@ -531,6 +579,42 @@ </map> </map> </map> + <key>glext</key> + <map> + <key>license</key> + <string>glext</string> + <key>license_file</key> + <string>LICENSES/glext.txt</string> + <key>name</key> + <string>glext</string> + <key>platforms</key> + <map> + <key>linux</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>5de58ca0fe19abf68b25956762ee0d29</string> + <key>url</key> + <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/glext-68-windows-20110406.tar.bz2</string> + </map> + <key>name</key> + <string>linux</string> + </map> + <key>windows</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>5de58ca0fe19abf68b25956762ee0d29</string> + <key>url</key> + <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/glext-68-windows-20110406.tar.bz2</string> + </map> + <key>name</key> + <string>windows</string> + </map> + </map> + </map> <key>glh_linear</key> <map> <key>license</key> @@ -615,42 +699,6 @@ </map> </map> </map> - <key>google-perftools</key> - <map> - <key>license</key> - <string>bsd</string> - <key>license_file</key> - <string>LICENSES/google-perftools.txt</string> - <key>name</key> - <string>google-perftools</string> - <key>platforms</key> - <map> - <key>linux</key> - <map> - <key>archive</key> - <map> - <key>hash</key> - <string>cf513fc2eec4a414cc804cf408932a45</string> - <key>url</key> - <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/google_perftools-1.7-linux-20110315.tar.bz2</string> - </map> - <key>name</key> - <string>linux</string> - </map> - <key>windows</key> - <map> - <key>archive</key> - <map> - <key>hash</key> - <string>8108bffe1c814be9d035b47dac3d4541</string> - <key>url</key> - <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/google-perftools-1.0-windows-20101001b.tar.bz2</string> - </map> - <key>name</key> - <string>windows</string> - </map> - </map> - </map> <key>google_breakpad</key> <map> <key>license</key> @@ -704,7 +752,7 @@ <key>license</key> <string>bsd</string> <key>license_file</key> - <string>LICENSES/gmock.txt</string> + <string>LICENSES/googlemock.txt</string> <key>name</key> <string>googlemock</string> <key>platforms</key> @@ -776,7 +824,7 @@ <key>license</key> <string>lgpl</string> <key>license_file</key> - <string>LICENSES/gtk.txt</string> + <string>LICENSES/gtk-atk-pango-glib.txt</string> <key>name</key> <string>gtk-atk-pango-glib</string> <key>platforms</key> @@ -990,9 +1038,9 @@ <key>archive</key> <map> <key>hash</key> - <string>735a955e6442733e2342ab12c1087488</string> + <string>f194ba857ca8dd86483a3ef24535d0db</string> <key>url</key> - <string>http://s3.amazonaws.com/viewer-source-downloads/install_pkgs/libpng-1.5.1-windows-20110221.tar.bz2</string> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-libpng/rev/226532/arch/CYGWIN/installer/libpng-1.5.1-windows-20110413.tar.bz2</string> </map> <key>name</key> <string>windows</string> @@ -1047,6 +1095,102 @@ </map> </map> </map> + <key>llconvexdecomposition</key> + <map> + <key>license</key> + <string>havok</string> + <key>license_file</key> + <string>on_file</string> + <key>name</key> + <string>llconvexdecomposition</string> + <key>platforms</key> + <map> + <key>darwin</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>6e45ad68506cd1ba49fd35a3201f0478</string> + <key>url</key> + <string>http://s3-proxy.lindenlab.com/private-builds-secondlife-com/hg/repo/3p-llconvexdecomposition/rev/228821/arch/Darwin/installer/llconvexdecomposition-0.1-darwin-20110504.tar.bz2</string> + </map> + <key>name</key> + <string>darwin</string> + </map> + <key>linux</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>00ff5144612c2e261a0811a4503ce3ba</string> + <key>url</key> + <string>http://s3-proxy.lindenlab.com/private-builds-secondlife-com/hg/repo/3p-llconvexdecomposition/rev/228821/arch/Linux/installer/llconvexdecomposition-0.1-linux-20110504.tar.bz2</string> + </map> + <key>name</key> + <string>linux</string> + </map> + <key>windows</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>a4635dcbbe0915ce023dd41d3b848d4c</string> + <key>url</key> + <string>http://s3-proxy.lindenlab.com/private-builds-secondlife-com/hg/repo/3p-llconvexdecomposition/rev/228821/arch/CYGWIN/installer/llconvexdecomposition-0.1-windows-20110504.tar.bz2</string> + </map> + <key>name</key> + <string>windows</string> + </map> + </map> + </map> + <key>llconvexdecompositionstub</key> + <map> + <key>license</key> + <string>lgpl</string> + <key>license_file</key> + <string>LICENSES/lgpl.txt</string> + <key>name</key> + <string>llconvexdecompositionstub</string> + <key>platforms</key> + <map> + <key>darwin</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>bc1388fc28dbb3bba1fe7cb8d09f49b4</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-llconvexdecompositionstub/rev/227399/arch/Darwin/installer/llconvexdecompositionstub-0.3-darwin-20110421.tar.bz2</string> + </map> + <key>name</key> + <string>darwin</string> + </map> + <key>linux</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>3295bd4a0514b7c15dda9044f40c175e</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-llconvexdecompositionstub/rev/227399/arch/Linux/installer/llconvexdecompositionstub-0.3-linux-20110422.tar.bz2</string> + </map> + <key>name</key> + <string>linux</string> + </map> + <key>windows</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>92f1dff3249024c1534b55343ed79ea3</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-llconvexdecompositionstub/rev/227399/arch/CYGWIN/installer/llconvexdecompositionstub-0.3-windows-20110421.tar.bz2</string> + </map> + <key>name</key> + <string>windows</string> + </map> + </map> + </map> <key>llqtwebkit</key> <map> <key>license</key> @@ -1351,6 +1495,42 @@ </map> </map> </map> + <key>pcre</key> + <map> + <key>license</key> + <string>bsd</string> + <key>license_file</key> + <string>LICENSES/pcre-license.txt</string> + <key>name</key> + <string>pcre</string> + <key>platforms</key> + <map> + <key>darwin</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>a8e74694a0f4248228c13c845ed0a6f8</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-pcre/rev/228822/arch/Darwin/installer/pcre-7.6-darwin-20110504.tar.bz2</string> + </map> + <key>name</key> + <string>darwin</string> + </map> + <key>linux</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>bb0abe962b3b8208ed2dab0424aab33d</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-pcre/rev/228822/arch/Linux/installer/pcre-7.6-linux-20110504.tar.bz2</string> + </map> + <key>name</key> + <string>linux</string> + </map> + </map> + </map> <key>quicktime</key> <map> <key>license</key> @@ -1423,6 +1603,42 @@ </map> </map> </map> + <key>tcmalloc</key> + <map> + <key>license</key> + <string>bsd</string> + <key>license_file</key> + <string>LICENSES/google-perftools.txt</string> + <key>name</key> + <string>tcmalloc</string> + <key>platforms</key> + <map> + <key>linux</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>dde928cb24d22a267004a8c17669ba65</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-google-perftools/rev/226426/arch/Linux/installer/google_perftools-1.7-linux-20110412.tar.bz2</string> + </map> + <key>name</key> + <string>linux</string> + </map> + <key>windows</key> + <map> + <key>archive</key> + <map> + <key>hash</key> + <string>8308f7bd68bb7083655753b7abe7225f</string> + <key>url</key> + <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/3p-google-perftools/rev/226287/arch/CYGWIN/installer/google_perftools-1.7-windows-20110411.tar.bz2</string> + </map> + <key>name</key> + <string>windows</string> + </map> + </map> + </map> <key>tut</key> <map> <key>license</key> @@ -1578,6 +1794,10 @@ <map> <key>configure</key> <map> + <key>arguments</key> + <array> + <string>../indra</string> + </array> <key>command</key> <string>cmake</string> <key>options</key> @@ -1587,10 +1807,6 @@ <string>-DROOT_PROJECT_NAME:STRING=SecondLife</string> <string>-DINSTALL_PROPRIETARY=FALSE</string> </array> - <key>arguments</key> - <array> - <string>../indra</string> - </array> </map> <key>name</key> <string>DebugOS</string> @@ -1619,6 +1835,10 @@ <map> <key>configure</key> <map> + <key>arguments</key> + <array> + <string>../indra</string> + </array> <key>command</key> <string>cmake</string> <key>options</key> @@ -1628,10 +1848,6 @@ <string>-DROOT_PROJECT_NAME:STRING=SecondLife</string> <string>-DINSTALL_PROPRIETARY=FALSE</string> </array> - <key>arguments</key> - <array> - <string>../indra</string> - </array> </map> <key>name</key> <string>RelWithDebInfoOS</string> @@ -1660,6 +1876,10 @@ <map> <key>configure</key> <map> + <key>arguments</key> + <array> + <string>../indra</string> + </array> <key>command</key> <string>cmake</string> <key>options</key> @@ -1669,10 +1889,6 @@ <string>-DROOT_PROJECT_NAME:STRING=SecondLife</string> <string>-DINSTALL_PROPRIETARY=FALSE</string> </array> - <key>arguments</key> - <array> - <string>../indra</string> - </array> </map> <key>name</key> <string>ReleaseOS</string> @@ -32,19 +32,19 @@ build_dir_CYGWIN() installer_Darwin() { - ls -1td "$(build_dir_Darwin ${last_built_variant:-Release})/newview/"*.dmg 2>/dev/null | sed 1q + ls -1td "$(build_dir_Darwin Release)/newview/"*.dmg 2>/dev/null | sed 1q } installer_Linux() { - ls -1td "$(build_dir_Linux ${last_built_variant:-Release})/newview/"*.tar.bz2 2>/dev/null | sed 1q + ls -1td "$(build_dir_Linux Release)/newview/"*.tar.bz2 2>/dev/null | sed 1q } installer_CYGWIN() { - d=$(build_dir_CYGWIN ${last_built_variant:-Release}) - p=$(sed 's:.*=::' "$d/newview/${last_built_variant:-Release}/touched.bat") - echo "$d/newview/${last_built_variant:-Release}/$p" + d=$(build_dir_CYGWIN Release) + p=$(sed 's:.*=::' "$d/newview/Release/touched.bat") + echo "$d/newview/Release/$p" } pre_build() diff --git a/doc/contributions.txt b/doc/contributions.txt index e7537b608b..163667c8ad 100644 --- a/doc/contributions.txt +++ b/doc/contributions.txt @@ -196,6 +196,7 @@ blino Nakamura Boroondas Gupte OPEN-29 OPEN-39 + OPEN-39 SNOW-278 SNOW-503 SNOW-510 @@ -418,6 +419,7 @@ Jonathan Yap VWR-17801 VWR-24347 STORM-975 + STORM-990 STORM-1019 STORM-844 STORM-643 @@ -816,7 +818,9 @@ Thraxis Epsilon tiamat bingyi CT-246 Tofu Buzzard + CTS-411 STORM-546 + VWR-24509 TraductoresAnonimos Alter CT-324 Tue Torok diff --git a/etc/message.xml b/etc/message.xml index 764aea3879..3445975545 100644 --- a/etc/message.xml +++ b/etc/message.xml @@ -596,6 +596,13 @@ <boolean>false</boolean> </map> + <key>ObjectPhysicsProperties</key> + <map> + <key>flavor</key> + <string>llsd</string> + <key>trusted-sender</key> + <boolean>true</boolean> + </map> </map> <key>capBans</key> diff --git a/indra/cmake/00-Common.cmake b/indra/cmake/00-Common.cmake index faffdc8ccd..2c974fb4ff 100644 --- a/indra/cmake/00-Common.cmake +++ b/indra/cmake/00-Common.cmake @@ -57,16 +57,18 @@ if (WINDOWS) add_definitions( /DLL_WINDOWS=1 + /DDOM_DYNAMIC /DUNICODE /D_UNICODE /GS /TP - /W3 + /W2 /c /Zc:forScope /nologo /Oy- /Zc:wchar_t- + /arch:SSE2 ) # Are we using the crummy Visual Studio KDU build workaround? @@ -141,6 +143,8 @@ if (LINUX) -fno-strict-aliasing -fsigned-char -g + -msse2 + -mfpmath=sse -pthread ) @@ -160,10 +164,6 @@ if (LINUX) link_directories(/usr/lib/mysql4/mysql) endif (EXISTS /usr/lib/mysql4/mysql) - add_definitions( - -msse2 - -mfpmath=sse - ) endif (SERVER) if (VIEWER) @@ -171,6 +171,8 @@ if (LINUX) add_definitions(-fvisibility=hidden) # don't catch SIGCHLD in our base application class for the viewer - some of our 3rd party libs may need their *own* SIGCHLD handler to work. Sigh! The viewer doesn't need to catch SIGCHLD anyway. add_definitions(-DLL_IGNORE_SIGCHLD) + add_definitions(-march=pentium4 -mfpmath=sse) + #add_definitions(-ftree-vectorize) # THIS CRASHES GCC 3.1-3.2 if (NOT STANDALONE) # this stops us requiring a really recent glibc at runtime add_definitions(-fno-stack-protector) @@ -210,7 +212,7 @@ if (LINUX OR DARWIN) set(GCC_WARNINGS "${GCC_WARNINGS} -Werror") endif (NOT GCC_DISABLE_FATAL_WARNINGS) - set(GCC_CXX_WARNINGS "${GCC_WARNINGS} -Wno-reorder -Wno-non-virtual-dtor -Woverloaded-virtual") + set(GCC_CXX_WARNINGS "${GCC_WARNINGS} -Wno-reorder -Wno-non-virtual-dtor") set(CMAKE_C_FLAGS "${GCC_WARNINGS} ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "${GCC_CXX_WARNINGS} ${CMAKE_CXX_FLAGS}") diff --git a/indra/cmake/CMakeLists.txt b/indra/cmake/CMakeLists.txt index 89c1c3691a..279d577a27 100644 --- a/indra/cmake/CMakeLists.txt +++ b/indra/cmake/CMakeLists.txt @@ -34,6 +34,7 @@ set(cmake_SOURCE_FILES FindZLIB.cmake FMOD.cmake FreeType.cmake + GLOD.cmake GStreamer010Plugin.cmake GooglePerfTools.cmake JPEG.cmake @@ -41,6 +42,7 @@ set(cmake_SOURCE_FILES LLAudio.cmake LLCharacter.cmake LLCommon.cmake + LLConvexDecomposition.cmake LLCrashLogger.cmake LLDatabase.cmake LLImage.cmake diff --git a/indra/cmake/Copy3rdPartyLibs.cmake b/indra/cmake/Copy3rdPartyLibs.cmake index 4202b54f94..d9efc8f40d 100644 --- a/indra/cmake/Copy3rdPartyLibs.cmake +++ b/indra/cmake/Copy3rdPartyLibs.cmake @@ -39,6 +39,8 @@ if(WINDOWS) libapriconv-1.dll ssleay32.dll libeay32.dll + libcollada14dom22-d.dll + glod.dll ) set(release_src_dir "${ARCH_PREBUILT_DIRS_RELEASE}") @@ -49,6 +51,8 @@ if(WINDOWS) libapriconv-1.dll ssleay32.dll libeay32.dll + libcollada14dom22.dll + glod.dll ) if(USE_GOOGLE_PERFTOOLS) @@ -207,9 +211,12 @@ elseif(DARWIN) libaprutil-1.dylib libexpat.1.5.2.dylib libexpat.dylib - libllqtwebkit.dylib + libGLOD.dylib + libllqtwebkit.dylib + libminizip.a libndofdev.dylib libexception_handler.dylib + libcollada14dom.dylib ) # fmod is statically linked on darwin @@ -245,20 +252,23 @@ elseif(LINUX) libaprutil-1.so.0 libatk-1.0.so libbreakpad_client.so.0 + libcollada14dom.so libcrypto.so.1.0.0 libdb-5.1.so libexpat.so libexpat.so.1 + libglod.so libgmock_main.so libgmock.so.0 libgmodule-2.0.so libgobject-2.0.so libgtest_main.so libgtest.so.0 + libminizip.so libopenal.so libopenjpeg.so libssl.so - libtcmalloc.so + libtcmalloc_minimal.so libuuid.so.16 libuuid.so.16.0.22 libssl.so.1.0.0 diff --git a/indra/cmake/FindTut.cmake b/indra/cmake/FindTut.cmake deleted file mode 100644 index c2a9f43053..0000000000 --- a/indra/cmake/FindTut.cmake +++ /dev/null @@ -1,30 +0,0 @@ -# -*- cmake -*- - -# - Find Tut -# Find the Tut unit test framework includes and library -# This module defines -# TUT_INCLUDE_DIR, where to find tut/tut.hpp. -# TUT_FOUND, If false, do not try to use Tut. - -find_path(TUT_INCLUDE_DIR tut/tut.hpp - NO_SYSTEM_ENVIRONMENT_PATH - ) - -if (TUT_INCLUDE_DIR) - set(TUT_FOUND "YES") -else (TUT_INCLUDE_DIR) - set(TUT_FOUND "NO") -endif (TUT_INCLUDE_DIR) - -if (TUT_FOUND) - if (NOT TUT_FIND_QUIETLY) - message(STATUS "Found Tut: ${TUT_INCLUDE_DIR}") - set(TUT_FIND_QUIETLY TRUE) # Only alert us the first time - endif (NOT TUT_FIND_QUIETLY) -else (TUT_FOUND) - if (TUT_FIND_REQUIRED) - message(FATAL_ERROR "Could not find Tut") - endif (TUT_FIND_REQUIRED) -endif (TUT_FOUND) - -mark_as_advanced(TUT_INCLUDE_DIR) diff --git a/indra/cmake/GLOD.cmake b/indra/cmake/GLOD.cmake new file mode 100644 index 0000000000..77221d55ed --- /dev/null +++ b/indra/cmake/GLOD.cmake @@ -0,0 +1,9 @@ +# -*- cmake -*- +include(Prebuilt) + +if (NOT STANDALONE) + use_prebuilt_binary(GLOD) +endif (NOT STANDALONE) + +set(GLOD_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include) +set(GLOD_LIBRARIES glod) diff --git a/indra/cmake/GooglePerfTools.cmake b/indra/cmake/GooglePerfTools.cmake index 6c784a3a76..8740e36753 100644 --- a/indra/cmake/GooglePerfTools.cmake +++ b/indra/cmake/GooglePerfTools.cmake @@ -5,15 +5,16 @@ if (STANDALONE) include(FindGooglePerfTools) else (STANDALONE) if (WINDOWS) - use_prebuilt_binary(google-perftools) + use_prebuilt_binary(tcmalloc) set(TCMALLOC_LIBRARIES debug libtcmalloc_minimal-debug optimized libtcmalloc_minimal) set(GOOGLE_PERFTOOLS_FOUND "YES") endif (WINDOWS) if (LINUX) - use_prebuilt_binary(google-perftools) - set(TCMALLOC_LIBRARIES tcmalloc) + use_prebuilt_binary(tcmalloc) + set(TCMALLOC_LIBRARIES + tcmalloc) set(PROFILER_LIBRARIES profiler) set(GOOGLE_PERFTOOLS_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include) @@ -28,12 +29,11 @@ if (GOOGLE_PERFTOOLS_FOUND) endif (GOOGLE_PERFTOOLS_FOUND) if (WINDOWS) - # *TODO -reenable this once we get server usage sorted out - #set(USE_GOOGLE_PERFTOOLS ON) + set(USE_GOOGLE_PERFTOOLS ON) endif (WINDOWS) if (USE_GOOGLE_PERFTOOLS) - set(TCMALLOC_FLAG -DLL_USE_TCMALLOC=1) + set(TCMALLOC_FLAG -ULL_USE_TCMALLOC=1) include_directories(${GOOGLE_PERFTOOLS_INCLUDE_DIR}) set(GOOGLE_PERFTOOLS_LIBRARIES ${TCMALLOC_LIBRARIES} ${STACKTRACE_LIBRARIES} ${PROFILER_LIBRARIES}) else (USE_GOOGLE_PERFTOOLS) diff --git a/indra/cmake/LLConvexDecomposition.cmake b/indra/cmake/LLConvexDecomposition.cmake new file mode 100644 index 0000000000..8e44504782 --- /dev/null +++ b/indra/cmake/LLConvexDecomposition.cmake @@ -0,0 +1,12 @@ +# -*- cmake -*- +include(Prebuilt) + +set(LLCONVEXDECOMP_INCLUDE_DIRS ${LIBS_PREBUILT_DIR}/include) + +if (INSTALL_PROPRIETARY AND NOT STANDALONE) + use_prebuilt_binary(llconvexdecomposition) + set(LLCONVEXDECOMP_LIBRARY llconvexdecomposition) +else (INSTALL_PROPRIETARY AND NOT STANDALONE) + use_prebuilt_binary(llconvexdecompositionstub) + set(LLCONVEXDECOMP_LIBRARY llconvexdecompositionstub) +endif (INSTALL_PROPRIETARY AND NOT STANDALONE) diff --git a/indra/cmake/LLPrimitive.cmake b/indra/cmake/LLPrimitive.cmake index d397b78f1c..e68d16ed08 100644 --- a/indra/cmake/LLPrimitive.cmake +++ b/indra/cmake/LLPrimitive.cmake @@ -1,7 +1,33 @@ # -*- cmake -*- +# these should be moved to their own cmake file +include(Prebuilt) +use_prebuilt_binary(colladadom) +use_prebuilt_binary(pcre) +use_prebuilt_binary(libxml) + set(LLPRIMITIVE_INCLUDE_DIRS ${LIBS_OPEN_DIR}/llprimitive ) +if (WINDOWS) + set(LLPRIMITIVE_LIBRARIES + debug llprimitive + optimized llprimitive + debug libcollada14dom22-d + optimized libcollada14dom22 + debug libboost_filesystem-vc100-mt-gd-1_45 + optimized libboost_filesystem-vc100-mt-1_45 + debug libboost_system-vc100-mt-gd-1_45 + optimized libboost_system-vc100-mt-1_45 + ) +else (WINDOWS) + set(LLPRIMITIVE_LIBRARIES + llprimitive + collada14dom + minizip + xml2 + pcrecpp + pcre + ) +endif (WINDOWS) -set(LLPRIMITIVE_LIBRARIES llprimitive) diff --git a/indra/cmake/LLTestCommand.cmake b/indra/cmake/LLTestCommand.cmake index 554559edbd..b5a0580a90 100644 --- a/indra/cmake/LLTestCommand.cmake +++ b/indra/cmake/LLTestCommand.cmake @@ -1,3 +1,4 @@ +include(Python) MACRO(LL_TEST_COMMAND OUTVAR LD_LIBRARY_PATH) # nat wonders how Kitware can use the term 'function' for a construct that # cannot return a value. And yet, variables you set inside a FUNCTION are diff --git a/indra/cmake/OpenGL.cmake b/indra/cmake/OpenGL.cmake index 661666f00d..0a3dd976b4 100644 --- a/indra/cmake/OpenGL.cmake +++ b/indra/cmake/OpenGL.cmake @@ -2,8 +2,7 @@ include(Prebuilt) if (NOT STANDALONE) - use_prebuilt_binary(GL) - # possible glh_linear should have its own .cmake file instead + use_prebuilt_binary(glext) use_prebuilt_binary(glh_linear) set(GLEXT_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include) endif (NOT STANDALONE) diff --git a/indra/cmake/Tut.cmake b/indra/cmake/Tut.cmake index 738c08c42f..7488e9dcb0 100644 --- a/indra/cmake/Tut.cmake +++ b/indra/cmake/Tut.cmake @@ -1,11 +1,6 @@ # -*- cmake -*- include(Prebuilt) -set(TUT_FIND_REQUIRED TRUE) -set(TUT_FIND_QUIETLY TRUE) - -if (STANDALONE) - include(FindTut) -else (STANDALONE) +if (NOT STANDALONE) use_prebuilt_binary(tut) -endif (STANDALONE) +endif(NOT STANDALONE) diff --git a/indra/cmake/Variables.cmake b/indra/cmake/Variables.cmake index 2f23e7c307..cfccd29def 100644 --- a/indra/cmake/Variables.cmake +++ b/indra/cmake/Variables.cmake @@ -29,6 +29,7 @@ set(SERVER_PREFIX) set(VIEWER_PREFIX) set(INTEGRATION_TESTS_PREFIX) set(LL_TESTS ON CACHE BOOL "Build and run unit and integration tests (disable for build timing runs to reduce variation") +set(INCREMENTAL_LINK OFF CACHE BOOL "Use incremental linking on win32 builds (enable for faster links on some machines)") if(LIBS_CLOSED_DIR) file(TO_CMAKE_PATH "${LIBS_CLOSED_DIR}" LIBS_CLOSED_DIR) diff --git a/indra/cmake/WebKitLibPlugin.cmake b/indra/cmake/WebKitLibPlugin.cmake index 0f5a81c020..7131445464 100644 --- a/indra/cmake/WebKitLibPlugin.cmake +++ b/indra/cmake/WebKitLibPlugin.cmake @@ -26,42 +26,45 @@ if (STANDALONE) endforeach(qlibname) # qjpeg depends on libjpeg list(APPEND QT_PLUGIN_LIBRARIES jpeg) - set(WEBKITLIBPLUGIN OFF CACHE BOOL - "WEBKITLIBPLUGIN support for the llplugin/llmedia test apps.") + set(WEBKITLIBPLUGIN OFF CACHE BOOL + "WEBKITLIBPLUGIN support for the llplugin/llmedia test apps.") else (STANDALONE) - use_prebuilt_binary(llqtwebkit) - set(WEBKITLIBPLUGIN ON CACHE BOOL - "WEBKITLIBPLUGIN support for the llplugin/llmedia test apps.") + use_prebuilt_binary(llqtwebkit) + set(WEBKITLIBPLUGIN ON CACHE BOOL + "WEBKITLIBPLUGIN support for the llplugin/llmedia test apps.") endif (STANDALONE) if (WINDOWS) - set(WEBKIT_PLUGIN_LIBRARIES - debug llqtwebkitd - debug QtWebKitd4 - debug QtOpenGLd4 - debug QtNetworkd4 - debug QtGuid4 - debug QtCored4 - debug qtmaind - optimized llqtwebkit - optimized QtWebKit4 - optimized QtOpenGL4 - optimized QtNetwork4 - optimized QtGui4 - optimized QtCore4 - optimized qtmain - ) + set(WEBKIT_PLUGIN_LIBRARIES + debug llqtwebkitd + debug QtWebKitd4 + debug QtOpenGLd4 + debug QtNetworkd4 + debug QtGuid4 + debug QtCored4 + debug qtmaind + optimized llqtwebkit + optimized QtWebKit4 + optimized QtOpenGL4 + optimized QtNetwork4 + optimized QtGui4 + optimized QtCore4 + optimized qtmain + ) elseif (DARWIN) - set(WEBKIT_PLUGIN_LIBRARIES - optimized ${ARCH_PREBUILT_DIRS_RELEASE}/libllqtwebkit.dylib - debug ${ARCH_PREBUILT_DIRS_RELEASE}/libllqtwebkit.dylib - ) + set(WEBKIT_PLUGIN_LIBRARIES + optimized ${ARCH_PREBUILT_DIRS_RELEASE}/libllqtwebkit.dylib + debug ${ARCH_PREBUILT_DIRS_RELEASE}/libllqtwebkit.dylib + ) elseif (LINUX) - if (STANDALONE) set(WEBKIT_PLUGIN_LIBRARIES ${LLQTWEBKIT_LIBRARY} ${QT_LIBRARIES} ${QT_PLUGIN_LIBRARIES}) - else (STANDALONE) set(WEBKIT_PLUGIN_LIBRARIES llqtwebkit +# qico +# qpng +# qtiff +# qsvg +# QtSvg QtWebKit QtOpenGL QtNetwork @@ -74,6 +77,9 @@ elseif (LINUX) X11 Xrender GL + +# sqlite3 +# Xi +# SM ) - endif (STANDALONE) endif (WINDOWS) diff --git a/indra/llcharacter/CMakeLists.txt b/indra/llcharacter/CMakeLists.txt index 14841b5d3d..6eb154458d 100644 --- a/indra/llcharacter/CMakeLists.txt +++ b/indra/llcharacter/CMakeLists.txt @@ -77,12 +77,13 @@ list(APPEND llcharacter_SOURCE_FILES ${llcharacter_HEADER_FILES}) add_library (llcharacter ${llcharacter_SOURCE_FILES}) -if(LL_TESTS) - # Add tests - include(LLAddBuildTest) - # UNIT TESTS - SET(llcharacter_TEST_SOURCE_FILES - lljoint.cpp - ) - LL_ADD_PROJECT_UNIT_TESTS(llcharacter "${llcharacter_TEST_SOURCE_FILES}") -endif(LL_TESTS) +# Add tests +if (LL_TESTS) + include(LLAddBuildTest) + # UNIT TESTS + SET(llcharacter_TEST_SOURCE_FILES + lljoint.cpp + ) + LL_ADD_PROJECT_UNIT_TESTS(llcharacter "${llcharacter_TEST_SOURCE_FILES}") +endif (LL_TESTS) + diff --git a/indra/llcharacter/lljoint.cpp b/indra/llcharacter/lljoint.cpp index 5d750c6c96..19907933cb 100644 --- a/indra/llcharacter/lljoint.cpp +++ b/indra/llcharacter/lljoint.cpp @@ -50,6 +50,7 @@ LLJoint::LLJoint() mUpdateXform = TRUE; mJointNum = -1; touch(); + mResetAfterRestoreOldXform = false; } @@ -235,6 +236,42 @@ void LLJoint::setPosition( const LLVector3& pos ) //-------------------------------------------------------------------- +// setPosition() +//-------------------------------------------------------------------- +void LLJoint::setDefaultFromCurrentXform( void ) +{ + mDefaultXform = mXform; + touch(MATRIX_DIRTY | POSITION_DIRTY); + +} + +//-------------------------------------------------------------------- +// storeCurrentXform() +//-------------------------------------------------------------------- +void LLJoint::storeCurrentXform( const LLVector3& pos ) +{ + mOldXform = mXform; + mResetAfterRestoreOldXform = true; + setPosition( pos ); +} +//-------------------------------------------------------------------- +// restoreOldXform() +//-------------------------------------------------------------------- +void LLJoint::restoreOldXform( void ) +{ + mResetAfterRestoreOldXform = false; + mXform = mOldXform; +} +//-------------------------------------------------------------------- +// restoreOldXform() +//-------------------------------------------------------------------- +void LLJoint::restoreToDefaultXform( void ) +{ + mXform = mDefaultXform; + setPosition( mXform.getPosition() ); +} + +//-------------------------------------------------------------------- // getWorldPosition() //-------------------------------------------------------------------- LLVector3 LLJoint::getWorldPosition() @@ -522,3 +559,4 @@ void LLJoint::clampRotation(LLQuaternion old_rot, LLQuaternion new_rot) } // End + diff --git a/indra/llcharacter/lljoint.h b/indra/llcharacter/lljoint.h index 8c8e5930fb..dc3c58cf64 100644 --- a/indra/llcharacter/lljoint.h +++ b/indra/llcharacter/lljoint.h @@ -80,11 +80,16 @@ protected: // explicit transformation members LLXformMatrix mXform; + LLXformMatrix mOldXform; + LLXformMatrix mDefaultXform; + LLUUID mId; public: U32 mDirtyFlags; BOOL mUpdateXform; + BOOL mResetAfterRestoreOldXform; + // describes the skin binding pose LLVector3 mSkinOffset; @@ -130,7 +135,9 @@ public: // get/set local position const LLVector3& getPosition(); void setPosition( const LLVector3& pos ); - + + void setDefaultPosition( const LLVector3& pos ); + // get/set world position LLVector3 getWorldPosition(); LLVector3 getLastWorldPosition(); @@ -172,6 +179,21 @@ public: S32 getJointNum() const { return mJointNum; } void setJointNum(S32 joint_num) { mJointNum = joint_num; } + + void restoreOldXform( void ); + void restoreToDefaultXform( void ); + void setDefaultFromCurrentXform( void ); + void storeCurrentXform( const LLVector3& pos ); + + //Accessor for the joint id + LLUUID getId( void ) { return mId; } + //Setter for the joints id + void setId( const LLUUID& id ) { mId = id;} + + //If the old transform flag has been set, then the reset logic in avatar needs to be aware(test) of it + const BOOL doesJointNeedToBeReset( void ) const { return mResetAfterRestoreOldXform; } + //Setter for joint reset flag + void setJointToBeReset( BOOL val ) { mResetAfterRestoreOldXform = val; } }; #endif // LL_LLJOINT_H diff --git a/indra/llcharacter/llkeyframemotion.cpp b/indra/llcharacter/llkeyframemotion.cpp index 5b0867524e..9df033a4ca 100644 --- a/indra/llcharacter/llkeyframemotion.cpp +++ b/indra/llcharacter/llkeyframemotion.cpp @@ -1145,7 +1145,7 @@ void LLKeyframeMotion::applyConstraint(JointConstraint* constraint, F32 time, U8 constraint->mPositions[joint_num] = new_pos; } constraint->mFixupDistanceRMS *= 1.f / (constraint->mTotalLength * (F32)(shared_data->mChainLength - 1)); - constraint->mFixupDistanceRMS = fsqrtf(constraint->mFixupDistanceRMS); + constraint->mFixupDistanceRMS = (F32) sqrt(constraint->mFixupDistanceRMS); //reset old joint rots for (joint_num = 0; joint_num <= shared_data->mChainLength; joint_num++) diff --git a/indra/llcommon/llassettype.cpp b/indra/llcommon/llassettype.cpp index eb610f625a..145dddd543 100644 --- a/indra/llcommon/llassettype.cpp +++ b/indra/llcommon/llassettype.cpp @@ -93,8 +93,9 @@ LLAssetDictionary::LLAssetDictionary() addEntry(LLAssetType::AT_LINK, new AssetEntry("LINK", "link", "sym link", false, false, true)); addEntry(LLAssetType::AT_LINK_FOLDER, new AssetEntry("FOLDER_LINK", "link_f", "sym folder link", false, false, true)); + addEntry(LLAssetType::AT_MESH, new AssetEntry("MESH", "mesh", "mesh", false, false, false)); + addEntry(LLAssetType::AT_NONE, new AssetEntry("NONE", "-1", NULL, FALSE, FALSE, FALSE)); - addEntry(LLAssetType::AT_NONE, new AssetEntry("NONE", "-1", NULL, false, false, false)); }; // static diff --git a/indra/llcommon/llassettype.h b/indra/llcommon/llassettype.h index c5ff2364cc..74ccd00324 100644 --- a/indra/llcommon/llassettype.h +++ b/indra/llcommon/llassettype.h @@ -108,8 +108,10 @@ public: AT_LINK_FOLDER = 25, // Inventory folder link - - AT_COUNT = 26, + AT_MESH = 49, + // Mesh data in our proprietary SLM format + + AT_COUNT = 50, // +*********************************************************+ // | TO ADD AN ELEMENT TO THIS ENUM: | diff --git a/indra/llcommon/lldefs.h b/indra/llcommon/lldefs.h index 6b38de6500..5a4b8325f4 100644 --- a/indra/llcommon/lldefs.h +++ b/indra/llcommon/lldefs.h @@ -236,5 +236,13 @@ inline LLDATATYPE llclampb(const LLDATATYPE& a) return llmin(llmax(a, (LLDATATYPE)0), (LLDATATYPE)255); } +template <class LLDATATYPE> +inline void llswap(LLDATATYPE& lhs, LLDATATYPE& rhs) +{ + LLDATATYPE tmp = lhs; + lhs = rhs; + rhs = tmp; +} + #endif // LL_LLDEFS_H diff --git a/indra/llcommon/llfasttimer.h b/indra/llcommon/llfasttimer.h index 4ff93a553c..2b25f2fabb 100644 --- a/indra/llcommon/llfasttimer.h +++ b/indra/llcommon/llfasttimer.h @@ -27,140 +27,9 @@ #ifndef LL_FASTTIMER_H #define LL_FASTTIMER_H +// Implementation of getCPUClockCount32() and getCPUClockCount64 are now in llfastertimer_class.cpp. + // pull in the actual class definition #include "llfasttimer_class.h" -// -// Important note: These implementations must be FAST! -// - -#if LL_WINDOWS -// -// Windows implementation of CPU clock -// - -// -// NOTE: put back in when we aren't using platform sdk anymore -// -// because MS has different signatures for these functions in winnt.h -// need to rename them to avoid conflicts -//#define _interlockedbittestandset _renamed_interlockedbittestandset -//#define _interlockedbittestandreset _renamed_interlockedbittestandreset -//#include <intrin.h> -//#undef _interlockedbittestandset -//#undef _interlockedbittestandreset - -//inline U32 LLFastTimer::getCPUClockCount32() -//{ -// U64 time_stamp = __rdtsc(); -// return (U32)(time_stamp >> 8); -//} -// -//// return full timer value, *not* shifted by 8 bits -//inline U64 LLFastTimer::getCPUClockCount64() -//{ -// return __rdtsc(); -//} - -// shift off lower 8 bits for lower resolution but longer term timing -// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing -inline U32 LLFastTimer::getCPUClockCount32() -{ - U32 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - shr eax,8 - shl edx,24 - or eax, edx - mov dword ptr [ret_val], eax - } - return ret_val; -} - -// return full timer value, *not* shifted by 8 bits -inline U64 LLFastTimer::getCPUClockCount64() -{ - U64 ret_val; - __asm - { - _emit 0x0f - _emit 0x31 - mov eax,eax - mov edx,edx - mov dword ptr [ret_val+4], edx - mov dword ptr [ret_val], eax - } - return ret_val; -} -#endif - - -#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) -// -// Linux and Solaris implementation of CPU clock - non-x86. -// This is accurate but SLOW! Only use out of desperation. -// -// Try to use the MONOTONIC clock if available, this is a constant time counter -// with nanosecond resolution (but not necessarily accuracy) and attempts are -// made to synchronize this value between cores at kernel start. It should not -// be affected by CPU frequency. If not available use the REALTIME clock, but -// this may be affected by NTP adjustments or other user activity affecting -// the system time. -inline U64 LLFastTimer::getCPUClockCount64() -{ - struct timespec tp; - -#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? - if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME -#endif - clock_gettime(CLOCK_REALTIME,&tp); - - return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; -} - -inline U32 LLFastTimer::getCPUClockCount32() -{ - return (U32)(LLFastTimer::getCPUClockCount64() >> 8); -} -#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) - - -#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) -// -// Mac+Linux+Solaris FAST x86 implementation of CPU clock -inline U32 LLFastTimer::getCPUClockCount32() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return (U32)(x >> 8); -} - -inline U64 LLFastTimer::getCPUClockCount64() -{ - U64 x; - __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); - return x; -} -#endif - - -#if ( LL_DARWIN && !(defined(__i386__) || defined(__amd64__))) -// -// Mac PPC (deprecated) implementation of CPU clock -// -// Just use gettimeofday implementation for now - -inline U32 LLFastTimer::getCPUClockCount32() -{ - return (U32)(get_clock_count()>>8); -} - -inline U64 LLFastTimer::getCPUClockCount64() -{ - return get_clock_count(); -} -#endif - #endif // LL_LLFASTTIMER_H diff --git a/indra/llcommon/llfasttimer_class.cpp b/indra/llcommon/llfasttimer_class.cpp index bce87ada96..bd594b06cf 100644 --- a/indra/llcommon/llfasttimer_class.cpp +++ b/indra/llcommon/llfasttimer_class.cpp @@ -35,10 +35,13 @@ #include <boost/bind.hpp> + #if LL_WINDOWS +#include "lltimer.h" #elif LL_LINUX || LL_SOLARIS #include <sys/time.h> #include <sched.h> +#include "lltimer.h" #elif LL_DARWIN #include <sys/time.h> #include "lltimer.h" // get_clock_count() @@ -61,6 +64,8 @@ BOOL LLFastTimer::sMetricLog = FALSE; LLMutex* LLFastTimer::sLogLock = NULL; std::queue<LLSD> LLFastTimer::sLogQueue; +#define USE_RDTSC 0 + #if LL_LINUX || LL_SOLARIS U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution #else @@ -234,10 +239,23 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer #else // windows or x86-mac or x86-linux or x86-solaris U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer { +#if USE_RDTSC || !LL_WINDOWS //getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0); // we drop the low-order byte in our timers, so report a lower frequency +#else + // If we're not using RDTSC, each fasttimer tick is just a performance counter tick. + // Not redefining the clock frequency itself (in llprocessor.cpp/calculate_cpu_frequency()) + // since that would change displayed MHz stats for CPUs + static bool firstcall = true; + static U64 sCPUClockFrequency; + if (firstcall) + { + QueryPerformanceFrequency((LARGE_INTEGER*)&sCPUClockFrequency); + firstcall = false; + } +#endif return sCPUClockFrequency >> 8; } #endif @@ -482,6 +500,19 @@ void LLFastTimer::NamedTimer::resetFrame() { if (sLog) { //output current frame counts to performance log + + static S32 call_count = 0; + if (call_count % 100 == 0) + { + llinfos << "countsPerSecond (32 bit): " << countsPerSecond() << llendl; + llinfos << "get_clock_count (64 bit): " << get_clock_count() << llendl; + llinfos << "LLProcessorInfo().getCPUFrequency() " << LLProcessorInfo().getCPUFrequency() << llendl; + llinfos << "getCPUClockCount32() " << getCPUClockCount32() << llendl; + llinfos << "getCPUClockCount64() " << getCPUClockCount64() << llendl; + llinfos << "elapsed sec " << ((F64)getCPUClockCount64())/((F64)LLProcessorInfo().getCPUFrequency()*1000000.0) << llendl; + } + call_count++; + F64 iclock_freq = 1000.0 / countsPerSecond(); // good place to calculate clock frequency F64 total_time = 0; @@ -763,3 +794,144 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state) ////////////////////////////////////////////////////////////////////////////// +// +// Important note: These implementations must be FAST! +// + + +#if LL_WINDOWS +// +// Windows implementation of CPU clock +// + +// +// NOTE: put back in when we aren't using platform sdk anymore +// +// because MS has different signatures for these functions in winnt.h +// need to rename them to avoid conflicts +//#define _interlockedbittestandset _renamed_interlockedbittestandset +//#define _interlockedbittestandreset _renamed_interlockedbittestandreset +//#include <intrin.h> +//#undef _interlockedbittestandset +//#undef _interlockedbittestandreset + +//inline U32 LLFastTimer::getCPUClockCount32() +//{ +// U64 time_stamp = __rdtsc(); +// return (U32)(time_stamp >> 8); +//} +// +//// return full timer value, *not* shifted by 8 bits +//inline U64 LLFastTimer::getCPUClockCount64() +//{ +// return __rdtsc(); +//} + +// shift off lower 8 bits for lower resolution but longer term timing +// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing +#if USE_RDTSC +U32 LLFastTimer::getCPUClockCount32() +{ + U32 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + shr eax,8 + shl edx,24 + or eax, edx + mov dword ptr [ret_val], eax + } + return ret_val; +} + +// return full timer value, *not* shifted by 8 bits +U64 LLFastTimer::getCPUClockCount64() +{ + U64 ret_val; + __asm + { + _emit 0x0f + _emit 0x31 + mov eax,eax + mov edx,edx + mov dword ptr [ret_val+4], edx + mov dword ptr [ret_val], eax + } + return ret_val; +} + +std::string LLFastTimer::sClockType = "rdtsc"; + +#else +//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp +// These use QueryPerformanceCounter, which is arguably fine and also works on amd architectures. +U32 LLFastTimer::getCPUClockCount32() +{ + return (U32)(get_clock_count()>>8); +} + +U64 LLFastTimer::getCPUClockCount64() +{ + return get_clock_count(); +} + +std::string LLFastTimer::sClockType = "QueryPerformanceCounter"; +#endif + +#endif + + +#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) +// +// Linux and Solaris implementation of CPU clock - non-x86. +// This is accurate but SLOW! Only use out of desperation. +// +// Try to use the MONOTONIC clock if available, this is a constant time counter +// with nanosecond resolution (but not necessarily accuracy) and attempts are +// made to synchronize this value between cores at kernel start. It should not +// be affected by CPU frequency. If not available use the REALTIME clock, but +// this may be affected by NTP adjustments or other user activity affecting +// the system time. +U64 LLFastTimer::getCPUClockCount64() +{ + struct timespec tp; + +#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time? + if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME +#endif + clock_gettime(CLOCK_REALTIME,&tp); + + return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec; +} + +U32 LLFastTimer::getCPUClockCount32() +{ + return (U32)(LLFastTimer::getCPUClockCount64() >> 8); +} + +std::string LLFastTimer::sClockType = "clock_gettime"; + +#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__)) + + +#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__)) +// +// Mac+Linux+Solaris FAST x86 implementation of CPU clock +U32 LLFastTimer::getCPUClockCount32() +{ + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return (U32)(x >> 8); +} + +U64 LLFastTimer::getCPUClockCount64() +{ + U64 x; + __asm__ volatile (".byte 0x0f, 0x31": "=A"(x)); + return x; +} + +std::string LLFastTimer::sClockType = "rdtsc"; +#endif + diff --git a/indra/llcommon/llfasttimer_class.h b/indra/llcommon/llfasttimer_class.h index eb9789682b..827747f0c6 100644 --- a/indra/llcommon/llfasttimer_class.h +++ b/indra/llcommon/llfasttimer_class.h @@ -31,12 +31,15 @@ #define FAST_TIMER_ON 1 #define TIME_FAST_TIMERS 0 +#define DEBUG_FAST_TIMER_THREADS 1 class LLMutex; #include <queue> #include "llsd.h" +LL_COMMON_API void assert_main_thread(); + class LL_COMMON_API LLFastTimer { public: @@ -176,6 +179,11 @@ public: U64 timer_end = getCPUClockCount64(); sTimerCycles += timer_end - timer_start; #endif +#if DEBUG_FAST_TIMER_THREADS +#if !LL_RELEASE + assert_main_thread(); +#endif +#endif } LL_FORCE_INLINE ~LLFastTimer() @@ -245,6 +253,7 @@ public: U32 mChildTime; }; static CurTimerData sCurTimerData; + static std::string sClockType; private: static U32 getCPUClockCount32(); diff --git a/indra/llcommon/llfoldertype.cpp b/indra/llcommon/llfoldertype.cpp index ebc79af412..c2cfb7286e 100644 --- a/indra/llcommon/llfoldertype.cpp +++ b/indra/llcommon/llfoldertype.cpp @@ -89,6 +89,9 @@ LLFolderDictionary::LLFolderDictionary() addEntry(LLFolderType::FT_CURRENT_OUTFIT, new FolderEntry("current", TRUE)); addEntry(LLFolderType::FT_OUTFIT, new FolderEntry("outfit", FALSE)); addEntry(LLFolderType::FT_MY_OUTFITS, new FolderEntry("my_otfts", TRUE)); + + addEntry(LLFolderType::FT_MESH, new FolderEntry("mesh", TRUE)); + addEntry(LLFolderType::FT_INBOX, new FolderEntry("inbox", TRUE)); addEntry(LLFolderType::FT_NONE, new FolderEntry("-1", FALSE)); diff --git a/indra/llcommon/llfoldertype.h b/indra/llcommon/llfoldertype.h index 936fbed17d..cb32cb075b 100644 --- a/indra/llcommon/llfoldertype.h +++ b/indra/llcommon/llfoldertype.h @@ -80,9 +80,11 @@ public: FT_OUTFIT = 47, FT_MY_OUTFITS = 48, - FT_INBOX = 49, + FT_MESH = 49, - FT_COUNT = 50, + FT_INBOX = 50, + + FT_COUNT = 51, FT_NONE = -1 }; diff --git a/indra/llcommon/llmd5.h b/indra/llcommon/llmd5.h index c8acbbe591..1526e6ac3c 100644 --- a/indra/llcommon/llmd5.h +++ b/indra/llcommon/llmd5.h @@ -103,7 +103,7 @@ public: void raw_digest(unsigned char *array) const; // provide 16-byte array for binary data void hex_digest(char *string) const; // provide 33-byte array for ascii-hex string - friend std::ostream& operator<< (std::ostream&, LLMD5 context); + friend LL_COMMON_API std::ostream& operator<< (std::ostream&, LLMD5 context); private: diff --git a/indra/llcommon/llmemory.cpp b/indra/llcommon/llmemory.cpp index 51fcd5b717..21d1c84d69 100644 --- a/indra/llcommon/llmemory.cpp +++ b/indra/llcommon/llmemory.cpp @@ -71,25 +71,6 @@ void LLMemory::freeReserve() reserveMem = NULL; } -void* ll_allocate (size_t size) -{ - if (size == 0) - { - llwarns << "Null allocation" << llendl; - } - void *p = malloc(size); - if (p == NULL) - { - LLMemory::freeReserve(); - llerrs << "Out of memory Error" << llendl; - } - return p; -} - -void ll_release (void *p) -{ - free(p); -} //---------------------------------------------------------------------------- diff --git a/indra/llcommon/llmemory.h b/indra/llcommon/llmemory.h index 11406f59b0..3bd1403576 100644 --- a/indra/llcommon/llmemory.h +++ b/indra/llcommon/llmemory.h @@ -1,25 +1,25 @@ -/** +/** * @file llmemory.h * @brief Memory allocation/deallocation header-stuff goes here. * * $LicenseInfo:firstyear=2002&license=viewerlgpl$ * Second Life Viewer Source Code * Copyright (C) 2010, Linden Research, Inc. - * + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License only. - * + * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * + * * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ @@ -28,12 +28,82 @@ #include "llmemtype.h" -extern S32 gTotalDAlloc; -extern S32 gTotalDAUse; -extern S32 gDACount; +#if LL_DEBUG +inline void* ll_aligned_malloc( size_t size, int align ) +{ + void* mem = malloc( size + (align - 1) + sizeof(void*) ); + char* aligned = ((char*)mem) + sizeof(void*); + aligned += align - ((uintptr_t)aligned & (align - 1)); + + ((void**)aligned)[-1] = mem; + return aligned; +} + +inline void ll_aligned_free( void* ptr ) +{ + free( ((void**)ptr)[-1] ); +} -extern void* ll_allocate (size_t size); -extern void ll_release (void *p); +inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16(). +{ +#if defined(LL_WINDOWS) + return _mm_malloc(size, 16); +#elif defined(LL_DARWIN) + return malloc(size); // default osx malloc is 16 byte aligned. +#else + void *rtn; + if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size))) + return rtn; + else // bad alignment requested, or out of memory + return NULL; +#endif +} + +inline void ll_aligned_free_16(void *p) +{ +#if defined(LL_WINDOWS) + _mm_free(p); +#elif defined(LL_DARWIN) + return free(p); +#else + free(p); // posix_memalign() is compatible with heap deallocator +#endif +} + +inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32(). +{ +#if defined(LL_WINDOWS) + return _mm_malloc(size, 32); +#elif defined(LL_DARWIN) + return ll_aligned_malloc( size, 32 ); +#else + void *rtn; + if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size))) + return rtn; + else // bad alignment requested, or out of memory + return NULL; +#endif +} + +inline void ll_aligned_free_32(void *p) +{ +#if defined(LL_WINDOWS) + _mm_free(p); +#elif defined(LL_DARWIN) + ll_aligned_free( p ); +#else + free(p); // posix_memalign() is compatible with heap deallocator +#endif +} +#else // LL_DEBUG +// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals) +#define ll_aligned_malloc( size, align ) malloc(size) +#define ll_aligned_free( ptr ) free(ptr) +#define ll_aligned_malloc_16 malloc +#define ll_aligned_free_16 free +#define ll_aligned_malloc_32 malloc +#define ll_aligned_free_32 free +#endif // LL_DEBUG class LL_COMMON_API LLMemory { diff --git a/indra/llcommon/llrefcount.cpp b/indra/llcommon/llrefcount.cpp index 55d0c85cbd..e1876599fc 100644 --- a/indra/llcommon/llrefcount.cpp +++ b/indra/llcommon/llrefcount.cpp @@ -29,9 +29,25 @@ #include "llerror.h" +#if LL_REF_COUNT_DEBUG +#include "llthread.h" +#include "llapr.h" +#endif + LLRefCount::LLRefCount(const LLRefCount& other) : mRef(0) { +#if LL_REF_COUNT_DEBUG + if(gAPRPoolp) + { + mMutexp = new LLMutex(gAPRPoolp) ; + } + else + { + mMutexp = NULL ; + } + mCrashAtUnlock = FALSE ; +#endif } LLRefCount& LLRefCount::operator=(const LLRefCount&) @@ -43,6 +59,17 @@ LLRefCount& LLRefCount::operator=(const LLRefCount&) LLRefCount::LLRefCount() : mRef(0) { +#if LL_REF_COUNT_DEBUG + if(gAPRPoolp) + { + mMutexp = new LLMutex(gAPRPoolp) ; + } + else + { + mMutexp = NULL ; + } + mCrashAtUnlock = FALSE ; +#endif } LLRefCount::~LLRefCount() @@ -51,4 +78,87 @@ LLRefCount::~LLRefCount() { llerrs << "deleting non-zero reference" << llendl; } + +#if LL_REF_COUNT_DEBUG + if(gAPRPoolp) + { + delete mMutexp ; + } +#endif } + +#if LL_REF_COUNT_DEBUG +void LLRefCount::ref() const +{ + if(mMutexp) + { + if(mMutexp->isLocked()) + { + mCrashAtUnlock = TRUE ; + llerrs << "the mutex is locked by the thread: " << mLockedThreadID + << " Current thread: " << LLThread::currentID() << llendl ; + } + + mMutexp->lock() ; + mLockedThreadID = LLThread::currentID() ; + + mRef++; + + if(mCrashAtUnlock) + { + while(1); //crash here. + } + mMutexp->unlock() ; + } + else + { + mRef++; + } +} + +S32 LLRefCount::unref() const +{ + if(mMutexp) + { + if(mMutexp->isLocked()) + { + mCrashAtUnlock = TRUE ; + llerrs << "the mutex is locked by the thread: " << mLockedThreadID + << " Current thread: " << LLThread::currentID() << llendl ; + } + + mMutexp->lock() ; + mLockedThreadID = LLThread::currentID() ; + + llassert(mRef >= 1); + if (0 == --mRef) + { + if(mCrashAtUnlock) + { + while(1); //crash here. + } + mMutexp->unlock() ; + + delete this; + return 0; + } + + if(mCrashAtUnlock) + { + while(1); //crash here. + } + mMutexp->unlock() ; + return mRef; + } + else + { + llassert(mRef >= 1); + if (0 == --mRef) + { + delete this; + return 0; + } + return mRef; + } +} +#endif diff --git a/indra/llcommon/llrefcount.h b/indra/llcommon/llrefcount.h index 19f008b15c..8eb5d53f3f 100644 --- a/indra/llcommon/llrefcount.h +++ b/indra/llcommon/llrefcount.h @@ -28,6 +28,11 @@ #include <boost/noncopyable.hpp> +#define LL_REF_COUNT_DEBUG 0 +#if LL_REF_COUNT_DEBUG +class LLMutex ; +#endif + //---------------------------------------------------------------------------- // RefCount objects should generally only be accessed by way of LLPointer<>'s // see llthread.h for LLThreadSafeRefCount @@ -43,12 +48,16 @@ protected: public: LLRefCount(); - void ref() const +#if LL_REF_COUNT_DEBUG + void ref() const ; + S32 unref() const ; +#else + inline void ref() const { mRef++; } - S32 unref() const + inline S32 unref() const { llassert(mRef >= 1); if (0 == --mRef) @@ -58,6 +67,7 @@ public: } return mRef; } +#endif //NOTE: when passing around a const LLRefCount object, this can return different results // at different types, since mRef is mutable @@ -68,6 +78,12 @@ public: private: mutable S32 mRef; + +#if LL_REF_COUNT_DEBUG + LLMutex* mMutexp ; + mutable U32 mLockedThreadID ; + mutable BOOL mCrashAtUnlock ; +#endif }; #endif diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index 10f460e8a6..5be5ecc492 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -34,6 +34,12 @@ #include <iostream> #include "apr_base64.h" +#ifdef LL_STANDALONE +# include <zlib.h> +#else +# include "zlib/zlib.h" // for davep's dirty little zip functions +#endif + #if !LL_WINDOWS #include <netinet/in.h> // htonl & ntohl #endif @@ -1983,3 +1989,180 @@ std::ostream& operator<<(std::ostream& s, const LLSD& llsd) return s; } + +//dirty little zippers -- yell at davep if these are horrid + +//return a string containing gzipped bytes of binary serialized LLSD +// VERY inefficient -- creates several copies of LLSD block in memory +std::string zip_llsd(LLSD& data) +{ + std::stringstream llsd_strm; + + LLSDSerialize::toBinary(data, llsd_strm); + + const U32 CHUNK = 65536; + + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + S32 ret = deflateInit(&strm, Z_BEST_COMPRESSION); + if (ret != Z_OK) + { + llwarns << "Failed to compress LLSD block." << llendl; + return std::string(); + } + + std::string source = llsd_strm.str(); + + U8 out[CHUNK]; + + strm.avail_in = source.size(); + strm.next_in = (U8*) source.data(); + U8* output = NULL; + + U32 cur_size = 0; + + U32 have = 0; + + do + { + strm.avail_out = CHUNK; + strm.next_out = out; + + ret = deflate(&strm, Z_FINISH); + if (ret == Z_OK || ret == Z_STREAM_END) + { //copy result into output + if (strm.avail_out >= CHUNK) + { + llerrs << "WTF?" << llendl; + } + + have = CHUNK-strm.avail_out; + output = (U8*) realloc(output, cur_size+have); + memcpy(output+cur_size, out, have); + cur_size += have; + } + else + { + free(output); + llwarns << "Failed to compress LLSD block." << llendl; + return std::string(); + } + } + while (ret == Z_OK); + + std::string::size_type size = cur_size; + + std::string result((char*) output, size); + deflateEnd(&strm); + free(output); + +#if 0 //verify results work with unzip_llsd + std::istringstream test(result); + LLSD test_sd; + if (!unzip_llsd(test_sd, test, result.size())) + { + llerrs << "Invalid compression result!" << llendl; + } +#endif + + return result; +} + +//decompress a block of LLSD from provided istream +// not very efficient -- creats a copy of decompressed LLSD block in memory +// and deserializes from that copy using LLSDSerialize +bool unzip_llsd(LLSD& data, std::istream& is, S32 size) +{ + U8* result = NULL; + U32 cur_size = 0; + z_stream strm; + + const U32 CHUNK = 65536; + + U8 *in = new U8[size]; + is.read((char*) in, size); + + U8 out[CHUNK]; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = size; + strm.next_in = in; + + S32 ret = inflateInit(&strm); + + do + { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR) + { + inflateEnd(&strm); + free(result); + delete [] in; + return false; + } + + switch (ret) + { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; + case Z_DATA_ERROR: + case Z_MEM_ERROR: + inflateEnd(&strm); + free(result); + delete [] in; + return false; + break; + } + + U32 have = CHUNK-strm.avail_out; + + result = (U8*) realloc(result, cur_size + have); + memcpy(result+cur_size, out, have); + cur_size += have; + + } while (ret == Z_OK); + + inflateEnd(&strm); + delete [] in; + + if (ret != Z_STREAM_END) + { + free(result); + return false; + } + + //result now points to the decompressed LLSD block + { + std::string res_str((char*) result, cur_size); + + std::string deprecated_header("<? LLSD/Binary ?>"); + + if (res_str.substr(0, deprecated_header.size()) == deprecated_header) + { + res_str = res_str.substr(deprecated_header.size()+1, cur_size); + } + cur_size = res_str.size(); + + std::istringstream istr(res_str); + + if (!LLSDSerialize::fromBinary(data, istr, cur_size)) + { + llwarns << "Failed to unzip LLSD block" << llendl; + free(result); + return false; + } + } + + free(result); + return true; +} + + + diff --git a/indra/llcommon/llsdserialize.h b/indra/llcommon/llsdserialize.h index 1f096b5254..99a3ea3cd4 100644 --- a/indra/llcommon/llsdserialize.h +++ b/indra/llcommon/llsdserialize.h @@ -790,4 +790,8 @@ public: } }; +//dirty little zip functions -- yell at davep +LL_COMMON_API std::string zip_llsd(LLSD& data); +LL_COMMON_API bool unzip_llsd(LLSD& data, std::istream& is, S32 size); + #endif // LL_LLSDSERIALIZE_H diff --git a/indra/llcommon/llthread.cpp b/indra/llcommon/llthread.cpp index 49d05ef411..d9400fb5b3 100644 --- a/indra/llcommon/llthread.cpp +++ b/indra/llcommon/llthread.cpp @@ -56,6 +56,21 @@ // //---------------------------------------------------------------------------- +#if !LL_DARWIN +U32 ll_thread_local sThreadID = 0; +#endif + +U32 LLThread::sIDIter = 0; + +LL_COMMON_API void assert_main_thread() +{ + static U32 s_thread_id = LLThread::currentID(); + if (LLThread::currentID() != s_thread_id) + { + llerrs << "Illegal execution outside main thread." << llendl; + } +} + // // Handed to the APR thread creation function // @@ -63,10 +78,14 @@ void *APR_THREAD_FUNC LLThread::staticRun(apr_thread_t *apr_threadp, void *datap { LLThread *threadp = (LLThread *)datap; +#if !LL_DARWIN + sThreadID = threadp->mID; +#endif + // Run the user supplied function threadp->run(); - llinfos << "LLThread::staticRun() Exiting: " << threadp->mName << llendl; + //llinfos << "LLThread::staticRun() Exiting: " << threadp->mName << llendl; // We're done with the run function, this thread is done executing now. threadp->mStatus = STOPPED; @@ -81,6 +100,8 @@ LLThread::LLThread(const std::string& name, apr_pool_t *poolp) : mAPRThreadp(NULL), mStatus(STOPPED) { + mID = ++sIDIter; + // Thread creation probably CAN be paranoid about APR being initialized, if necessary if (poolp) { @@ -121,7 +142,7 @@ void LLThread::shutdown() // First, set the flag that indicates that we're ready to die setQuitting(); - llinfos << "LLThread::~LLThread() Killing thread " << mName << " Status: " << mStatus << llendl; + //llinfos << "LLThread::~LLThread() Killing thread " << mName << " Status: " << mStatus << llendl; // Now wait a bit for the thread to exit // It's unclear whether I should even bother doing this - this destructor // should netver get called unless we're already stopped, really... @@ -143,7 +164,7 @@ void LLThread::shutdown() if (!isStopped()) { // This thread just wouldn't stop, even though we gave it time - llwarns << "LLThread::~LLThread() exiting thread before clean exit!" << llendl; + //llwarns << "LLThread::~LLThread() exiting thread before clean exit!" << llendl; // Put a stake in its heart. apr_thread_exit(mAPRThreadp, -1); return; @@ -283,7 +304,7 @@ void LLThread::wakeLocked() //============================================================================ LLMutex::LLMutex(apr_pool_t *poolp) : - mAPRMutexp(NULL) + mAPRMutexp(NULL), mCount(0), mLockingThread(NO_THREAD) { //if (poolp) //{ @@ -315,7 +336,18 @@ LLMutex::~LLMutex() void LLMutex::lock() { +#if LL_DARWIN + if (mLockingThread == LLThread::currentID()) +#else + if (mLockingThread == sThreadID) +#endif + { //redundant lock + mCount++; + return; + } + apr_thread_mutex_lock(mAPRMutexp); + #if MUTEX_DEBUG // Have to have the lock before we can access the debug info U32 id = LLThread::currentID(); @@ -323,10 +355,22 @@ void LLMutex::lock() llerrs << "Already locked in Thread: " << id << llendl; mIsLocked[id] = TRUE; #endif + +#if LL_DARWIN + mLockingThread = LLThread::currentID(); +#else + mLockingThread = sThreadID; +#endif } void LLMutex::unlock() { + if (mCount > 0) + { //not the root unlock + mCount--; + return; + } + #if MUTEX_DEBUG // Access the debug info while we have the lock U32 id = LLThread::currentID(); @@ -334,6 +378,8 @@ void LLMutex::unlock() llerrs << "Not locked in Thread: " << id << llendl; mIsLocked[id] = FALSE; #endif + + mLockingThread = NO_THREAD; apr_thread_mutex_unlock(mAPRMutexp); } @@ -351,6 +397,11 @@ bool LLMutex::isLocked() } } +U32 LLMutex::lockingThread() const +{ + return mLockingThread; +} + //============================================================================ LLCondition::LLCondition(apr_pool_t *poolp) : @@ -371,6 +422,15 @@ LLCondition::~LLCondition() void LLCondition::wait() { + if (!isLocked()) + { //mAPRMutexp MUST be locked before calling apr_thread_cond_wait + apr_thread_mutex_lock(mAPRMutexp); +#if MUTEX_DEBUG + // avoid asserts on destruction in non-release builds + U32 id = LLThread::currentID(); + mIsLocked[id] = TRUE; +#endif + } apr_thread_cond_wait(mAPRCondp, mAPRMutexp); } diff --git a/indra/llcommon/llthread.h b/indra/llcommon/llthread.h index f1c6cd75af..40291a2569 100644 --- a/indra/llcommon/llthread.h +++ b/indra/llcommon/llthread.h @@ -35,8 +35,17 @@ class LLThread; class LLMutex; class LLCondition; +#if LL_WINDOWS +#define ll_thread_local __declspec(thread) +#else +#define ll_thread_local __thread +#endif + class LL_COMMON_API LLThread { +private: + static U32 sIDIter; + public: typedef enum e_thread_status { @@ -77,6 +86,8 @@ public: apr_pool_t *getAPRPool() { return mAPRPoolp; } LLVolatileAPRPool* getLocalAPRFilePool() { return mLocalAPRFilePoolp ; } + U32 getID() const { return mID; } + private: BOOL mPaused; @@ -91,6 +102,7 @@ protected: apr_pool_t *mAPRPoolp; BOOL mIsLocalPool; EThreadStatus mStatus; + U32 mID; //a local apr_pool for APRFile operations in this thread. If it exists, LLAPRFile::sAPRFilePoolp should not be used. //Note: this pool is used by APRFile ONLY, do NOT use it for any other purposes. @@ -128,17 +140,27 @@ protected: class LL_COMMON_API LLMutex { public: + typedef enum + { + NO_THREAD = 0xFFFFFFFF + } e_locking_thread; + LLMutex(apr_pool_t *apr_poolp); // NULL pool constructs a new pool for the mutex virtual ~LLMutex(); void lock(); // blocks void unlock(); bool isLocked(); // non-blocking, but does do a lock/unlock so not free + U32 lockingThread() const; //get ID of locking thread protected: apr_thread_mutex_t *mAPRMutexp; + mutable U32 mCount; + mutable U32 mLockingThread; + apr_pool_t *mAPRPoolp; BOOL mIsLocalPool; + #if MUTEX_DEBUG std::map<U32, BOOL> mIsLocked; #endif diff --git a/indra/llcommon/stdenums.h b/indra/llcommon/stdenums.h index 9f86de124e..556eff8370 100644 --- a/indra/llcommon/stdenums.h +++ b/indra/llcommon/stdenums.h @@ -49,7 +49,8 @@ enum EDragAndDropType DAD_ANIMATION = 12, DAD_GESTURE = 13, DAD_LINK = 14, - DAD_COUNT = 15, // number of types in this enum + DAD_MESH = 15, + DAD_COUNT = 16, // number of types in this enum }; // Reasons for drags to be denied. diff --git a/indra/llinventory/CMakeLists.txt b/indra/llinventory/CMakeLists.txt index 6b2b61f883..35a764b111 100644 --- a/indra/llinventory/CMakeLists.txt +++ b/indra/llinventory/CMakeLists.txt @@ -59,16 +59,17 @@ list(APPEND llinventory_SOURCE_FILES ${llinventory_HEADER_FILES}) add_library (llinventory ${llinventory_SOURCE_FILES}) -if(LL_TESTS) - #add unit tests - INCLUDE(LLAddBuildTest) - SET(llinventory_TEST_SOURCE_FILES - # no real unit tests yet! - ) - LL_ADD_PROJECT_UNIT_TESTS(llinventory "${llinventory_TEST_SOURCE_FILES}") - #set(TEST_DEBUG on) - set(test_libs llinventory ${LLMESSAGE_LIBRARIES} ${LLVFS_LIBRARIES} ${LLMATH_LIBRARIES} ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES}) - LL_ADD_INTEGRATION_TEST(inventorymisc "" "${test_libs}") - LL_ADD_INTEGRATION_TEST(llparcel "" "${test_libs}") -endif(LL_TESTS) +#add unit tests +if (LL_TESTS) + INCLUDE(LLAddBuildTest) + SET(llinventory_TEST_SOURCE_FILES + # no real unit tests yet! + ) + LL_ADD_PROJECT_UNIT_TESTS(llinventory "${llinventory_TEST_SOURCE_FILES}") + + #set(TEST_DEBUG on) + set(test_libs llinventory ${LLMESSAGE_LIBRARIES} ${LLVFS_LIBRARIES} ${LLMATH_LIBRARIES} ${LLCOMMON_LIBRARIES} ${WINDOWS_LIBRARIES}) + LL_ADD_INTEGRATION_TEST(inventorymisc "" "${test_libs}") + LL_ADD_INTEGRATION_TEST(llparcel "" "${test_libs}") +endif (LL_TESTS) diff --git a/indra/llinventory/llinventorytype.cpp b/indra/llinventory/llinventorytype.cpp index a99be1420b..d2bba21648 100644 --- a/indra/llinventory/llinventorytype.cpp +++ b/indra/llinventory/llinventorytype.cpp @@ -83,6 +83,7 @@ LLInventoryDictionary::LLInventoryDictionary() addEntry(LLInventoryType::IT_WEARABLE, new InventoryEntry("wearable", "wearable", 2, LLAssetType::AT_CLOTHING, LLAssetType::AT_BODYPART)); addEntry(LLInventoryType::IT_ANIMATION, new InventoryEntry("animation", "animation", 1, LLAssetType::AT_ANIMATION)); addEntry(LLInventoryType::IT_GESTURE, new InventoryEntry("gesture", "gesture", 1, LLAssetType::AT_GESTURE)); + addEntry(LLInventoryType::IT_MESH, new InventoryEntry("mesh", "mesh", 1, LLAssetType::AT_MESH)); } @@ -91,32 +92,58 @@ LLInventoryDictionary::LLInventoryDictionary() static const LLInventoryType::EType DEFAULT_ASSET_FOR_INV_TYPE[LLAssetType::AT_COUNT] = { - LLInventoryType::IT_TEXTURE, // AT_TEXTURE - LLInventoryType::IT_SOUND, // AT_SOUND - LLInventoryType::IT_CALLINGCARD, // AT_CALLINGCARD - LLInventoryType::IT_LANDMARK, // AT_LANDMARK - LLInventoryType::IT_LSL, // AT_SCRIPT - LLInventoryType::IT_WEARABLE, // AT_CLOTHING - LLInventoryType::IT_OBJECT, // AT_OBJECT - LLInventoryType::IT_NOTECARD, // AT_NOTECARD - LLInventoryType::IT_CATEGORY, // AT_CATEGORY - LLInventoryType::IT_NONE, // (null entry) - LLInventoryType::IT_LSL, // AT_LSL_TEXT - LLInventoryType::IT_LSL, // AT_LSL_BYTECODE - LLInventoryType::IT_TEXTURE, // AT_TEXTURE_TGA - LLInventoryType::IT_WEARABLE, // AT_BODYPART - LLInventoryType::IT_CATEGORY, // AT_TRASH - LLInventoryType::IT_CATEGORY, // AT_SNAPSHOT_CATEGORY - LLInventoryType::IT_CATEGORY, // AT_LOST_AND_FOUND - LLInventoryType::IT_SOUND, // AT_SOUND_WAV - LLInventoryType::IT_NONE, // AT_IMAGE_TGA - LLInventoryType::IT_NONE, // AT_IMAGE_JPEG - LLInventoryType::IT_ANIMATION, // AT_ANIMATION - LLInventoryType::IT_GESTURE, // AT_GESTURE - LLInventoryType::IT_NONE, // AT_SIMSTATE + LLInventoryType::IT_TEXTURE, // 0 AT_TEXTURE + LLInventoryType::IT_SOUND, // 1 AT_SOUND + LLInventoryType::IT_CALLINGCARD, // 2 AT_CALLINGCARD + LLInventoryType::IT_LANDMARK, // 3 AT_LANDMARK + LLInventoryType::IT_LSL, // 4 AT_SCRIPT + LLInventoryType::IT_WEARABLE, // 5 AT_CLOTHING + LLInventoryType::IT_OBJECT, // 6 AT_OBJECT + LLInventoryType::IT_NOTECARD, // 7 AT_NOTECARD + LLInventoryType::IT_CATEGORY, // 8 AT_CATEGORY + LLInventoryType::IT_NONE, // 9 (null entry) + LLInventoryType::IT_LSL, // 10 AT_LSL_TEXT + LLInventoryType::IT_LSL, // 11 AT_LSL_BYTECODE + LLInventoryType::IT_TEXTURE, // 12 AT_TEXTURE_TGA + LLInventoryType::IT_WEARABLE, // 13 AT_BODYPART + LLInventoryType::IT_CATEGORY, // 14 AT_TRASH + LLInventoryType::IT_CATEGORY, // 15 AT_SNAPSHOT_CATEGORY + LLInventoryType::IT_CATEGORY, // 16 AT_LOST_AND_FOUND + LLInventoryType::IT_SOUND, // 17 AT_SOUND_WAV + LLInventoryType::IT_NONE, // 18 AT_IMAGE_TGA + LLInventoryType::IT_NONE, // 19 AT_IMAGE_JPEG + LLInventoryType::IT_ANIMATION, // 20 AT_ANIMATION + LLInventoryType::IT_GESTURE, // 21 AT_GESTURE + LLInventoryType::IT_NONE, // 22 AT_SIMSTATE - LLInventoryType::IT_NONE, // AT_LINK - LLInventoryType::IT_NONE, // AT_LINK_FOLDER + LLInventoryType::IT_NONE, // 23 AT_LINK + LLInventoryType::IT_NONE, // 24 AT_LINK_FOLDER + + LLInventoryType::IT_NONE, // 25 AT_NONE + LLInventoryType::IT_NONE, // 26 AT_NONE + LLInventoryType::IT_NONE, // 27 AT_NONE + LLInventoryType::IT_NONE, // 28 AT_NONE + LLInventoryType::IT_NONE, // 29 AT_NONE + LLInventoryType::IT_NONE, // 30 AT_NONE + LLInventoryType::IT_NONE, // 31 AT_NONE + LLInventoryType::IT_NONE, // 32 AT_NONE + LLInventoryType::IT_NONE, // 33 AT_NONE + LLInventoryType::IT_NONE, // 34 AT_NONE + LLInventoryType::IT_NONE, // 35 AT_NONE + LLInventoryType::IT_NONE, // 36 AT_NONE + LLInventoryType::IT_NONE, // 37 AT_NONE + LLInventoryType::IT_NONE, // 38 AT_NONE + LLInventoryType::IT_NONE, // 39 AT_NONE + LLInventoryType::IT_NONE, // 40 AT_NONE + LLInventoryType::IT_NONE, // 41 AT_NONE + LLInventoryType::IT_NONE, // 42 AT_NONE + LLInventoryType::IT_NONE, // 43 AT_NONE + LLInventoryType::IT_NONE, // 44 AT_NONE + LLInventoryType::IT_NONE, // 45 AT_NONE + LLInventoryType::IT_NONE, // 46 AT_NONE + LLInventoryType::IT_NONE, // 47 AT_NONE + LLInventoryType::IT_NONE, // 48 AT_NONE + LLInventoryType::IT_MESH // 49 AT_MESH }; // static diff --git a/indra/llinventory/llinventorytype.h b/indra/llinventory/llinventorytype.h index d9777a73f2..1a24e351ad 100644 --- a/indra/llinventory/llinventorytype.h +++ b/indra/llinventory/llinventorytype.h @@ -61,7 +61,8 @@ public: IT_WEARABLE = 18, IT_ANIMATION = 19, IT_GESTURE = 20, - IT_COUNT = 21, + IT_MESH = 22, + IT_COUNT = 23, IT_NONE = -1 }; diff --git a/indra/llmath/CMakeLists.txt b/indra/llmath/CMakeLists.txt index e93fe90650..9dadad7dd3 100644 --- a/indra/llmath/CMakeLists.txt +++ b/indra/llmath/CMakeLists.txt @@ -15,13 +15,16 @@ set(llmath_SOURCE_FILES llcamera.cpp llcoordframe.cpp llline.cpp + llmatrix3a.cpp llmodularmath.cpp llperlin.cpp llquaternion.cpp llrect.cpp llsphere.cpp + llvector4a.cpp llvolume.cpp llvolumemgr.cpp + llvolumeoctree.cpp llsdutil_math.cpp m3math.cpp m4math.cpp @@ -49,21 +52,32 @@ set(llmath_HEADER_FILES llinterp.h llline.h llmath.h + llmatrix3a.h + llmatrix3a.inl llmodularmath.h lloctree.h llperlin.h llplane.h llquantize.h llquaternion.h + llquaternion2.h + llquaternion2.inl llrect.h + llsimdmath.h + llsimdtypes.h + llsimdtypes.inl llsphere.h lltreenode.h + llvector4a.h + llvector4a.inl + llvector4logical.h llv4math.h llv4matrix3.h llv4matrix4.h llv4vector3.h llvolume.h llvolumemgr.h + llvolumeoctree.h llsdutil_math.h m3math.h m4math.h diff --git a/indra/llmath/llcamera.cpp b/indra/llmath/llcamera.cpp index 687c1a7d45..22ba26f99b 100644 --- a/indra/llmath/llcamera.cpp +++ b/indra/llmath/llcamera.cpp @@ -45,7 +45,6 @@ LLCamera::LLCamera() : calculateFrustumPlanes(); } - LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_pixels, F32 near_plane, F32 far_plane) : LLCoordFrame(), mViewHeightInPixels(view_height_in_pixels), @@ -61,6 +60,10 @@ LLCamera::LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_p setView(vertical_fov_rads); } +LLCamera::~LLCamera() +{ + +} // ---------------- LLCamera::getFoo() member functions ---------------- @@ -82,11 +85,11 @@ F32 LLCamera::getMaxView() const // ---------------- LLCamera::setFoo() member functions ---------------- -void LLCamera::setUserClipPlane(LLPlane plane) +void LLCamera::setUserClipPlane(LLPlane& plane) { mPlaneCount = 7; - mAgentPlanes[6].p = plane; - mAgentPlanes[6].mask = calcPlaneMask(plane); + mAgentPlanes[6] = plane; + mPlaneMask[6] = plane.calcPlaneMask(); } void LLCamera::disableUserClipPlane() @@ -158,166 +161,91 @@ size_t LLCamera::readFrustumFromBuffer(const char *buffer) // ---------------- test methods ---------------- -S32 LLCamera::AABBInFrustum(const LLVector3 ¢er, const LLVector3& radius) +S32 LLCamera::AABBInFrustum(const LLVector4a ¢er, const LLVector4a& radius) { - static const LLVector3 scaler[] = { - LLVector3(-1,-1,-1), - LLVector3( 1,-1,-1), - LLVector3(-1, 1,-1), - LLVector3( 1, 1,-1), - LLVector3(-1,-1, 1), - LLVector3( 1,-1, 1), - LLVector3(-1, 1, 1), - LLVector3( 1, 1, 1) + static const LLVector4a scaler[] = { + LLVector4a(-1,-1,-1), + LLVector4a( 1,-1,-1), + LLVector4a(-1, 1,-1), + LLVector4a( 1, 1,-1), + LLVector4a(-1,-1, 1), + LLVector4a( 1,-1, 1), + LLVector4a(-1, 1, 1), + LLVector4a( 1, 1, 1) }; U8 mask = 0; - S32 result = 2; - - /*if (mFrustumCornerDist > 0.f && radius.magVecSquared() > mFrustumCornerDist * mFrustumCornerDist) - { //box is larger than frustum, check frustum quads against box planes - - static const LLVector3 dir[] = - { - LLVector3(1, 0, 0), - LLVector3(-1, 0, 0), - LLVector3(0, 1, 0), - LLVector3(0, -1, 0), - LLVector3(0, 0, 1), - LLVector3(0, 0, -1) - }; - - U32 quads[] = - { - 0, 1, 2, 3, - 0, 1, 5, 4, - 2, 3, 7, 6, - 3, 0, 7, 4, - 1, 2, 6, 4, - 4, 5, 6, 7 - }; - - result = 0; - - BOOL total_inside = TRUE; - for (U32 i = 0; i < 6; i++) - { - LLVector3 p = center + radius.scaledVec(dir[i]); - F32 d = -p*dir[i]; - - for (U32 j = 0; j < 6; j++) - { //for each quad - F32 dist = mAgentFrustum[quads[j*4+0]]*dir[i] + d; - if (dist > 0) - { //at least one frustum point is outside the AABB - total_inside = FALSE; - for (U32 k = 1; k < 4; k++) - { //for each other point on quad - if ( mAgentFrustum[quads[j*4+k]]*dir[i]+d <= 0.f) - { //quad is straddling some plane of AABB - return 1; - } - } - } - else - { - for (U32 k = 1; k < 4; k++) - { - if (mAgentFrustum[quads[j*4+k]]*dir[i]+d > 0.f) - { - return 1; - } - } - } - } - } - - if (total_inside) - { - result = 1; - } - } - else*/ + bool result = false; + LLVector4a rscale, maxp, minp; + LLSimdScalar d; + for (U32 i = 0; i < mPlaneCount; i++) { - for (U32 i = 0; i < mPlaneCount; i++) + mask = mPlaneMask[i]; + if (mask != 0xff) { - mask = mAgentPlanes[i].mask; - if (mask == 0xff) - { - continue; - } - LLPlane p = mAgentPlanes[i].p; - LLVector3 n = LLVector3(p); - float d = p.mV[3]; - LLVector3 rscale = radius.scaledVec(scaler[mask]); - - LLVector3 minp = center - rscale; - LLVector3 maxp = center + rscale; - - if (n * minp > -d) + const LLPlane& p(mAgentPlanes[i]); + p.getAt<3>(d); + rscale.setMul(radius, scaler[mask]); + minp.setSub(center, rscale); + d = -d; + if (p.dot3(minp).getF32() > d) { return 0; } - - if (n * maxp > -d) + + if(!result) { - result = 1; + maxp.setAdd(center, rscale); + result = (p.dot3(maxp).getF32() > d); } } } - - return result; + return result?1:2; } -S32 LLCamera::AABBInFrustumNoFarClip(const LLVector3 ¢er, const LLVector3& radius) + +S32 LLCamera::AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius) { - static const LLVector3 scaler[] = { - LLVector3(-1,-1,-1), - LLVector3( 1,-1,-1), - LLVector3(-1, 1,-1), - LLVector3( 1, 1,-1), - LLVector3(-1,-1, 1), - LLVector3( 1,-1, 1), - LLVector3(-1, 1, 1), - LLVector3( 1, 1, 1) + static const LLVector4a scaler[] = { + LLVector4a(-1,-1,-1), + LLVector4a( 1,-1,-1), + LLVector4a(-1, 1,-1), + LLVector4a( 1, 1,-1), + LLVector4a(-1,-1, 1), + LLVector4a( 1,-1, 1), + LLVector4a(-1, 1, 1), + LLVector4a( 1, 1, 1) }; U8 mask = 0; - S32 result = 2; - + bool result = false; + LLVector4a rscale, maxp, minp; + LLSimdScalar d; for (U32 i = 0; i < mPlaneCount; i++) { - if (i == 5) - { - continue; - } - - mask = mAgentPlanes[i].mask; - if (mask == 0xff) + mask = mPlaneMask[i]; + if ((i != 5) && (mask != 0xff)) { - continue; - } - LLPlane p = mAgentPlanes[i].p; - LLVector3 n = LLVector3(p); - float d = p.mV[3]; - LLVector3 rscale = radius.scaledVec(scaler[mask]); - - LLVector3 minp = center - rscale; - LLVector3 maxp = center + rscale; - - if (n * minp > -d) - { - return 0; - } - - if (n * maxp > -d) - { - result = 1; + const LLPlane& p(mAgentPlanes[i]); + p.getAt<3>(d); + rscale.setMul(radius, scaler[mask]); + minp.setSub(center, rscale); + d = -d; + if (p.dot3(minp).getF32() > d) + { + return 0; + } + + if(!result) + { + maxp.setAdd(center, rscale); + result = (p.dot3(maxp).getF32() > d); + } } } - return result; + return result?1:2; } int LLCamera::sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius) @@ -438,28 +366,22 @@ int LLCamera::sphereInFrustumOld(const LLVector3 &sphere_center, const F32 radiu int LLCamera::sphereInFrustum(const LLVector3 &sphere_center, const F32 radius) const { // Returns 1 if sphere is in frustum, 0 if not. - int res = 2; + bool res = false; for (int i = 0; i < 6; i++) { - if (mAgentPlanes[i].mask == 0xff) - { - continue; - } - - float d = mAgentPlanes[i].p.dist(sphere_center); - - if (d > radius) + if (mPlaneMask[i] != 0xff) { - return 0; - } + float d = mAgentPlanes[i].dist(sphere_center); - if (d > -radius) - { - res = 1; + if (d > radius) + { + return 0; + } + res = res || (d > -radius); } } - return res; + return res?1:2; } @@ -611,25 +533,6 @@ LLPlane planeFromPoints(LLVector3 p1, LLVector3 p2, LLVector3 p3) return LLPlane(p1, n); } -U8 LLCamera::calcPlaneMask(const LLPlane& plane) -{ - U8 mask = 0; - - if (plane.mV[0] >= 0) - { - mask |= 1; - } - if (plane.mV[1] >= 0) - { - mask |= 2; - } - if (plane.mV[2] >= 0) - { - mask |= 4; - } - - return mask; -} void LLCamera::ignoreAgentFrustumPlane(S32 idx) { @@ -638,12 +541,13 @@ void LLCamera::ignoreAgentFrustumPlane(S32 idx) return; } - mAgentPlanes[idx].mask = 0xff; - mAgentPlanes[idx].p.clearVec(); + mPlaneMask[idx] = 0xff; + mAgentPlanes[idx].clear(); } void LLCamera::calcAgentFrustumPlanes(LLVector3* frust) { + for (int i = 0; i < 8; i++) { mAgentFrustum[i] = frust[i]; @@ -656,27 +560,27 @@ void LLCamera::calcAgentFrustumPlanes(LLVector3* frust) //order of planes is important, keep most likely to fail in the front of the list //near - frust[0], frust[1], frust[2] - mAgentPlanes[2].p = planeFromPoints(frust[0], frust[1], frust[2]); + mAgentPlanes[2] = planeFromPoints(frust[0], frust[1], frust[2]); //far - mAgentPlanes[5].p = planeFromPoints(frust[5], frust[4], frust[6]); + mAgentPlanes[5] = planeFromPoints(frust[5], frust[4], frust[6]); //left - mAgentPlanes[0].p = planeFromPoints(frust[4], frust[0], frust[7]); + mAgentPlanes[0] = planeFromPoints(frust[4], frust[0], frust[7]); //right - mAgentPlanes[1].p = planeFromPoints(frust[1], frust[5], frust[6]); + mAgentPlanes[1] = planeFromPoints(frust[1], frust[5], frust[6]); //top - mAgentPlanes[4].p = planeFromPoints(frust[3], frust[2], frust[6]); + mAgentPlanes[4] = planeFromPoints(frust[3], frust[2], frust[6]); //bottom - mAgentPlanes[3].p = planeFromPoints(frust[1], frust[0], frust[4]); + mAgentPlanes[3] = planeFromPoints(frust[1], frust[0], frust[4]); //cache plane octant facing mask for use in AABBInFrustum for (U32 i = 0; i < mPlaneCount; i++) { - mAgentPlanes[i].mask = calcPlaneMask(mAgentPlanes[i].p); + mPlaneMask[i] = mAgentPlanes[i].calcPlaneMask(); } } @@ -730,9 +634,10 @@ void LLCamera::calculateWorldFrustumPlanes() F32 d; LLVector3 center = mOrigin - mXAxis*mNearPlane; mWorldPlanePos = center; + LLVector3 pnorm; for (int p=0; p<4; p++) { - LLVector3 pnorm = LLVector3(mLocalPlanes[p]); + mLocalPlanes[p].getVector3(pnorm); LLVector3 norm = rotateToAbsolute(pnorm); norm.normVec(); d = -(center * norm); @@ -742,13 +647,15 @@ void LLCamera::calculateWorldFrustumPlanes() LLVector3 zaxis(0, 0, 1.0f); F32 yaw = getYaw(); { - LLVector3 tnorm = LLVector3(mLocalPlanes[PLANE_LEFT]); + LLVector3 tnorm; + mLocalPlanes[PLANE_LEFT].getVector3(tnorm); tnorm.rotVec(yaw, zaxis); d = -(mOrigin * tnorm); mHorizPlanes[HORIZ_PLANE_LEFT] = LLPlane(tnorm, d); } { - LLVector3 tnorm = LLVector3(mLocalPlanes[PLANE_RIGHT]); + LLVector3 tnorm; + mLocalPlanes[PLANE_RIGHT].getVector3(tnorm); tnorm.rotVec(yaw, zaxis); d = -(mOrigin * tnorm); mHorizPlanes[HORIZ_PLANE_RIGHT] = LLPlane(tnorm, d); diff --git a/indra/llmath/llcamera.h b/indra/llmath/llcamera.h index 531144db39..ec67b91d05 100644 --- a/indra/llmath/llcamera.h +++ b/indra/llmath/llcamera.h @@ -31,6 +31,7 @@ #include "llmath.h" #include "llcoordframe.h" #include "llplane.h" +#include "llvector4a.h" const F32 DEFAULT_FIELD_OF_VIEW = 60.f * DEG_TO_RAD; const F32 DEFAULT_ASPECT_RATIO = 640.f / 480.f; @@ -64,6 +65,12 @@ class LLCamera : public LLCoordFrame { public: + + LLCamera(const LLCamera& rhs) + { + *this = rhs; + } + enum { PLANE_LEFT = 0, PLANE_RIGHT = 1, @@ -101,6 +108,9 @@ public: }; private: + LLPlane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP + U8 mPlaneMask[8]; // 8 for alignment + F32 mView; // angle between top and bottom frustum planes in radians. F32 mAspect; // width/height S32 mViewHeightInPixels; // for ViewHeightInPixels() only @@ -114,30 +124,22 @@ private: LLPlane mWorldPlanes[PLANE_NUM]; LLPlane mHorizPlanes[HORIZ_PLANE_NUM]; - struct frustum_plane - { - frustum_plane() : mask(0) {} - LLPlane p; - U8 mask; - }; - frustum_plane mAgentPlanes[7]; //frustum planes in agent space a la gluUnproject (I'm a bastard, I know) - DaveP - U32 mPlaneCount; //defaults to 6, if setUserClipPlane is called, uses user supplied clip plane in LLVector3 mWorldPlanePos; // Position of World Planes (may be offset from camera) public: LLVector3 mAgentFrustum[8]; //8 corners of 6-plane frustum F32 mFrustumCornerDist; //distance to corner of frustum against far clip plane - LLPlane getAgentPlane(U32 idx) { return mAgentPlanes[idx].p; } + LLPlane& getAgentPlane(U32 idx) { return mAgentPlanes[idx]; } public: LLCamera(); LLCamera(F32 vertical_fov_rads, F32 aspect_ratio, S32 view_height_in_pixels, F32 near_plane, F32 far_plane); - virtual ~LLCamera(){} // no-op virtual destructor + virtual ~LLCamera(); + - void setUserClipPlane(LLPlane plane); + void setUserClipPlane(LLPlane& plane); void disableUserClipPlane(); - U8 calcPlaneMask(const LLPlane& plane); virtual void setView(F32 vertical_fov_rads); void setViewHeightInPixels(S32 height); void setAspect(F32 new_aspect); @@ -184,8 +186,8 @@ public: S32 sphereInFrustum(const LLVector3 ¢er, const F32 radius) const; S32 pointInFrustum(const LLVector3 &point) const { return sphereInFrustum(point, 0.0f); } S32 sphereInFrustumFull(const LLVector3 ¢er, const F32 radius) const { return sphereInFrustum(center, radius); } - S32 AABBInFrustum(const LLVector3 ¢er, const LLVector3& radius); - S32 AABBInFrustumNoFarClip(const LLVector3 ¢er, const LLVector3& radius); + S32 AABBInFrustum(const LLVector4a& center, const LLVector4a& radius); + S32 AABBInFrustumNoFarClip(const LLVector4a& center, const LLVector4a& radius); //does a quick 'n dirty sphere-sphere check S32 sphereInFrustumQuick(const LLVector3 &sphere_center, const F32 radius); diff --git a/indra/llmath/llmath.h b/indra/llmath/llmath.h index 798f1154d0..eea7c977fb 100644 --- a/indra/llmath/llmath.h +++ b/indra/llmath/llmath.h @@ -29,7 +29,7 @@ #include <cmath> #include <cstdlib> -#include <complex> +#include <vector> #include "lldefs.h" //#include "llstl.h" // *TODO: Remove when LLString is gone //#include "llstring.h" // *TODO: Remove when LLString is gone @@ -55,32 +55,11 @@ #endif // Single Precision Floating Point Routines -#ifndef sqrtf -#define sqrtf(x) ((F32)sqrt((F64)(x))) -#endif -#ifndef fsqrtf -#define fsqrtf(x) sqrtf(x) -#endif - -#ifndef cosf -#define cosf(x) ((F32)cos((F64)(x))) -#endif -#ifndef sinf -#define sinf(x) ((F32)sin((F64)(x))) -#endif -#ifndef tanf +// (There used to be more defined here, but they appeared to be redundant and +// were breaking some other includes. Removed by Falcon, reviewed by Andrew, 11/25/09) +/*#ifndef tanf #define tanf(x) ((F32)tan((F64)(x))) -#endif -#ifndef acosf -#define acosf(x) ((F32)acos((F64)(x))) -#endif - -#ifndef powf -#define powf(x,y) ((F32)pow((F64)(x),(F64)(y))) -#endif -#ifndef expf -#define expf(x) ((F32)exp((F64)(x))) -#endif +#endif*/ const F32 GRAVITY = -9.8f; @@ -200,7 +179,7 @@ inline S32 llfloor( F32 f ) } return result; #else - return (S32)floorf(f); + return (S32)floor(f); #endif } @@ -378,11 +357,14 @@ inline F32 snap_to_sig_figs(F32 foo, S32 sig_figs) bar *= 10.f; } - foo = (F32)llround(foo * bar); + //F32 new_foo = (F32)llround(foo * bar); + // the llround() implementation sucks. Don't us it. - // shift back - foo /= bar; - return foo; + F32 sign = (foo > 0.f) ? 1.f : -1.f; + F32 new_foo = F32( S64(foo * bar + sign * 0.5f)); + new_foo /= bar; + + return new_foo; } inline F32 lerp(F32 a, F32 b, F32 u) @@ -516,4 +498,45 @@ inline F32 llgaussian(F32 x, F32 o) return 1.f/(F_SQRT_TWO_PI*o)*powf(F_E, -(x*x)/(2*o*o)); } +//helper function for removing outliers +template <class VEC_TYPE> +inline void ll_remove_outliers(std::vector<VEC_TYPE>& data, F32 k) +{ + if (data.size() < 100) + { //not enough samples + return; + } + + VEC_TYPE Q1 = data[data.size()/4]; + VEC_TYPE Q3 = data[data.size()-data.size()/4-1]; + + VEC_TYPE min = (VEC_TYPE) ((F32) Q1-k * (F32) (Q3-Q1)); + VEC_TYPE max = (VEC_TYPE) ((F32) Q3+k * (F32) (Q3-Q1)); + + U32 i = 0; + while (i < data.size() && data[i] < min) + { + i++; + } + + S32 j = data.size()-1; + while (j > 0 && data[j] > max) + { + j--; + } + + if (j < data.size()-1) + { + data.erase(data.begin()+j, data.end()); + } + + if (i > 0) + { + data.erase(data.begin(), data.begin()+i); + } +} + +// Include simd math header +#include "llsimdmath.h" + #endif diff --git a/indra/llmath/llmatrix3a.cpp b/indra/llmath/llmatrix3a.cpp new file mode 100644 index 0000000000..ab077abcb0 --- /dev/null +++ b/indra/llmath/llmatrix3a.cpp @@ -0,0 +1,134 @@ +/** + * @file llvector4a.cpp + * @brief SIMD vector implementation + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#include "llmath.h" + +static LL_ALIGN_16(const F32 M_IDENT_3A[12]) = + { 1.f, 0.f, 0.f, 0.f, // Column 1 + 0.f, 1.f, 0.f, 0.f, // Column 2 + 0.f, 0.f, 1.f, 0.f }; // Column 3 + +extern const LLMatrix3a LL_M3A_IDENTITY = *reinterpret_cast<const LLMatrix3a*> (M_IDENT_3A); + +void LLMatrix3a::setMul( const LLMatrix3a& lhs, const LLMatrix3a& rhs ) +{ + const LLVector4a col0 = lhs.getColumn(0); + const LLVector4a col1 = lhs.getColumn(1); + const LLVector4a col2 = lhs.getColumn(2); + + for ( int i = 0; i < 3; i++ ) + { + LLVector4a xxxx = _mm_load_ss( rhs.mColumns[i].getF32ptr() ); + xxxx.splat<0>( xxxx ); + xxxx.mul( col0 ); + + { + LLVector4a yyyy = _mm_load_ss( rhs.mColumns[i].getF32ptr() + 1 ); + yyyy.splat<0>( yyyy ); + yyyy.mul( col1 ); + xxxx.add( yyyy ); + } + + { + LLVector4a zzzz = _mm_load_ss( rhs.mColumns[i].getF32ptr() + 2 ); + zzzz.splat<0>( zzzz ); + zzzz.mul( col2 ); + xxxx.add( zzzz ); + } + + xxxx.store4a( mColumns[i].getF32ptr() ); + } + +} + +/*static */void LLMatrix3a::batchTransform( const LLMatrix3a& xform, const LLVector4a* src, int numVectors, LLVector4a* dst ) +{ + const LLVector4a col0 = xform.getColumn(0); + const LLVector4a col1 = xform.getColumn(1); + const LLVector4a col2 = xform.getColumn(2); + const LLVector4a* maxAddr = src + numVectors; + + if ( numVectors & 0x1 ) + { + LLVector4a xxxx = _mm_load_ss( (const F32*)src ); + LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 ); + LLVector4a zzzz = _mm_load_ss( (const F32*)src + 2 ); + xxxx.splat<0>( xxxx ); + yyyy.splat<0>( yyyy ); + zzzz.splat<0>( zzzz ); + xxxx.mul( col0 ); + yyyy.mul( col1 ); + zzzz.mul( col2 ); + xxxx.add( yyyy ); + xxxx.add( zzzz ); + xxxx.store4a( (F32*)dst ); + src++; + dst++; + } + + + numVectors >>= 1; + while ( src < maxAddr ) + { + _mm_prefetch( (const char*)(src + 32 ), _MM_HINT_NTA ); + _mm_prefetch( (const char*)(dst + 32), _MM_HINT_NTA ); + LLVector4a xxxx = _mm_load_ss( (const F32*)src ); + LLVector4a xxxx1= _mm_load_ss( (const F32*)(src + 1) ); + + xxxx.splat<0>( xxxx ); + xxxx1.splat<0>( xxxx1 ); + xxxx.mul( col0 ); + xxxx1.mul( col0 ); + + { + LLVector4a yyyy = _mm_load_ss( (const F32*)src + 1 ); + LLVector4a yyyy1 = _mm_load_ss( (const F32*)(src + 1) + 1); + yyyy.splat<0>( yyyy ); + yyyy1.splat<0>( yyyy1 ); + yyyy.mul( col1 ); + yyyy1.mul( col1 ); + xxxx.add( yyyy ); + xxxx1.add( yyyy1 ); + } + + { + LLVector4a zzzz = _mm_load_ss( (const F32*)(src) + 2 ); + LLVector4a zzzz1 = _mm_load_ss( (const F32*)(++src) + 2 ); + zzzz.splat<0>( zzzz ); + zzzz1.splat<0>( zzzz1 ); + zzzz.mul( col2 ); + zzzz1.mul( col2 ); + xxxx.add( zzzz ); + xxxx1.add( zzzz1 ); + } + + xxxx.store4a(dst->getF32ptr()); + src++; + dst++; + + xxxx1.store4a((F32*)dst++); + } +} diff --git a/indra/llmath/llmatrix3a.h b/indra/llmath/llmatrix3a.h new file mode 100644 index 0000000000..adb7e3389d --- /dev/null +++ b/indra/llmath/llmatrix3a.h @@ -0,0 +1,128 @@ +/** + * @file llmatrix3a.h + * @brief LLMatrix3a class header file - memory aligned and vectorized 3x3 matrix + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_LLMATRIX3A_H +#define LL_LLMATRIX3A_H + +///////////////////////////// +// LLMatrix3a, LLRotation +///////////////////////////// +// This class stores a 3x3 (technically 4x3) matrix in column-major order +///////////////////////////// +///////////////////////////// +// These classes are intentionally minimal right now. If you need additional +// functionality, please contact someone with SSE experience (e.g., Falcon or +// Huseby). +///////////////////////////// + +// LLMatrix3a is the base class for LLRotation, which should be used instead any time you're dealing with a +// rotation matrix. +class LLMatrix3a +{ +public: + + // Utility function for quickly transforming an array of LLVector4a's + // For transforming a single LLVector4a, see LLVector4a::setRotated + static void batchTransform( const LLMatrix3a& xform, const LLVector4a* src, int numVectors, LLVector4a* dst ); + + // Utility function to obtain the identity matrix + static inline const LLMatrix3a& getIdentity(); + + ////////////////////////// + // Ctors + ////////////////////////// + + // Ctor + LLMatrix3a() {} + + // Ctor for setting by columns + inline LLMatrix3a( const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2 ); + + ////////////////////////// + // Get/Set + ////////////////////////// + + // Loads from an LLMatrix3 + inline void loadu(const LLMatrix3& src); + + // Set rows + inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2); + + // Set columns + inline void setColumns(const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2); + + // Get the read-only access to a specified column. Valid columns are 0-2, but the + // function is unchecked. You've been warned. + inline const LLVector4a& getColumn(const U32 column) const; + + ///////////////////////// + // Matrix modification + ///////////////////////// + + // Set this matrix to the product of lhs and rhs ( this = lhs * rhs ) + void setMul( const LLMatrix3a& lhs, const LLMatrix3a& rhs ); + + // Set this matrix to the transpose of src + inline void setTranspose(const LLMatrix3a& src); + + // Set this matrix to a*w + b*(1-w) + inline void setLerp(const LLMatrix3a& a, const LLMatrix3a& b, F32 w); + + ///////////////////////// + // Matrix inspection + ///////////////////////// + + // Sets all 4 elements in 'dest' to the determinant of this matrix. + // If you will be using the determinant in subsequent ops with LLVector4a, use this version + inline void getDeterminant( LLVector4a& dest ) const; + + // Returns the determinant as an LLSimdScalar. Use this if you will be using the determinant + // primary for scalar operations. + inline LLSimdScalar getDeterminant() const; + + // Returns nonzero if rows 0-2 and colums 0-2 contain no NaN or INF values. Row 3 is ignored + inline LLBool32 isFinite() const; + + // Returns true if this matrix is equal to 'rhs' up to 'tolerance' + inline bool isApproximatelyEqual( const LLMatrix3a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + +protected: + + LLVector4a mColumns[3]; + +}; + +class LLRotation : public LLMatrix3a +{ +public: + + LLRotation() {} + + // Returns true if this rotation is orthonormal with det ~= 1 + inline bool isOkRotation() const; +}; + +#endif diff --git a/indra/llmath/llmatrix3a.inl b/indra/llmath/llmatrix3a.inl new file mode 100644 index 0000000000..37819fea3c --- /dev/null +++ b/indra/llmath/llmatrix3a.inl @@ -0,0 +1,119 @@ +/** + * @file llmatrix3a.inl + * @brief LLMatrix3a inline definitions + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#include "llmatrix3a.h" +#include "m3math.h" + +inline LLMatrix3a::LLMatrix3a( const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2 ) +{ + setColumns( c0, c1, c2 ); +} + +inline void LLMatrix3a::loadu(const LLMatrix3& src) +{ + mColumns[0].load3(src.mMatrix[0]); + mColumns[1].load3(src.mMatrix[1]); + mColumns[2].load3(src.mMatrix[2]); +} + +inline void LLMatrix3a::setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2) +{ + mColumns[0] = r0; + mColumns[1] = r1; + mColumns[2] = r2; + setTranspose( *this ); +} + +inline void LLMatrix3a::setColumns(const LLVector4a& c0, const LLVector4a& c1, const LLVector4a& c2) +{ + mColumns[0] = c0; + mColumns[1] = c1; + mColumns[2] = c2; +} + +inline void LLMatrix3a::setTranspose(const LLMatrix3a& src) +{ + const LLQuad srcCol0 = src.mColumns[0]; + const LLQuad srcCol1 = src.mColumns[1]; + const LLQuad unpacklo = _mm_unpacklo_ps( srcCol0, srcCol1 ); + mColumns[0] = _mm_movelh_ps( unpacklo, src.mColumns[2] ); + mColumns[1] = _mm_shuffle_ps( _mm_movehl_ps( srcCol0, unpacklo ), src.mColumns[2], _MM_SHUFFLE(0, 1, 1, 0) ); + mColumns[2] = _mm_shuffle_ps( _mm_unpackhi_ps( srcCol0, srcCol1 ), src.mColumns[2], _MM_SHUFFLE(0, 2, 1, 0) ); +} + +inline const LLVector4a& LLMatrix3a::getColumn(const U32 column) const +{ + llassert( column < 3 ); + return mColumns[column]; +} + +inline void LLMatrix3a::setLerp(const LLMatrix3a& a, const LLMatrix3a& b, F32 w) +{ + mColumns[0].setLerp( a.mColumns[0], b.mColumns[0], w ); + mColumns[1].setLerp( a.mColumns[1], b.mColumns[1], w ); + mColumns[2].setLerp( a.mColumns[2], b.mColumns[2], w ); +} + +inline LLBool32 LLMatrix3a::isFinite() const +{ + return mColumns[0].isFinite3() && mColumns[1].isFinite3() && mColumns[2].isFinite3(); +} + +inline void LLMatrix3a::getDeterminant( LLVector4a& dest ) const +{ + LLVector4a col1xcol2; col1xcol2.setCross3( mColumns[1], mColumns[2] ); + dest.setAllDot3( col1xcol2, mColumns[0] ); +} + +inline LLSimdScalar LLMatrix3a::getDeterminant() const +{ + LLVector4a col1xcol2; col1xcol2.setCross3( mColumns[1], mColumns[2] ); + return col1xcol2.dot3( mColumns[0] ); +} + +inline bool LLMatrix3a::isApproximatelyEqual( const LLMatrix3a& rhs, F32 tolerance /*= F_APPROXIMATELY_ZERO*/ ) const +{ + return rhs.getColumn(0).equals3(mColumns[0], tolerance) + && rhs.getColumn(1).equals3(mColumns[1], tolerance) + && rhs.getColumn(2).equals3(mColumns[2], tolerance); +} + +inline const LLMatrix3a& LLMatrix3a::getIdentity() +{ + extern const LLMatrix3a LL_M3A_IDENTITY; + return LL_M3A_IDENTITY; +} + +inline bool LLRotation::isOkRotation() const +{ + LLMatrix3a transpose; transpose.setTranspose( *this ); + LLMatrix3a product; product.setMul( *this, transpose ); + + LLSimdScalar detMinusOne = getDeterminant() - 1.f; + + return product.isApproximatelyEqual( LLMatrix3a::getIdentity() ) && (detMinusOne.getAbs() < F_APPROXIMATELY_ZERO); +} + diff --git a/indra/llmath/llmatrix4a.h b/indra/llmath/llmatrix4a.h new file mode 100644 index 0000000000..27cf5b79f6 --- /dev/null +++ b/indra/llmath/llmatrix4a.h @@ -0,0 +1,143 @@ +/** + * @file llmatrix4a.h + * @brief LLMatrix4a class header file - memory aligned and vectorized 4x4 matrix + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_LLMATRIX4A_H +#define LL_LLMATRIX4A_H + +#include "llvector4a.h" +#include "m4math.h" +#include "m3math.h" + +class LLMatrix4a +{ +public: + LLVector4a mMatrix[4]; + + inline void clear() + { + mMatrix[0].clear(); + mMatrix[1].clear(); + mMatrix[2].clear(); + mMatrix[3].clear(); + } + + inline void loadu(const LLMatrix4& src) + { + mMatrix[0] = _mm_loadu_ps(src.mMatrix[0]); + mMatrix[1] = _mm_loadu_ps(src.mMatrix[1]); + mMatrix[2] = _mm_loadu_ps(src.mMatrix[2]); + mMatrix[3] = _mm_loadu_ps(src.mMatrix[3]); + + } + + inline void loadu(const LLMatrix3& src) + { + mMatrix[0].load3(src.mMatrix[0]); + mMatrix[1].load3(src.mMatrix[1]); + mMatrix[2].load3(src.mMatrix[2]); + mMatrix[3].set(0,0,0,1.f); + } + + inline void add(const LLMatrix4a& rhs) + { + mMatrix[0].add(rhs.mMatrix[0]); + mMatrix[1].add(rhs.mMatrix[1]); + mMatrix[2].add(rhs.mMatrix[2]); + mMatrix[3].add(rhs.mMatrix[3]); + } + + inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2) + { + mMatrix[0] = r0; + mMatrix[1] = r1; + mMatrix[2] = r2; + } + + inline void setMul(const LLMatrix4a& m, const F32 s) + { + mMatrix[0].setMul(m.mMatrix[0], s); + mMatrix[1].setMul(m.mMatrix[1], s); + mMatrix[2].setMul(m.mMatrix[2], s); + mMatrix[3].setMul(m.mMatrix[3], s); + } + + inline void setLerp(const LLMatrix4a& a, const LLMatrix4a& b, F32 w) + { + LLVector4a d0,d1,d2,d3; + d0.setSub(b.mMatrix[0], a.mMatrix[0]); + d1.setSub(b.mMatrix[1], a.mMatrix[1]); + d2.setSub(b.mMatrix[2], a.mMatrix[2]); + d3.setSub(b.mMatrix[3], a.mMatrix[3]); + + // this = a + d*w + + d0.mul(w); + d1.mul(w); + d2.mul(w); + d3.mul(w); + + mMatrix[0].setAdd(a.mMatrix[0],d0); + mMatrix[1].setAdd(a.mMatrix[1],d1); + mMatrix[2].setAdd(a.mMatrix[2],d2); + mMatrix[3].setAdd(a.mMatrix[3],d3); + } + + inline void rotate(const LLVector4a& v, LLVector4a& res) + { + res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); + res.mul(mMatrix[0]); + + LLVector4a y; + y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); + y.mul(mMatrix[1]); + + LLVector4a z; + z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); + z.mul(mMatrix[2]); + + res.add(y); + res.add(z); + } + + inline void affineTransform(const LLVector4a& v, LLVector4a& res) + { + LLVector4a x,y,z; + + x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); + y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); + z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); + + x.mul(mMatrix[0]); + y.mul(mMatrix[1]); + z.mul(mMatrix[2]); + + x.add(y); + z.add(mMatrix[3]); + res.setAdd(x,z); + } +}; + +#endif diff --git a/indra/llmath/lloctree.h b/indra/llmath/lloctree.h index 90d4d742c9..fdfc24f8b7 100644 --- a/indra/llmath/lloctree.h +++ b/indra/llmath/lloctree.h @@ -29,14 +29,11 @@ #include "lltreenode.h" #include "v3math.h" +#include "llvector4a.h" #include <vector> #include <set> -#if LL_RELEASE_WITH_DEBUG_INFO || LL_DEBUG -#define OCT_ERRS LL_ERRS("OctreeErrors") -#else #define OCT_ERRS LL_WARNS("OctreeErrors") -#endif #define LL_OCTREE_PARANOIA_CHECK 0 #if LL_DARWIN @@ -67,6 +64,13 @@ public: }; template <class T> +class LLOctreeTravelerDepthFirst : public LLOctreeTraveler<T> +{ +public: + virtual void traverse(const LLOctreeNode<T>* node); +}; + +template <class T> class LLOctreeNode : public LLTreeNode<T> { public: @@ -81,23 +85,30 @@ public: typedef LLOctreeNode<T> oct_node; typedef LLOctreeListener<T> oct_listener; - static const U8 OCTANT_POSITIVE_X = 0x01; - static const U8 OCTANT_POSITIVE_Y = 0x02; - static const U8 OCTANT_POSITIVE_Z = 0x04; - - LLOctreeNode( LLVector3d center, - LLVector3d size, + /*void* operator new(size_t size) + { + return ll_aligned_malloc_16(size); + } + + void operator delete(void* ptr) + { + ll_aligned_free_16(ptr); + }*/ + + LLOctreeNode( const LLVector4a& center, + const LLVector4a& size, BaseType* parent, U8 octant = 255) : mParent((oct_node*)parent), - mCenter(center), - mSize(size), mOctant(octant) { + mCenter = center; + mSize = size; + updateMinMax(); if ((mOctant == 255) && mParent) { - mOctant = ((oct_node*) mParent)->getOctant(mCenter.mdV); + mOctant = ((oct_node*) mParent)->getOctant(mCenter); } clearChildren(); @@ -114,40 +125,24 @@ public: } inline const BaseType* getParent() const { return mParent; } - inline void setParent(BaseType* parent) { mParent = (oct_node*) parent; } - inline const LLVector3d& getCenter() const { return mCenter; } - inline const LLVector3d& getSize() const { return mSize; } - inline void setCenter(LLVector3d center) { mCenter = center; } - inline void setSize(LLVector3d size) { mSize = size; } - inline oct_node* getNodeAt(T* data) { return getNodeAt(data->getPositionGroup(), data->getBinRadius()); } - inline U8 getOctant() const { return mOctant; } - inline void setOctant(U8 octant) { mOctant = octant; } + inline void setParent(BaseType* parent) { mParent = (oct_node*) parent; } + inline const LLVector4a& getCenter() const { return mCenter; } + inline const LLVector4a& getSize() const { return mSize; } + inline void setCenter(const LLVector4a& center) { mCenter = center; } + inline void setSize(const LLVector4a& size) { mSize = size; } + inline oct_node* getNodeAt(T* data) { return getNodeAt(data->getPositionGroup(), data->getBinRadius()); } + inline U8 getOctant() const { return mOctant; } inline const oct_node* getOctParent() const { return (const oct_node*) getParent(); } inline oct_node* getOctParent() { return (oct_node*) getParent(); } - U8 getOctant(const F64 pos[]) const //get the octant pos is in + U8 getOctant(const LLVector4a& pos) const //get the octant pos is in { - U8 ret = 0; - - if (pos[0] > mCenter.mdV[0]) - { - ret |= OCTANT_POSITIVE_X; - } - if (pos[1] > mCenter.mdV[1]) - { - ret |= OCTANT_POSITIVE_Y; - } - if (pos[2] > mCenter.mdV[2]) - { - ret |= OCTANT_POSITIVE_Z; - } - - return ret; + return (U8) (pos.greaterThan(mCenter).getGatheredBits() & 0x7); } - inline bool isInside(const LLVector3d& pos, const F64& rad) const + inline bool isInside(const LLVector4a& pos, const F32& rad) const { - return rad <= mSize.mdV[0]*2.0 && isInside(pos); + return rad <= mSize[0]*2.f && isInside(pos); } inline bool isInside(T* data) const @@ -155,29 +150,27 @@ public: return isInside(data->getPositionGroup(), data->getBinRadius()); } - bool isInside(const LLVector3d& pos) const + bool isInside(const LLVector4a& pos) const { - const F64& x = pos.mdV[0]; - const F64& y = pos.mdV[1]; - const F64& z = pos.mdV[2]; - - if (x > mMax.mdV[0] || x <= mMin.mdV[0] || - y > mMax.mdV[1] || y <= mMin.mdV[1] || - z > mMax.mdV[2] || z <= mMin.mdV[2]) + S32 gt = pos.greaterThan(mMax).getGatheredBits() & 0x7; + if (gt) { return false; } - + + S32 lt = pos.lessEqual(mMin).getGatheredBits() & 0x7; + if (lt) + { + return false; + } + return true; } void updateMinMax() { - for (U32 i = 0; i < 3; i++) - { - mMax.mdV[i] = mCenter.mdV[i] + mSize.mdV[i]; - mMin.mdV[i] = mCenter.mdV[i] - mSize.mdV[i]; - } + mMax.setAdd(mCenter, mSize); + mMin.setSub(mCenter, mSize); } inline oct_listener* getOctListener(U32 index) @@ -190,34 +183,34 @@ public: return contains(xform->getBinRadius()); } - bool contains(F64 radius) + bool contains(F32 radius) { if (mParent == NULL) { //root node contains nothing return false; } - F64 size = mSize.mdV[0]; - F64 p_size = size * 2.0; + F32 size = mSize[0]; + F32 p_size = size * 2.f; - return (radius <= 0.001 && size <= 0.001) || + return (radius <= 0.001f && size <= 0.001f) || (radius <= p_size && radius > size); } - static void pushCenter(LLVector3d ¢er, const LLVector3d &size, const T* data) + static void pushCenter(LLVector4a ¢er, const LLVector4a &size, const T* data) { - const LLVector3d& pos = data->getPositionGroup(); - for (U32 i = 0; i < 3; i++) - { - if (pos.mdV[i] > center.mdV[i]) - { - center.mdV[i] += size.mdV[i]; - } - else - { - center.mdV[i] -= size.mdV[i]; - } - } + const LLVector4a& pos = data->getPositionGroup(); + + LLVector4Logical gt = pos.greaterThan(center); + + LLVector4a up; + up = _mm_and_ps(size, gt); + + LLVector4a down; + down = _mm_andnot_ps(gt, size); + + center.add(up); + center.sub(down); } void accept(oct_traveler* visitor) { visitor->visit(this); } @@ -236,32 +229,49 @@ public: void accept(tree_traveler* visitor) const { visitor->visit(this); } void accept(oct_traveler* visitor) const { visitor->visit(this); } - oct_node* getNodeAt(const LLVector3d& pos, const F64& rad) + void validateChildMap() + { + for (U32 i = 0; i < 8; i++) + { + U8 idx = mChildMap[i]; + if (idx != 255) + { + LLOctreeNode<T>* child = mChild[idx]; + + if (child->getOctant() != i) + { + llerrs << "Invalid child map, bad octant data." << llendl; + } + + if (getOctant(child->getCenter()) != child->getOctant()) + { + llerrs << "Invalid child octant compared to position data." << llendl; + } + } + } + } + + + oct_node* getNodeAt(const LLVector4a& pos, const F32& rad) { LLOctreeNode<T>* node = this; if (node->isInside(pos, rad)) { //do a quick search by octant - U8 octant = node->getOctant(pos.mdV); - BOOL keep_going = TRUE; - + U8 octant = node->getOctant(pos); + //traverse the tree until we find a node that has no node //at the appropriate octant or is smaller than the object. //by definition, that node is the smallest node that contains // the data - while (keep_going && node->getSize().mdV[0] >= rad) + U8 next_node = node->mChildMap[octant]; + + while (next_node != 255 && node->getSize()[0] >= rad) { - keep_going = FALSE; - for (U32 i = 0; i < node->getChildCount() && !keep_going; i++) - { - if (node->getChild(i)->getOctant() == octant) - { - node = node->getChild(i); - octant = node->getOctant(pos.mdV); - keep_going = TRUE; - } - } + node = node->getChild(next_node); + octant = node->getOctant(pos); + next_node = node->mChildMap[octant]; } } else if (!node->contains(rad) && node->getParent()) @@ -276,7 +286,7 @@ public: { if (data == NULL) { - //OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl; + OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE BRANCH !!!" << llendl; return false; } LLOctreeNode<T>* parent = getOctParent(); @@ -284,10 +294,8 @@ public: //is it here? if (isInside(data->getPositionGroup())) { - if (getElementCount() < LL_OCTREE_MAX_CAPACITY && - (contains(data->getBinRadius()) || - (data->getBinRadius() > getSize().mdV[0] && - parent && parent->getElementCount() >= LL_OCTREE_MAX_CAPACITY))) + if ((getElementCount() < LL_OCTREE_MAX_CAPACITY && contains(data->getBinRadius()) || + (data->getBinRadius() > getSize()[0] && parent && parent->getElementCount() >= LL_OCTREE_MAX_CAPACITY))) { //it belongs here #if LL_OCTREE_PARANOIA_CHECK //if this is a redundant insertion, error out (should never happen) @@ -317,16 +325,21 @@ public: } //it's here, but no kids are in the right place, make a new kid - LLVector3d center(getCenter()); - LLVector3d size(getSize()*0.5); + LLVector4a center = getCenter(); + LLVector4a size = getSize(); + size.mul(0.5f); //push center in direction of data LLOctreeNode<T>::pushCenter(center, size, data); // handle case where floating point number gets too small - if( llabs(center.mdV[0] - getCenter().mdV[0]) < F_APPROXIMATELY_ZERO && - llabs(center.mdV[1] - getCenter().mdV[1]) < F_APPROXIMATELY_ZERO && - llabs(center.mdV[2] - getCenter().mdV[2]) < F_APPROXIMATELY_ZERO) + LLVector4a val; + val.setSub(center, getCenter()); + val.setAbs(val); + + S32 lt = val.lessThan(LLVector4a::getEpsilon()).getGatheredBits() & 0x7; + + if( lt == 0x7 ) { mData.insert(data); BaseType::insert(data); @@ -344,7 +357,7 @@ public: //make sure no existing node matches this position for (U32 i = 0; i < getChildCount(); i++) { - if (mChild[i]->getCenter() == center) + if (mChild[i]->getCenter().equals3(center)) { OCT_ERRS << "Octree detected duplicate child center and gave up." << llendl; return false; @@ -362,7 +375,7 @@ public: else { //it's not in here, give it to the root - //OCT_ERRS << "Octree insertion failed, starting over from root!" << llendl; + OCT_ERRS << "Octree insertion failed, starting over from root!" << llendl; oct_node* node = this; @@ -436,6 +449,9 @@ public: void clearChildren() { mChild.clear(); + + U32* foo = (U32*) mChildMap; + foo[0] = foo[1] = 0xFFFFFFFF; } void validate() @@ -469,13 +485,19 @@ public: void addChild(oct_node* child, BOOL silent = FALSE) { #if LL_OCTREE_PARANOIA_CHECK + + if (child->getSize().equals3(getSize())) + { + OCT_ERRS << "Child size is same as parent size!" << llendl; + } + for (U32 i = 0; i < getChildCount(); i++) { - if(mChild[i]->getSize() != child->getSize()) + if(!mChild[i]->getSize().equals3(child->getSize())) { OCT_ERRS <<"Invalid octree child size." << llendl; } - if (mChild[i]->getCenter() == child->getCenter()) + if (mChild[i]->getCenter().equals3(child->getCenter())) { OCT_ERRS <<"Duplicate octree child position." << llendl; } @@ -487,6 +509,8 @@ public: } #endif + mChildMap[child->getOctant()] = (U8) mChild.size(); + mChild.push_back(child); child->setParent(this); @@ -500,7 +524,7 @@ public: } } - void removeChild(U8 index, BOOL destroy = FALSE) + void removeChild(S32 index, BOOL destroy = FALSE) { for (U32 i = 0; i < this->getListenerCount(); i++) { @@ -508,6 +532,8 @@ public: listener->handleChildRemoval(this, getChild(index)); } + + if (destroy) { mChild[index]->destroy(); @@ -515,6 +541,15 @@ public: } mChild.erase(mChild.begin() + index); + //rebuild child map + U32* foo = (U32*) mChildMap; + foo[0] = foo[1] = 0xFFFFFFFF; + + for (U32 i = 0; i < mChild.size(); ++i) + { + mChildMap[mChild[i]->getOctant()] = i; + } + checkAlive(); } @@ -541,19 +576,32 @@ public: } } - //OCT_ERRS << "Octree failed to delete requested child." << llendl; + OCT_ERRS << "Octree failed to delete requested child." << llendl; } protected: - child_list mChild; - element_list mData; + typedef enum + { + CENTER = 0, + SIZE = 1, + MAX = 2, + MIN = 3 + } eDName; + + LLVector4a mCenter; + LLVector4a mSize; + LLVector4a mMax; + LLVector4a mMin; + oct_node* mParent; - LLVector3d mCenter; - LLVector3d mSize; - LLVector3d mMax; - LLVector3d mMin; U8 mOctant; -}; + + child_list mChild; + U8 mChildMap[8]; + + element_list mData; + +}; //just like a regular node, except it might expand on insert and compress on balance template <class T> @@ -563,9 +611,9 @@ public: typedef LLOctreeNode<T> BaseType; typedef LLOctreeNode<T> oct_node; - LLOctreeRoot( LLVector3d center, - LLVector3d size, - BaseType* parent) + LLOctreeRoot(const LLVector4a& center, + const LLVector4a& size, + BaseType* parent) : BaseType(center, size, parent) { } @@ -596,6 +644,8 @@ public: //destroy child child->clearChildren(); delete child; + + return false; } return true; @@ -606,28 +656,33 @@ public: { if (data == NULL) { - //OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE ROOT !!!" << llendl; + OCT_ERRS << "!!! INVALID ELEMENT ADDED TO OCTREE ROOT !!!" << llendl; return false; } if (data->getBinRadius() > 4096.0) { - //OCT_ERRS << "!!! ELEMENT EXCEEDS MAXIMUM SIZE IN OCTREE ROOT !!!" << llendl; + OCT_ERRS << "!!! ELEMENT EXCEEDS MAXIMUM SIZE IN OCTREE ROOT !!!" << llendl; return false; } - const F64 MAX_MAG = 1024.0*1024.0; + LLVector4a MAX_MAG; + MAX_MAG.splat(1024.f*1024.f); - const LLVector3d& v = data->getPositionGroup(); - if (!(fabs(v.mdV[0]-this->mCenter.mdV[0]) < MAX_MAG && - fabs(v.mdV[1]-this->mCenter.mdV[1]) < MAX_MAG && - fabs(v.mdV[2]-this->mCenter.mdV[2]) < MAX_MAG)) + const LLVector4a& v = data->getPositionGroup(); + + LLVector4a val; + val.setSub(v, BaseType::mCenter); + val.setAbs(val); + S32 lt = val.lessThan(MAX_MAG).getGatheredBits() & 0x7; + + if (lt != 0x7) { - //OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" << llendl; + OCT_ERRS << "!!! ELEMENT EXCEEDS RANGE OF SPATIAL PARTITION !!!" << llendl; return false; } - if (this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup())) + if (this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup())) { //we got it, just act like a branch oct_node* node = getNodeAt(data); @@ -643,31 +698,34 @@ public: else if (this->getChildCount() == 0) { //first object being added, just wrap it up - while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) + while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) { - LLVector3d center, size; + LLVector4a center, size; center = this->getCenter(); size = this->getSize(); LLOctreeNode<T>::pushCenter(center, size, data); this->setCenter(center); - this->setSize(size*2); + size.mul(2.f); + this->setSize(size); this->updateMinMax(); } LLOctreeNode<T>::insert(data); } else { - while (!(this->getSize().mdV[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) + while (!(this->getSize()[0] > data->getBinRadius() && isInside(data->getPositionGroup()))) { //the data is outside the root node, we need to grow - LLVector3d center(this->getCenter()); - LLVector3d size(this->getSize()); + LLVector4a center(this->getCenter()); + LLVector4a size(this->getSize()); //expand this node - LLVector3d newcenter(center); + LLVector4a newcenter(center); LLOctreeNode<T>::pushCenter(newcenter, size, data); this->setCenter(newcenter); - this->setSize(size*2); + LLVector4a size2 = size; + size2.mul(2.f); + this->setSize(size2); this->updateMinMax(); //copy our children to a new branch @@ -704,4 +762,15 @@ void LLOctreeTraveler<T>::traverse(const LLOctreeNode<T>* node) traverse(node->getChild(i)); } } + +template <class T> +void LLOctreeTravelerDepthFirst<T>::traverse(const LLOctreeNode<T>* node) +{ + for (U32 i = 0; i < node->getChildCount(); i++) + { + traverse(node->getChild(i)); + } + node->accept(this); +} + #endif diff --git a/indra/llmath/llplane.h b/indra/llmath/llplane.h index 443f3f46b9..a611894721 100644 --- a/indra/llmath/llplane.h +++ b/indra/llmath/llplane.h @@ -36,19 +36,23 @@ // The plane normal = [A, B, C] // The closest approach = D / sqrt(A*A + B*B + C*C) -class LLPlane : public LLVector4 +class LLPlane { public: + + // Constructors LLPlane() {}; // no default constructor LLPlane(const LLVector3 &p0, F32 d) { setVec(p0, d); } LLPlane(const LLVector3 &p0, const LLVector3 &n) { setVec(p0, n); } - void setVec(const LLVector3 &p0, F32 d) { LLVector4::setVec(p0[0], p0[1], p0[2], d); } - void setVec(const LLVector3 &p0, const LLVector3 &n) + inline void setVec(const LLVector3 &p0, F32 d) { mV.set(p0[0], p0[1], p0[2], d); } + + // Set + inline void setVec(const LLVector3 &p0, const LLVector3 &n) { F32 d = -(p0 * n); setVec(n, d); } - void setVec(const LLVector3 &p0, const LLVector3 &p1, const LLVector3 &p2) + inline void setVec(const LLVector3 &p0, const LLVector3 &p1, const LLVector3 &p2) { LLVector3 u, v, w; u = p1 - p0; @@ -58,8 +62,38 @@ public: F32 d = -(w * p0); setVec(w, d); } - LLPlane& operator=(const LLVector4& v2) { LLVector4::setVec(v2[0],v2[1],v2[2],v2[3]); return *this;} + + inline LLPlane& operator=(const LLVector4& v2) { mV.set(v2[0],v2[1],v2[2],v2[3]); return *this;} + + inline LLPlane& operator=(const LLVector4a& v2) { mV.set(v2[0],v2[1],v2[2],v2[3]); return *this;} + + inline void set(const LLPlane& p2) { mV = p2.mV; } + + // F32 dist(const LLVector3 &v2) const { return mV[0]*v2[0] + mV[1]*v2[1] + mV[2]*v2[2] + mV[3]; } + + inline LLSimdScalar dot3(const LLVector4a& b) const { return mV.dot3(b); } + + // Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates + // the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead + inline F32 operator[](const S32 idx) const { return mV[idx]; } + + // preferable when index is known at compile time + template <int N> LL_FORCE_INLINE void getAt(LLSimdScalar& v) const { v = mV.getScalarAt<N>(); } + + // reset the vector to 0, 0, 0, 1 + inline void clear() { mV.set(0, 0, 0, 1); } + + inline void getVector3(LLVector3& vec) const { vec.set(mV[0], mV[1], mV[2]); } + + // Retrieve the mask indicating which of the x, y, or z axis are greater or equal to zero. + inline U8 calcPlaneMask() + { + return mV.greaterEqual(LLVector4a::getZero()).getGatheredBits() & LLVector4Logical::MASK_XYZ; + } + +private: + LLVector4a mV; }; diff --git a/indra/llmath/llquantize.h b/indra/llmath/llquantize.h index 7f56ff3448..1595dbecf8 100644 --- a/indra/llmath/llquantize.h +++ b/indra/llmath/llquantize.h @@ -29,10 +29,16 @@ #define LL_LLQUANTIZE_H const U16 U16MAX = 65535; +LL_ALIGN_16( const F32 F_U16MAX_4A[4] ) = { 65535.f, 65535.f, 65535.f, 65535.f }; + const F32 OOU16MAX = 1.f/(F32)(U16MAX); +LL_ALIGN_16( const F32 F_OOU16MAX_4A[4] ) = { OOU16MAX, OOU16MAX, OOU16MAX, OOU16MAX }; const U8 U8MAX = 255; +LL_ALIGN_16( const F32 F_U8MAX_4A[4] ) = { 255.f, 255.f, 255.f, 255.f }; + const F32 OOU8MAX = 1.f/(F32)(U8MAX); +LL_ALIGN_16( const F32 F_OOU8MAX_4A[4] ) = { OOU8MAX, OOU8MAX, OOU8MAX, OOU8MAX }; const U8 FIRSTVALIDCHAR = 54; const U8 MAXSTRINGVAL = U8MAX - FIRSTVALIDCHAR; //we don't allow newline or null diff --git a/indra/llmath/llquaternion.cpp b/indra/llmath/llquaternion.cpp index a51f11072c..7381d5eb99 100644 --- a/indra/llmath/llquaternion.cpp +++ b/indra/llmath/llquaternion.cpp @@ -26,9 +26,10 @@ #include "linden_common.h" +#include "llmath.h" // for F_PI + #include "llquaternion.h" -#include "llmath.h" // for F_PI //#include "vmath.h" #include "v3math.h" #include "v3dmath.h" diff --git a/indra/llmath/llquaternion.h b/indra/llmath/llquaternion.h index 26da14ae20..ca0dfe206b 100644 --- a/indra/llmath/llquaternion.h +++ b/indra/llmath/llquaternion.h @@ -27,7 +27,11 @@ #ifndef LLQUATERNION_H #define LLQUATERNION_H -#include "llmath.h" +#include <iostream> + +#ifndef LLMATH_H //enforce specific include order to avoid tangling inline dependencies +#error "Please include llmath.h first." +#endif class LLVector4; class LLVector3; diff --git a/indra/llmath/llquaternion2.h b/indra/llmath/llquaternion2.h new file mode 100644 index 0000000000..fd9c0cf3ab --- /dev/null +++ b/indra/llmath/llquaternion2.h @@ -0,0 +1,105 @@ +/** + * @file llquaternion2.h + * @brief LLQuaternion2 class header file - SIMD-enabled quaternion class + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_QUATERNION2_H +#define LL_QUATERNION2_H + +///////////////////////////// +// LLQuaternion2 +///////////////////////////// +// This class stores a quaternion x*i + y*j + z*k + w in <x, y, z, w> order +// (i.e., w in high order element of vector) +///////////////////////////// +///////////////////////////// +// These classes are intentionally minimal right now. If you need additional +// functionality, please contact someone with SSE experience (e.g., Falcon or +// Huseby). +///////////////////////////// +#include "llquaternion.h" + +class LLQuaternion2 +{ +public: + + ////////////////////////// + // Ctors + ////////////////////////// + + // Ctor + LLQuaternion2() {} + + // Ctor from LLQuaternion + explicit LLQuaternion2( const class LLQuaternion& quat ); + + ////////////////////////// + // Get/Set + ////////////////////////// + + // Load from an LLQuaternion + inline void operator=( const LLQuaternion& quat ) + { + mQ.loadua( quat.mQ ); + } + + // Return the internal LLVector4a representation of the quaternion + inline const LLVector4a& getVector4a() const; + inline LLVector4a& getVector4aRw(); + + ///////////////////////// + // Quaternion modification + ///////////////////////// + + // Set this quaternion to the conjugate of src + inline void setConjugate(const LLQuaternion2& src); + + // Renormalizes the quaternion. Assumes it has nonzero length. + inline void normalize(); + + // Quantize this quaternion to 8 bit precision + inline void quantize8(); + + // Quantize this quaternion to 16 bit precision + inline void quantize16(); + + ///////////////////////// + // Quaternion inspection + ///////////////////////// + + // Return true if this quaternion is equal to 'rhs'. + // Note! Quaternions exhibit "double-cover", so any rotation has two equally valid + // quaternion representations and they will NOT compare equal. + inline bool equals(const LLQuaternion2& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + + // Return true if all components are finite and the quaternion is normalized + inline bool isOkRotation() const; + +protected: + + LLVector4a mQ; + +}; + +#endif diff --git a/indra/llmath/llquaternion2.inl b/indra/llmath/llquaternion2.inl new file mode 100644 index 0000000000..2a6987552d --- /dev/null +++ b/indra/llmath/llquaternion2.inl @@ -0,0 +1,102 @@ +/** + * @file llquaternion2.inl + * @brief LLQuaternion2 inline definitions + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#include "llquaternion2.h" + +static const LLQuad LL_V4A_PLUS_ONE = {1.f, 1.f, 1.f, 1.f}; +static const LLQuad LL_V4A_MINUS_ONE = {-1.f, -1.f, -1.f, -1.f}; + +// Ctor from LLQuaternion +inline LLQuaternion2::LLQuaternion2( const LLQuaternion& quat ) +{ + mQ.set(quat.mQ[VX], quat.mQ[VY], quat.mQ[VZ], quat.mQ[VW]); +} + +////////////////////////// +// Get/Set +////////////////////////// + +// Return the internal LLVector4a representation of the quaternion +inline const LLVector4a& LLQuaternion2::getVector4a() const +{ + return mQ; +} + +inline LLVector4a& LLQuaternion2::getVector4aRw() +{ + return mQ; +} + +///////////////////////// +// Quaternion modification +///////////////////////// + +// Set this quaternion to the conjugate of src +inline void LLQuaternion2::setConjugate(const LLQuaternion2& src) +{ + static LL_ALIGN_16( const U32 F_QUAT_INV_MASK_4A[4] ) = { 0x80000000, 0x80000000, 0x80000000, 0x00000000 }; + mQ = _mm_xor_ps(src.mQ, *reinterpret_cast<const LLQuad*>(&F_QUAT_INV_MASK_4A)); +} + +// Renormalizes the quaternion. Assumes it has nonzero length. +inline void LLQuaternion2::normalize() +{ + mQ.normalize4(); +} + +// Quantize this quaternion to 8 bit precision +inline void LLQuaternion2::quantize8() +{ + mQ.quantize8( LL_V4A_MINUS_ONE, LL_V4A_PLUS_ONE ); + normalize(); +} + +// Quantize this quaternion to 16 bit precision +inline void LLQuaternion2::quantize16() +{ + mQ.quantize16( LL_V4A_MINUS_ONE, LL_V4A_PLUS_ONE ); + normalize(); +} + + +///////////////////////// +// Quaternion inspection +///////////////////////// + +// Return true if this quaternion is equal to 'rhs'. +// Note! Quaternions exhibit "double-cover", so any rotation has two equally valid +// quaternion representations and they will NOT compare equal. +inline bool LLQuaternion2::equals(const LLQuaternion2 &rhs, F32 tolerance/* = F_APPROXIMATELY_ZERO*/) const +{ + return mQ.equals4(rhs.mQ, tolerance); +} + +// Return true if all components are finite and the quaternion is normalized +inline bool LLQuaternion2::isOkRotation() const +{ + return mQ.isFinite4() && mQ.isNormalized4(); +} + diff --git a/indra/llmath/llsimdmath.h b/indra/llmath/llsimdmath.h new file mode 100644 index 0000000000..c7cdf7b32c --- /dev/null +++ b/indra/llmath/llsimdmath.h @@ -0,0 +1,93 @@ +/** + * @file llsimdmath.h + * @brief Common header for SIMD-based math library (llvector4a, llmatrix3a, etc.) + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_SIMD_MATH_H +#define LL_SIMD_MATH_H + +#ifndef LLMATH_H +#error "Please include llmath.h before this file." +#endif + +#if ( ( LL_DARWIN || LL_LINUX ) && !(__SSE2__) ) || ( LL_WINDOWS && ( _M_IX86_FP < 2 ) ) +#error SSE2 not enabled. LLVector4a and related class will not compile. +#endif + +#if !LL_WINDOWS +#include <stdint.h> +#endif + +template <typename T> T* LL_NEXT_ALIGNED_ADDRESS(T* address) +{ + return reinterpret_cast<T*>( + (reinterpret_cast<uintptr_t>(address) + 0xF) & ~0xF); +} + +template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) +{ + return reinterpret_cast<T*>( + (reinterpret_cast<uintptr_t>(address) + 0x3F) & ~0x3F); +} + +#if LL_LINUX || LL_DARWIN + +#define LL_ALIGN_PREFIX(x) +#define LL_ALIGN_POSTFIX(x) __attribute__((aligned(x))) + +#elif LL_WINDOWS + +#define LL_ALIGN_PREFIX(x) __declspec(align(x)) +#define LL_ALIGN_POSTFIX(x) + +#else +#error "LL_ALIGN_PREFIX and LL_ALIGN_POSTFIX undefined" +#endif + +#define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16) + + + +#include <xmmintrin.h> +#include <emmintrin.h> + +#include "llsimdtypes.h" +#include "llsimdtypes.inl" + +class LLMatrix3a; +class LLRotation; +class LLMatrix3; + +#include "llquaternion.h" + +#include "llvector4logical.h" +#include "llvector4a.h" +#include "llmatrix3a.h" +#include "llquaternion2.h" +#include "llvector4a.inl" +#include "llmatrix3a.inl" +#include "llquaternion2.inl" + + +#endif //LL_SIMD_MATH_H diff --git a/indra/llmath/llsimdtypes.h b/indra/llmath/llsimdtypes.h new file mode 100644 index 0000000000..bd991d0e71 --- /dev/null +++ b/indra/llmath/llsimdtypes.h @@ -0,0 +1,124 @@ +/** + * @file llsimdtypes.h + * @brief Declaration of basic SIMD math related types + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_SIMD_TYPES_H +#define LL_SIMD_TYPES_H + +#ifndef LL_SIMD_MATH_H +#error "Please include llmath.h before this file." +#endif + +typedef __m128 LLQuad; + + +#if LL_WINDOWS +#pragma warning(push) +#pragma warning( disable : 4800 3 ) // Disable warning about casting int to bool for this class. +#if defined(_MSC_VER) && (_MSC_VER < 1500) +// VC++ 2005 is missing these intrinsics +// __forceinline is MSVC specific and attempts to override compiler inlining judgment. This is so +// even in debug builds this call is a NOP. +__forceinline const __m128 _mm_castsi128_ps( const __m128i a ) { return reinterpret_cast<const __m128&>(a); } +__forceinline const __m128i _mm_castps_si128( const __m128 a ) { return reinterpret_cast<const __m128i&>(a); } +#endif // _MSC_VER + +#endif // LL_WINDOWS + +class LLBool32 +{ +public: + inline LLBool32() {} + inline LLBool32(int rhs) : m_bool(rhs) {} + inline LLBool32(unsigned int rhs) : m_bool(rhs) {} + inline LLBool32(bool rhs) { m_bool = static_cast<const int>(rhs); } + inline LLBool32& operator= (bool rhs) { m_bool = (int)rhs; return *this; } + inline bool operator== (bool rhs) const { return static_cast<const bool&>(m_bool) == rhs; } + inline bool operator!= (bool rhs) const { return !operator==(rhs); } + inline operator bool() const { return static_cast<const bool&>(m_bool); } + +private: + int m_bool; +}; + +#if LL_WINDOWS +#pragma warning(pop) +#endif + +class LLSimdScalar +{ +public: + inline LLSimdScalar() {} + inline LLSimdScalar(LLQuad q) + { + mQ = q; + } + + inline LLSimdScalar(F32 f) + { + mQ = _mm_set_ss(f); + } + + static inline const LLSimdScalar& getZero() + { + extern const LLQuad F_ZERO_4A; + return reinterpret_cast<const LLSimdScalar&>(F_ZERO_4A); + } + + inline F32 getF32() const; + + inline LLBool32 isApproximatelyEqual(const LLSimdScalar& rhs, F32 tolerance = F_APPROXIMATELY_ZERO) const; + + inline LLSimdScalar getAbs() const; + + inline void setMax( const LLSimdScalar& a, const LLSimdScalar& b ); + + inline void setMin( const LLSimdScalar& a, const LLSimdScalar& b ); + + inline LLSimdScalar& operator=(F32 rhs); + + inline LLSimdScalar& operator+=(const LLSimdScalar& rhs); + + inline LLSimdScalar& operator-=(const LLSimdScalar& rhs); + + inline LLSimdScalar& operator*=(const LLSimdScalar& rhs); + + inline LLSimdScalar& operator/=(const LLSimdScalar& rhs); + + inline operator LLQuad() const + { + return mQ; + } + + inline const LLQuad& getQuad() const + { + return mQ; + } + +private: + LLQuad mQ; +}; + +#endif //LL_SIMD_TYPES_H diff --git a/indra/llmath/llsimdtypes.inl b/indra/llmath/llsimdtypes.inl new file mode 100644 index 0000000000..712239e425 --- /dev/null +++ b/indra/llmath/llsimdtypes.inl @@ -0,0 +1,157 @@ +/** + * @file llsimdtypes.inl + * @brief Inlined definitions of basic SIMD math related types + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + + + + +////////////////// +// LLSimdScalar +////////////////// + +inline LLSimdScalar operator+(const LLSimdScalar& a, const LLSimdScalar& b) +{ + LLSimdScalar t(a); + t += b; + return t; +} + +inline LLSimdScalar operator-(const LLSimdScalar& a, const LLSimdScalar& b) +{ + LLSimdScalar t(a); + t -= b; + return t; +} + +inline LLSimdScalar operator*(const LLSimdScalar& a, const LLSimdScalar& b) +{ + LLSimdScalar t(a); + t *= b; + return t; +} + +inline LLSimdScalar operator/(const LLSimdScalar& a, const LLSimdScalar& b) +{ + LLSimdScalar t(a); + t /= b; + return t; +} + +inline LLSimdScalar operator-(const LLSimdScalar& a) +{ + static LL_ALIGN_16(const U32 signMask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + return _mm_xor_ps(*reinterpret_cast<const LLQuad*>(signMask), a); +} + +inline LLBool32 operator==(const LLSimdScalar& a, const LLSimdScalar& b) +{ + return _mm_comieq_ss(a, b); +} + +inline LLBool32 operator!=(const LLSimdScalar& a, const LLSimdScalar& b) +{ + return _mm_comineq_ss(a, b); +} + +inline LLBool32 operator<(const LLSimdScalar& a, const LLSimdScalar& b) +{ + return _mm_comilt_ss(a, b); +} + +inline LLBool32 operator<=(const LLSimdScalar& a, const LLSimdScalar& b) +{ + return _mm_comile_ss(a, b); +} + +inline LLBool32 operator>(const LLSimdScalar& a, const LLSimdScalar& b) +{ + return _mm_comigt_ss(a, b); +} + +inline LLBool32 operator>=(const LLSimdScalar& a, const LLSimdScalar& b) +{ + return _mm_comige_ss(a, b); +} + +inline LLBool32 LLSimdScalar::isApproximatelyEqual(const LLSimdScalar& rhs, F32 tolerance /* = F_APPROXIMATELY_ZERO */) const +{ + const LLSimdScalar tol( tolerance ); + const LLSimdScalar diff = _mm_sub_ss( mQ, rhs.mQ ); + const LLSimdScalar absDiff = diff.getAbs(); + return absDiff <= tol; +} + +inline void LLSimdScalar::setMax( const LLSimdScalar& a, const LLSimdScalar& b ) +{ + mQ = _mm_max_ss( a, b ); +} + +inline void LLSimdScalar::setMin( const LLSimdScalar& a, const LLSimdScalar& b ) +{ + mQ = _mm_min_ss( a, b ); +} + +inline LLSimdScalar& LLSimdScalar::operator=(F32 rhs) +{ + mQ = _mm_set_ss(rhs); + return *this; +} + +inline LLSimdScalar& LLSimdScalar::operator+=(const LLSimdScalar& rhs) +{ + mQ = _mm_add_ss( mQ, rhs ); + return *this; +} + +inline LLSimdScalar& LLSimdScalar::operator-=(const LLSimdScalar& rhs) +{ + mQ = _mm_sub_ss( mQ, rhs ); + return *this; +} + +inline LLSimdScalar& LLSimdScalar::operator*=(const LLSimdScalar& rhs) +{ + mQ = _mm_mul_ss( mQ, rhs ); + return *this; +} + +inline LLSimdScalar& LLSimdScalar::operator/=(const LLSimdScalar& rhs) +{ + mQ = _mm_div_ss( mQ, rhs ); + return *this; +} + +inline LLSimdScalar LLSimdScalar::getAbs() const +{ + static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + return _mm_and_ps( mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); +} + +inline F32 LLSimdScalar::getF32() const +{ + F32 ret; + _mm_store_ss(&ret, mQ); + return ret; +} diff --git a/indra/llmath/lltreenode.h b/indra/llmath/lltreenode.h index a462d1659e..c66bc26176 100644 --- a/indra/llmath/lltreenode.h +++ b/indra/llmath/lltreenode.h @@ -28,6 +28,9 @@ #include "stdtypes.h" #include "xform.h" +#include "llpointer.h" +#include "llrefcount.h" + #include <vector> template <class T> class LLTreeNode; diff --git a/indra/llmath/llvector4a.cpp b/indra/llmath/llvector4a.cpp new file mode 100644 index 0000000000..b66b7a7076 --- /dev/null +++ b/indra/llmath/llvector4a.cpp @@ -0,0 +1,222 @@ +/** + * @file llvector4a.cpp + * @brief SIMD vector implementation + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#include "llmath.h" +#include "llquantize.h" + +extern const LLQuad F_ZERO_4A = { 0, 0, 0, 0 }; +extern const LLQuad F_APPROXIMATELY_ZERO_4A = { + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO, + F_APPROXIMATELY_ZERO +}; + +extern const LLVector4a LL_V4A_ZERO = reinterpret_cast<const LLVector4a&> ( F_ZERO_4A ); +extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F_APPROXIMATELY_ZERO_4A ); + +/*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes) +{ + assert(src != NULL); + assert(dst != NULL); + assert(bytes > 0); + assert((bytes % sizeof(F32))== 0); + + F32* end = dst + (bytes / sizeof(F32) ); + + if (bytes > 64) + { + F32* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); + + //at least 64 (16*4) bytes before the end of the destination, switch to 16 byte copies + F32* end_64 = end-16; + + _mm_prefetch((char*)begin_64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); + _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); + + while (dst < begin_64) + { + copy4a(dst, src); + dst += 4; + src += 4; + } + + while (dst < end_64) + { + _mm_prefetch((char*)src + 512, _MM_HINT_NTA); + _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); + copy4a(dst, src); + copy4a(dst+4, src+4); + copy4a(dst+8, src+8); + copy4a(dst+12, src+12); + + dst += 16; + src += 16; + } + } + + while (dst < end) + { + copy4a(dst, src); + dst += 4; + src += 4; + } +} + +void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec ) +{ + const LLVector4a col0 = rot.getColumn(0); + const LLVector4a col1 = rot.getColumn(1); + const LLVector4a col2 = rot.getColumn(2); + + LLVector4a result = _mm_load_ss( vec.getF32ptr() ); + result.splat<0>( result ); + result.mul( col0 ); + + { + LLVector4a yyyy = _mm_load_ss( vec.getF32ptr() + 1 ); + yyyy.splat<0>( yyyy ); + yyyy.mul( col1 ); + result.add( yyyy ); + } + + { + LLVector4a zzzz = _mm_load_ss( vec.getF32ptr() + 2 ); + zzzz.splat<0>( zzzz ); + zzzz.mul( col2 ); + result.add( zzzz ); + } + + *this = result; +} + +void LLVector4a::setRotated( const LLQuaternion2& quat, const LLVector4a& vec ) +{ + const LLVector4a& quatVec = quat.getVector4a(); + LLVector4a temp; temp.setCross3(quatVec, vec); + temp.add( temp ); + + const LLVector4a realPart( quatVec.getScalarAt<3>() ); + LLVector4a tempTimesReal; tempTimesReal.setMul( temp, realPart ); + + mQ = vec; + add( tempTimesReal ); + + LLVector4a imagCrossTemp; imagCrossTemp.setCross3( quatVec, temp ); + add(imagCrossTemp); +} + +void LLVector4a::quantize8( const LLVector4a& low, const LLVector4a& high ) +{ + LLVector4a val(mQ); + LLVector4a delta; delta.setSub( high, low ); + + { + val.clamp(low, high); + val.sub(low); + + // 8-bit quantization means we can do with just 12 bits of reciprocal accuracy + const LLVector4a oneOverDelta = _mm_rcp_ps(delta.mQ); +// { +// static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; +// LLVector4a two; two.load4a( F_TWO_4A ); +// +// // Here we use _mm_rcp_ps plus one round of newton-raphson +// // We wish to find 'x' such that x = 1/delta +// // As a first approximation, we take x0 = _mm_rcp_ps(delta) +// // Then x1 = 2 * x0 - a * x0^2 or x1 = x0 * ( 2 - a * x0 ) +// // See Intel AP-803 http://ompf.org/!/Intel_application_note_AP-803.pdf +// const LLVector4a recipApprox = _mm_rcp_ps(delta.mQ); +// oneOverDelta.setMul( delta, recipApprox ); +// oneOverDelta.setSub( two, oneOverDelta ); +// oneOverDelta.mul( recipApprox ); +// } + + val.mul(oneOverDelta); + val.mul(*reinterpret_cast<const LLVector4a*>(F_U8MAX_4A)); + } + + val = _mm_cvtepi32_ps(_mm_cvtps_epi32( val.mQ )); + + { + val.mul(*reinterpret_cast<const LLVector4a*>(F_OOU8MAX_4A)); + val.mul(delta); + val.add(low); + } + + { + LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU8MAX_4A)); + LLVector4a absVal; absVal.setAbs( val ); + setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val ); + } +} + +void LLVector4a::quantize16( const LLVector4a& low, const LLVector4a& high ) +{ + LLVector4a val(mQ); + LLVector4a delta; delta.setSub( high, low ); + + { + val.clamp(low, high); + val.sub(low); + + // 16-bit quantization means we need a round of Newton-Raphson + LLVector4a oneOverDelta; + { + static LL_ALIGN_16( const F32 F_TWO_4A[4] ) = { 2.f, 2.f, 2.f, 2.f }; + LLVector4a two; two.load4a( F_TWO_4A ); + + // Here we use _mm_rcp_ps plus one round of newton-raphson + // We wish to find 'x' such that x = 1/delta + // As a first approximation, we take x0 = _mm_rcp_ps(delta) + // Then x1 = 2 * x0 - a * x0^2 or x1 = x0 * ( 2 - a * x0 ) + // See Intel AP-803 http://ompf.org/!/Intel_application_note_AP-803.pdf + const LLVector4a recipApprox = _mm_rcp_ps(delta.mQ); + oneOverDelta.setMul( delta, recipApprox ); + oneOverDelta.setSub( two, oneOverDelta ); + oneOverDelta.mul( recipApprox ); + } + + val.mul(oneOverDelta); + val.mul(*reinterpret_cast<const LLVector4a*>(F_U16MAX_4A)); + } + + val = _mm_cvtepi32_ps(_mm_cvtps_epi32( val.mQ )); + + { + val.mul(*reinterpret_cast<const LLVector4a*>(F_OOU16MAX_4A)); + val.mul(delta); + val.add(low); + } + + { + LLVector4a maxError; maxError.setMul(delta, *reinterpret_cast<const LLVector4a*>(F_OOU16MAX_4A)); + LLVector4a absVal; absVal.setAbs( val ); + setSelectWithMask( absVal.lessThan( maxError ), F_ZERO_4A, val ); + } +} diff --git a/indra/llmath/llvector4a.h b/indra/llmath/llvector4a.h new file mode 100644 index 0000000000..596082509d --- /dev/null +++ b/indra/llmath/llvector4a.h @@ -0,0 +1,324 @@ +/** + * @file llvector4a.h + * @brief LLVector4a class header file - memory aligned and vectorized 4 component vector + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_LLVECTOR4A_H +#define LL_LLVECTOR4A_H + + +class LLRotation; + +#include <assert.h> +#include "llpreprocessor.h" + +/////////////////////////////////// +// FIRST TIME USERS PLEASE READ +////////////////////////////////// +// This is just the beginning of LLVector4a. There are many more useful functions +// yet to be implemented. For example, setNeg to negate a vector, rotate() to apply +// a matrix rotation, various functions to manipulate only the X, Y, and Z elements +// and many others (including a whole variety of accessors). So if you don't see a +// function here that you need, please contact Falcon or someone else with SSE +// experience (Richard, I think, has some and davep has a little as of the time +// of this writing, July 08, 2010) about getting it implemented before you resort to +// LLVector3/LLVector4. +///////////////////////////////// + +class LLVector4a +{ +public: + + /////////////////////////////////// + // STATIC METHODS + /////////////////////////////////// + + // Call initClass() at startup to avoid 15,000+ cycle penalties from denormalized numbers + static void initClass() + { + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + } + + // Return a vector of all zeros + static inline const LLVector4a& getZero() + { + extern const LLVector4a LL_V4A_ZERO; + return LL_V4A_ZERO; + } + + // Return a vector of all epsilon, where epsilon is a small float suitable for approximate equality checks + static inline const LLVector4a& getEpsilon() + { + extern const LLVector4a LL_V4A_EPSILON; + return LL_V4A_EPSILON; + } + + // Copy 16 bytes from src to dst. Source and destination must be 16-byte aligned + static inline void copy4a(F32* dst, const F32* src) + { + _mm_store_ps(dst, _mm_load_ps(src)); + } + + // Copy words 16-byte blocks from src to dst. Source and destination must not overlap. + static void memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes); + + //////////////////////////////////// + // CONSTRUCTORS + //////////////////////////////////// + + LLVector4a() + { //DO NOT INITIALIZE -- The overhead is completely unnecessary + } + + LLVector4a(F32 x, F32 y, F32 z, F32 w = 0.f) + { + set(x,y,z,w); + } + + LLVector4a(F32 x) + { + splat(x); + } + + LLVector4a(const LLSimdScalar& x) + { + splat(x); + } + + LLVector4a(LLQuad q) + { + mQ = q; + } + + //////////////////////////////////// + // LOAD/STORE + //////////////////////////////////// + + // Load from 16-byte aligned src array (preferred method of loading) + inline void load4a(const F32* src); + + // Load from unaligned src array (NB: Significantly slower than load4a) + inline void loadua(const F32* src); + + // Load only three floats beginning at address 'src'. Slowest method. + inline void load3(const F32* src); + + // Store to a 16-byte aligned memory address + inline void store4a(F32* dst) const; + + //////////////////////////////////// + // BASIC GET/SET + //////////////////////////////////// + + // Return a "this" as an F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon) + inline F32* getF32ptr(); + + // Return a "this" as a const F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon) + inline const F32* const getF32ptr() const; + + // Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates + // the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead + inline F32 operator[](const S32 idx) const; + + // Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. + inline LLSimdScalar getScalarAt(const S32 idx) const; + + // Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. + template <int N> LL_FORCE_INLINE LLSimdScalar getScalarAt() const; + + // Set to an x, y, z and optional w provided + inline void set(F32 x, F32 y, F32 z, F32 w = 0.f); + + // Set to all zeros. This is preferred to using ::getZero() + inline void clear(); + + // Set all elements to 'x' + inline void splat(const F32 x); + + // Set all elements to 'x' + inline void splat(const LLSimdScalar& x); + + // Set all 4 elements to element N of src, with N known at compile time + template <int N> void splat(const LLVector4a& src); + + // Set all 4 elements to element i of v, with i NOT known at compile time + inline void splat(const LLVector4a& v, U32 i); + + // Select bits from sourceIfTrue and sourceIfFalse according to bits in mask + inline void setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse ); + + //////////////////////////////////// + // ALGEBRAIC + //////////////////////////////////// + + // Set this to the element-wise (a + b) + inline void setAdd(const LLVector4a& a, const LLVector4a& b); + + // Set this to element-wise (a - b) + inline void setSub(const LLVector4a& a, const LLVector4a& b); + + // Set this to element-wise multiply (a * b) + inline void setMul(const LLVector4a& a, const LLVector4a& b); + + // Set this to element-wise quotient (a / b) + inline void setDiv(const LLVector4a& a, const LLVector4a& b); + + // Set this to the element-wise absolute value of src + inline void setAbs(const LLVector4a& src); + + // Add to each component in this vector the corresponding component in rhs + inline void add(const LLVector4a& rhs); + + // Subtract from each component in this vector the corresponding component in rhs + inline void sub(const LLVector4a& rhs); + + // Multiply each component in this vector by the corresponding component in rhs + inline void mul(const LLVector4a& rhs); + + // Divide each component in this vector by the corresponding component in rhs + inline void div(const LLVector4a& rhs); + + // Multiply this vector by x in a scalar fashion + inline void mul(const F32 x); + + // Set this to (a x b) (geometric cross-product) + inline void setCross3(const LLVector4a& a, const LLVector4a& b); + + // Set all elements to the dot product of the x, y, and z elements in a and b + inline void setAllDot3(const LLVector4a& a, const LLVector4a& b); + + // Set all elements to the dot product of the x, y, z, and w elements in a and b + inline void setAllDot4(const LLVector4a& a, const LLVector4a& b); + + // Return the 3D dot product of this vector and b + inline LLSimdScalar dot3(const LLVector4a& b) const; + + // Return the 4D dot product of this vector and b + inline LLSimdScalar dot4(const LLVector4a& b) const; + + // Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed + // Note that this does not consider zero length vectors! + inline void normalize3(); + + // Same as normalize3() but with respect to all 4 components + inline void normalize4(); + + // Same as normalize3(), but returns length as a SIMD scalar + inline LLSimdScalar normalize3withLength(); + + // Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed + // Note that this does not consider zero length vectors! + inline void normalize3fast(); + + // Return true if this vector is normalized with respect to x,y,z up to tolerance + inline LLBool32 isNormalized3( F32 tolerance = 1e-3 ) const; + + // Return true if this vector is normalized with respect to all components up to tolerance + inline LLBool32 isNormalized4( F32 tolerance = 1e-3 ) const; + + // Set all elements to the length of vector 'v' + inline void setAllLength3( const LLVector4a& v ); + + // Get this vector's length + inline LLSimdScalar getLength3() const; + + // Set the components of this vector to the minimum of the corresponding components of lhs and rhs + inline void setMin(const LLVector4a& lhs, const LLVector4a& rhs); + + // Set the components of this vector to the maximum of the corresponding components of lhs and rhs + inline void setMax(const LLVector4a& lhs, const LLVector4a& rhs); + + // Clamps this vector to be within the component-wise range low to high (inclusive) + inline void clamp( const LLVector4a& low, const LLVector4a& high ); + + // Set this to (c * lhs) + rhs * ( 1 - c) + inline void setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c); + + // Return true (nonzero) if x, y, z (and w for Finite4) are all finite floats + inline LLBool32 isFinite3() const; + inline LLBool32 isFinite4() const; + + // Set this vector to 'vec' rotated by the LLRotation or LLQuaternion2 provided + void setRotated( const LLRotation& rot, const LLVector4a& vec ); + void setRotated( const class LLQuaternion2& quat, const LLVector4a& vec ); + + // Set this vector to 'vec' rotated by the INVERSE of the LLRotation or LLQuaternion2 provided + inline void setRotatedInv( const LLRotation& rot, const LLVector4a& vec ); + inline void setRotatedInv( const class LLQuaternion2& quat, const LLVector4a& vec ); + + // Quantize this vector to 8 or 16 bit precision + void quantize8( const LLVector4a& low, const LLVector4a& high ); + void quantize16( const LLVector4a& low, const LLVector4a& high ); + + //////////////////////////////////// + // LOGICAL + //////////////////////////////////// + // The functions in this section will compare the elements in this vector + // to those in rhs and return an LLVector4Logical with all bits set in elements + // where the comparison was true and all bits unset in elements where the comparison + // was false. See llvector4logica.h + //////////////////////////////////// + // WARNING: Other than equals3 and equals4, these functions do NOT account + // for floating point tolerance. You should include the appropriate tolerance + // in the inputs. + //////////////////////////////////// + + inline LLVector4Logical greaterThan(const LLVector4a& rhs) const; + + inline LLVector4Logical lessThan(const LLVector4a& rhs) const; + + inline LLVector4Logical greaterEqual(const LLVector4a& rhs) const; + + inline LLVector4Logical lessEqual(const LLVector4a& rhs) const; + + inline LLVector4Logical equal(const LLVector4a& rhs) const; + + // Returns true if this and rhs are componentwise equal up to the specified absolute tolerance + inline bool equals4(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + + inline bool equals3(const LLVector4a& rhs, F32 tolerance = F_APPROXIMATELY_ZERO ) const; + + //////////////////////////////////// + // OPERATORS + //////////////////////////////////// + + // Do NOT add aditional operators without consulting someone with SSE experience + inline const LLVector4a& operator= ( const LLVector4a& rhs ); + + inline const LLVector4a& operator= ( const LLQuad& rhs ); + + inline operator LLQuad() const; + +private: + LLQuad mQ; +}; + +inline void update_min_max(LLVector4a& min, LLVector4a& max, const LLVector4a& p) +{ + min.setMin(min, p); + max.setMax(max, p); +} + +#endif diff --git a/indra/llmath/llvector4a.inl b/indra/llmath/llvector4a.inl new file mode 100644 index 0000000000..7ad22a5631 --- /dev/null +++ b/indra/llmath/llvector4a.inl @@ -0,0 +1,593 @@ +/** + * @file llvector4a.inl + * @brief LLVector4a inline function implementations + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +//////////////////////////////////// +// LOAD/STORE +//////////////////////////////////// + +// Load from 16-byte aligned src array (preferred method of loading) +inline void LLVector4a::load4a(const F32* src) +{ + mQ = _mm_load_ps(src); +} + +// Load from unaligned src array (NB: Significantly slower than load4a) +inline void LLVector4a::loadua(const F32* src) +{ + mQ = _mm_loadu_ps(src); +} + +// Load only three floats beginning at address 'src'. Slowest method. +inline void LLVector4a::load3(const F32* src) +{ + // mQ = { 0.f, src[2], src[1], src[0] } = { W, Z, Y, X } + // NB: This differs from the convention of { Z, Y, X, W } + mQ = _mm_set_ps(0.f, src[2], src[1], src[0]); +} + +// Store to a 16-byte aligned memory address +inline void LLVector4a::store4a(F32* dst) const +{ + _mm_store_ps(dst, mQ); +} + +//////////////////////////////////// +// BASIC GET/SET +//////////////////////////////////// + +// Return a "this" as an F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon) +F32* LLVector4a::getF32ptr() +{ + return (F32*) &mQ; +} + +// Return a "this" as a const F32 pointer. Do not use unless you have a very good reason. (Not sure? Ask Falcon) +const F32* const LLVector4a::getF32ptr() const +{ + return (const F32* const) &mQ; +} + +// Read-only access a single float in this vector. Do not use in proximity to any function call that manipulates +// the data at the whole vector level or you will incur a substantial penalty. Consider using the splat functions instead +inline F32 LLVector4a::operator[](const S32 idx) const +{ + return ((F32*)&mQ)[idx]; +} + +// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. +inline LLSimdScalar LLVector4a::getScalarAt(const S32 idx) const +{ + // Return appropriate LLQuad. It will be cast to LLSimdScalar automatically (should be effectively a nop) + switch (idx) + { + case 0: + return mQ; + case 1: + return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(1, 1, 1, 1)); + case 2: + return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(2, 2, 2, 2)); + case 3: + default: + return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(3, 3, 3, 3)); + } +} + +// Prefer this method for read-only access to a single element. Prefer the templated version if the elem is known at compile time. +template <int N> LL_FORCE_INLINE LLSimdScalar LLVector4a::getScalarAt() const +{ + return _mm_shuffle_ps(mQ, mQ, _MM_SHUFFLE(N, N, N, N)); +} + +template<> LL_FORCE_INLINE LLSimdScalar LLVector4a::getScalarAt<0>() const +{ + return mQ; +} + +// Set to an x, y, z and optional w provided +inline void LLVector4a::set(F32 x, F32 y, F32 z, F32 w) +{ + mQ = _mm_set_ps(w, z, y, x); +} + +// Set to all zeros +inline void LLVector4a::clear() +{ + mQ = LLVector4a::getZero().mQ; +} + +inline void LLVector4a::splat(const F32 x) +{ + mQ = _mm_set1_ps(x); +} + +inline void LLVector4a::splat(const LLSimdScalar& x) +{ + mQ = _mm_shuffle_ps( x.getQuad(), x.getQuad(), _MM_SHUFFLE(0,0,0,0) ); +} + +// Set all 4 elements to element N of src, with N known at compile time +template <int N> void LLVector4a::splat(const LLVector4a& src) +{ + mQ = _mm_shuffle_ps(src.mQ, src.mQ, _MM_SHUFFLE(N, N, N, N) ); +} + +// Set all 4 elements to element i of v, with i NOT known at compile time +inline void LLVector4a::splat(const LLVector4a& v, U32 i) +{ + switch (i) + { + case 0: + mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(0, 0, 0, 0)); + break; + case 1: + mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(1, 1, 1, 1)); + break; + case 2: + mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(2, 2, 2, 2)); + break; + case 3: + mQ = _mm_shuffle_ps(v.mQ, v.mQ, _MM_SHUFFLE(3, 3, 3, 3)); + break; + } +} + +// Select bits from sourceIfTrue and sourceIfFalse according to bits in mask +inline void LLVector4a::setSelectWithMask( const LLVector4Logical& mask, const LLVector4a& sourceIfTrue, const LLVector4a& sourceIfFalse ) +{ + // ((( sourceIfTrue ^ sourceIfFalse ) & mask) ^ sourceIfFalse ) + // E.g., sourceIfFalse = 1010b, sourceIfTrue = 0101b, mask = 1100b + // (sourceIfTrue ^ sourceIfFalse) = 1111b --> & mask = 1100b --> ^ sourceIfFalse = 0110b, + // as expected (01 from sourceIfTrue, 10 from sourceIfFalse) + // Courtesy of Mark++, http://markplusplus.wordpress.com/2007/03/14/fast-sse-select-operation/ + mQ = _mm_xor_ps( sourceIfFalse, _mm_and_ps( mask, _mm_xor_ps( sourceIfTrue, sourceIfFalse ) ) ); +} + +//////////////////////////////////// +// ALGEBRAIC +//////////////////////////////////// + +// Set this to the element-wise (a + b) +inline void LLVector4a::setAdd(const LLVector4a& a, const LLVector4a& b) +{ + mQ = _mm_add_ps(a.mQ, b.mQ); +} + +// Set this to element-wise (a - b) +inline void LLVector4a::setSub(const LLVector4a& a, const LLVector4a& b) +{ + mQ = _mm_sub_ps(a.mQ, b.mQ); +} + +// Set this to element-wise multiply (a * b) +inline void LLVector4a::setMul(const LLVector4a& a, const LLVector4a& b) +{ + mQ = _mm_mul_ps(a.mQ, b.mQ); +} + +// Set this to element-wise quotient (a / b) +inline void LLVector4a::setDiv(const LLVector4a& a, const LLVector4a& b) +{ + mQ = _mm_div_ps( a.mQ, b.mQ ); +} + +// Set this to the element-wise absolute value of src +inline void LLVector4a::setAbs(const LLVector4a& src) +{ + static const LL_ALIGN_16(U32 F_ABS_MASK_4A[4]) = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF }; + mQ = _mm_and_ps(src.mQ, *reinterpret_cast<const LLQuad*>(F_ABS_MASK_4A)); +} + +// Add to each component in this vector the corresponding component in rhs +inline void LLVector4a::add(const LLVector4a& rhs) +{ + mQ = _mm_add_ps(mQ, rhs.mQ); +} + +// Subtract from each component in this vector the corresponding component in rhs +inline void LLVector4a::sub(const LLVector4a& rhs) +{ + mQ = _mm_sub_ps(mQ, rhs.mQ); +} + +// Multiply each component in this vector by the corresponding component in rhs +inline void LLVector4a::mul(const LLVector4a& rhs) +{ + mQ = _mm_mul_ps(mQ, rhs.mQ); +} + +// Divide each component in this vector by the corresponding component in rhs +inline void LLVector4a::div(const LLVector4a& rhs) +{ + // TODO: Check accuracy, maybe add divFast + mQ = _mm_div_ps(mQ, rhs.mQ); +} + +// Multiply this vector by x in a scalar fashion +inline void LLVector4a::mul(const F32 x) +{ + LLVector4a t; + t.splat(x); + + mQ = _mm_mul_ps(mQ, t.mQ); +} + +// Set this to (a x b) (geometric cross-product) +inline void LLVector4a::setCross3(const LLVector4a& a, const LLVector4a& b) +{ + // Vectors are stored in memory in w, z, y, x order from high to low + // Set vector1 = { a[W], a[X], a[Z], a[Y] } + const LLQuad vector1 = _mm_shuffle_ps( a.mQ, a.mQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // Set vector2 = { b[W], b[Y], b[X], b[Z] } + const LLQuad vector2 = _mm_shuffle_ps( b.mQ, b.mQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // mQ = { a[W]*b[W], a[X]*b[Y], a[Z]*b[X], a[Y]*b[Z] } + mQ = _mm_mul_ps( vector1, vector2 ); + // vector3 = { a[W], a[Y], a[X], a[Z] } + const LLQuad vector3 = _mm_shuffle_ps( a.mQ, a.mQ, _MM_SHUFFLE( 3, 1, 0, 2 )); + // vector4 = { b[W], b[X], b[Z], b[Y] } + const LLQuad vector4 = _mm_shuffle_ps( b.mQ, b.mQ, _MM_SHUFFLE( 3, 0, 2, 1 )); + // mQ = { 0, a[X]*b[Y] - a[Y]*b[X], a[Z]*b[X] - a[X]*b[Z], a[Y]*b[Z] - a[Z]*b[Y] } + mQ = _mm_sub_ps( mQ, _mm_mul_ps( vector3, vector4 )); +} + +/* This function works, but may be slightly slower than the one below on older machines + inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b) + { + // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } + const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ ); + // yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] } + const LLQuad wzxy = _mm_shuffle_ps( ab, ab, _MM_SHUFFLE(3, 2, 0, 1 )); + // xPlusY = { 2*a[W]*b[W], 2 * a[Z] * b[Z], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } + const LLQuad xPlusY = _mm_add_ps(ab, wzxy); + // xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } + const LLQuad xPlusYSplat = _mm_movelh_ps(xPlusY, xPlusY); + // zSplat = { a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z] } + const LLQuad zSplat = _mm_shuffle_ps( ab, ab, _MM_SHUFFLE( 2, 2, 2, 2 )); + // mQ = { a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same } + mQ = _mm_add_ps(zSplat, xPlusYSplat); + }*/ + +// Set all elements to the dot product of the x, y, and z elements in a and b +inline void LLVector4a::setAllDot3(const LLVector4a& a, const LLVector4a& b) +{ + // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } + const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ ); + // yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] } + const __m128i wzxy = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE(3, 2, 0, 1 )); + // xPlusY = { 2*a[W]*b[W], 2 * a[Z] * b[Z], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } + const LLQuad xPlusY = _mm_add_ps(ab, _mm_castsi128_ps(wzxy)); + // xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } + const LLQuad xPlusYSplat = _mm_movelh_ps(xPlusY, xPlusY); + // zSplat = { a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z], a[Z]*b[Z] } + const __m128i zSplat = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE( 2, 2, 2, 2 )); + // mQ = { a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same } + mQ = _mm_add_ps(_mm_castsi128_ps(zSplat), xPlusYSplat); +} + +// Set all elements to the dot product of the x, y, z, and w elements in a and b +inline void LLVector4a::setAllDot4(const LLVector4a& a, const LLVector4a& b) +{ + // ab = { a[W]*b[W], a[Z]*b[Z], a[Y]*b[Y], a[X]*b[X] } + const LLQuad ab = _mm_mul_ps( a.mQ, b.mQ ); + // yzxw = { a[W]*b[W], a[Z]*b[Z], a[X]*b[X], a[Y]*b[Y] } + const __m128i zwxy = _mm_shuffle_epi32(_mm_castps_si128(ab), _MM_SHUFFLE(2, 3, 0, 1 )); + // zPlusWandXplusY = { a[W]*b[W] + a[Z]*b[Z], a[Z] * b[Z] + a[W]*b[W], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } + const LLQuad zPlusWandXplusY = _mm_add_ps(ab, _mm_castsi128_ps(zwxy)); + // xPlusYSplat = { a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y], a[Y]*b[Y] + a[X] * b[X], a[X] * b[X] + a[Y] * b[Y] } + const LLQuad xPlusYSplat = _mm_movelh_ps(zPlusWandXplusY, zPlusWandXplusY); + const LLQuad zPlusWSplat = _mm_movehl_ps(zPlusWandXplusY, zPlusWandXplusY); + + // mQ = { a[W]*b[W] + a[Z] * b[Z] + a[Y] * b[Y] + a[X] * b[X], same, same, same } + mQ = _mm_add_ps(xPlusYSplat, zPlusWSplat); +} + +// Return the 3D dot product of this vector and b +inline LLSimdScalar LLVector4a::dot3(const LLVector4a& b) const +{ + const LLQuad ab = _mm_mul_ps( mQ, b.mQ ); + const LLQuad splatY = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(1, 1, 1, 1) ) ); + const LLQuad splatZ = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128(ab), _MM_SHUFFLE(2, 2, 2, 2) ) ); + const LLQuad xPlusY = _mm_add_ps( ab, splatY ); + return _mm_add_ps( xPlusY, splatZ ); +} + +// Return the 4D dot product of this vector and b +inline LLSimdScalar LLVector4a::dot4(const LLVector4a& b) const +{ + // ab = { w, z, y, x } + const LLQuad ab = _mm_mul_ps( mQ, b.mQ ); + // upperProdsInLowerElems = { y, x, y, x } + const LLQuad upperProdsInLowerElems = _mm_movehl_ps( ab, ab ); + // sumOfPairs = { w+y, z+x, 2y, 2x } + const LLQuad sumOfPairs = _mm_add_ps( upperProdsInLowerElems, ab ); + // shuffled = { z+x, z+x, z+x, z+x } + const LLQuad shuffled = _mm_castsi128_ps( _mm_shuffle_epi32( _mm_castps_si128( sumOfPairs ), _MM_SHUFFLE(1, 1, 1, 1) ) ); + return _mm_add_ss( sumOfPairs, shuffled ); +} + +// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed +// Note that this does not consider zero length vectors! +inline void LLVector4a::normalize3() +{ + // lenSqrd = a dot a + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); + // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } + const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; + static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; + // Now we do one round of Newton-Raphson approximation to get full accuracy + // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) + // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) + // w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3 + // = 0.5 * w * (3 - a*w^2) + // Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula + // which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)] + const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt ); + const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt ); + const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt ); + const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt)); + mQ = _mm_mul_ps( mQ, nrApprox ); +} + +// Normalize this vector with respect to all components. Accurate to 22 bites of precision. +// Note that this does not consider zero length vectors! +inline void LLVector4a::normalize4() +{ + // lenSqrd = a dot a + LLVector4a lenSqrd; lenSqrd.setAllDot4( *this, *this ); + // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } + const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; + static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; + // Now we do one round of Newton-Raphson approximation to get full accuracy + // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) + // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) + // w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3 + // = 0.5 * w * (3 - a*w^2) + // Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula + // which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)] + const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt ); + const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt ); + const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt ); + const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt)); + mQ = _mm_mul_ps( mQ, nrApprox ); +} + +// Normalize this vector with respect to the x, y, and z components only. Accurate to 22 bites of precision. W component is destroyed +// Note that this does not consider zero length vectors! +inline LLSimdScalar LLVector4a::normalize3withLength() +{ + // lenSqrd = a dot a + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); + // rsqrt = approximate reciprocal square (i.e., { ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2, ~1/len(a)^2 } + const LLQuad rsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + static const LLQuad half = { 0.5f, 0.5f, 0.5f, 0.5f }; + static const LLQuad three = {3.f, 3.f, 3.f, 3.f }; + // Now we do one round of Newton-Raphson approximation to get full accuracy + // According to the Newton-Raphson method, given a first 'w' for the root of f(x) = 1/x^2 - a (i.e., x = 1/sqrt(a)) + // the next better approximation w[i+1] = w - f(w)/f'(w) = w - (1/w^2 - a)/(-2*w^(-3)) + // w[i+1] = w + 0.5 * (1/w^2 - a) * w^3 = w + 0.5 * (w - a*w^3) = 1.5 * w - 0.5 * a * w^3 + // = 0.5 * w * (3 - a*w^2) + // Our first approx is w = rsqrt. We need out = a * w[i+1] (this is the input vector 'a', not the 'a' from the above formula + // which is actually lenSqrd). So out = a * [0.5*rsqrt * (3 - lenSqrd*rsqrt*rsqrt)] + const LLQuad AtimesRsqrt = _mm_mul_ps( lenSqrd.mQ, rsqrt ); + const LLQuad AtimesRsqrtTimesRsqrt = _mm_mul_ps( AtimesRsqrt, rsqrt ); + const LLQuad threeMinusAtimesRsqrtTimesRsqrt = _mm_sub_ps(three, AtimesRsqrtTimesRsqrt ); + const LLQuad nrApprox = _mm_mul_ps(half, _mm_mul_ps(rsqrt, threeMinusAtimesRsqrtTimesRsqrt)); + mQ = _mm_mul_ps( mQ, nrApprox ); + return _mm_sqrt_ss(lenSqrd); +} + +// Normalize this vector with respect to the x, y, and z components only. Accurate only to 10-12 bits of precision. W component is destroyed +// Note that this does not consider zero length vectors! +inline void LLVector4a::normalize3fast() +{ + LLVector4a lenSqrd; lenSqrd.setAllDot3( *this, *this ); + const LLQuad approxRsqrt = _mm_rsqrt_ps(lenSqrd.mQ); + mQ = _mm_mul_ps( mQ, approxRsqrt ); +} + +// Return true if this vector is normalized with respect to x,y,z up to tolerance +inline LLBool32 LLVector4a::isNormalized3( F32 tolerance ) const +{ + static LL_ALIGN_16(const U32 ones[4]) = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; + LLSimdScalar tol = _mm_load_ss( &tolerance ); + tol = _mm_mul_ss( tol, tol ); + LLVector4a lenSquared; lenSquared.setAllDot3( *this, *this ); + lenSquared.sub( *reinterpret_cast<const LLVector4a*>(ones) ); + lenSquared.setAbs(lenSquared); + return _mm_comile_ss( lenSquared, tol ); +} + +// Return true if this vector is normalized with respect to all components up to tolerance +inline LLBool32 LLVector4a::isNormalized4( F32 tolerance ) const +{ + static LL_ALIGN_16(const U32 ones[4]) = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; + LLSimdScalar tol = _mm_load_ss( &tolerance ); + tol = _mm_mul_ss( tol, tol ); + LLVector4a lenSquared; lenSquared.setAllDot4( *this, *this ); + lenSquared.sub( *reinterpret_cast<const LLVector4a*>(ones) ); + lenSquared.setAbs(lenSquared); + return _mm_comile_ss( lenSquared, tol ); +} + +// Set all elements to the length of vector 'v' +inline void LLVector4a::setAllLength3( const LLVector4a& v ) +{ + LLVector4a lenSqrd; + lenSqrd.setAllDot3(v, v); + + mQ = _mm_sqrt_ps(lenSqrd.mQ); +} + +// Get this vector's length +inline LLSimdScalar LLVector4a::getLength3() const +{ + return _mm_sqrt_ss( dot3( (const LLVector4a)mQ ) ); +} + +// Set the components of this vector to the minimum of the corresponding components of lhs and rhs +inline void LLVector4a::setMin(const LLVector4a& lhs, const LLVector4a& rhs) +{ + mQ = _mm_min_ps(lhs.mQ, rhs.mQ); +} + +// Set the components of this vector to the maximum of the corresponding components of lhs and rhs +inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs) +{ + mQ = _mm_max_ps(lhs.mQ, rhs.mQ); +} + +// Set this to (c * lhs) + rhs * ( 1 - c) +inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c) +{ + LLVector4a a = lhs; + a.mul(c); + + LLVector4a b = rhs; + b.mul(1.f-c); + + setAdd(a, b); +} + +inline LLBool32 LLVector4a::isFinite3() const +{ + static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask); + const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); + const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); + return !equalityCheck.areAnySet( LLVector4Logical::MASK_XYZ ); +} + +inline LLBool32 LLVector4a::isFinite4() const +{ + static LL_ALIGN_16(const U32 nanOrInfMask[4]) = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + const __m128i nanOrInfMaskV = *reinterpret_cast<const __m128i*> (nanOrInfMask); + const __m128i maskResult = _mm_and_si128( _mm_castps_si128(mQ), nanOrInfMaskV ); + const LLVector4Logical equalityCheck = _mm_castsi128_ps(_mm_cmpeq_epi32( maskResult, nanOrInfMaskV )); + return !equalityCheck.areAnySet( LLVector4Logical::MASK_XYZW ); +} + +inline void LLVector4a::setRotatedInv( const LLRotation& rot, const LLVector4a& vec ) +{ + LLRotation inv; inv.setTranspose( rot ); + setRotated( inv, vec ); +} + +inline void LLVector4a::setRotatedInv( const LLQuaternion2& quat, const LLVector4a& vec ) +{ + LLQuaternion2 invRot; invRot.setConjugate( quat ); + setRotated(invRot, vec); +} + +inline void LLVector4a::clamp( const LLVector4a& low, const LLVector4a& high ) +{ + const LLVector4Logical highMask = greaterThan( high ); + const LLVector4Logical lowMask = lessThan( low ); + + setSelectWithMask( highMask, high, *this ); + setSelectWithMask( lowMask, low, *this ); +} + + +//////////////////////////////////// +// LOGICAL +//////////////////////////////////// +// The functions in this section will compare the elements in this vector +// to those in rhs and return an LLVector4Logical with all bits set in elements +// where the comparison was true and all bits unset in elements where the comparison +// was false. See llvector4logica.h +//////////////////////////////////// +// WARNING: Other than equals3 and equals4, these functions do NOT account +// for floating point tolerance. You should include the appropriate tolerance +// in the inputs. +//////////////////////////////////// + +inline LLVector4Logical LLVector4a::greaterThan(const LLVector4a& rhs) const +{ + return _mm_cmpgt_ps(mQ, rhs.mQ); +} + +inline LLVector4Logical LLVector4a::lessThan(const LLVector4a& rhs) const +{ + return _mm_cmplt_ps(mQ, rhs.mQ); +} + +inline LLVector4Logical LLVector4a::greaterEqual(const LLVector4a& rhs) const +{ + return _mm_cmpge_ps(mQ, rhs.mQ); +} + +inline LLVector4Logical LLVector4a::lessEqual(const LLVector4a& rhs) const +{ + return _mm_cmple_ps(mQ, rhs.mQ); +} + +inline LLVector4Logical LLVector4a::equal(const LLVector4a& rhs) const +{ + return _mm_cmpeq_ps(mQ, rhs.mQ); +} + +// Returns true if this and rhs are componentwise equal up to the specified absolute tolerance +inline bool LLVector4a::equals4(const LLVector4a& rhs, F32 tolerance ) const +{ + LLVector4a diff; diff.setSub( *this, rhs ); + diff.setAbs( diff ); + const LLQuad tol = _mm_set1_ps( tolerance ); + const LLQuad cmp = _mm_cmplt_ps( diff, tol ); + return (_mm_movemask_ps( cmp ) & LLVector4Logical::MASK_XYZW) == LLVector4Logical::MASK_XYZW; +} + +inline bool LLVector4a::equals3(const LLVector4a& rhs, F32 tolerance ) const +{ + LLVector4a diff; diff.setSub( *this, rhs ); + diff.setAbs( diff ); + const LLQuad tol = _mm_set1_ps( tolerance ); + const LLQuad t = _mm_cmplt_ps( diff, tol ); + return (_mm_movemask_ps( t ) & LLVector4Logical::MASK_XYZ) == LLVector4Logical::MASK_XYZ; + +} + +//////////////////////////////////// +// OPERATORS +//////////////////////////////////// + +// Do NOT add aditional operators without consulting someone with SSE experience +inline const LLVector4a& LLVector4a::operator= ( const LLVector4a& rhs ) +{ + mQ = rhs.mQ; + return *this; +} + +inline const LLVector4a& LLVector4a::operator= ( const LLQuad& rhs ) +{ + mQ = rhs; + return *this; +} + +inline LLVector4a::operator LLQuad() const +{ + return mQ; +} diff --git a/indra/llmath/llvector4logical.h b/indra/llmath/llvector4logical.h new file mode 100644 index 0000000000..dd66b09d43 --- /dev/null +++ b/indra/llmath/llvector4logical.h @@ -0,0 +1,124 @@ +/** + * @file llvector4logical.h + * @brief LLVector4Logical class header file - Companion class to LLVector4a for logical and bit-twiddling operations + * + * $LicenseInfo:firstyear=2010&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_VECTOR4LOGICAL_H +#define LL_VECTOR4LOGICAL_H + + +//////////////////////////// +// LLVector4Logical +//////////////////////////// +// This class is incomplete. If you need additional functionality, +// for example setting/unsetting particular elements or performing +// other boolean operations, feel free to implement. If you need +// assistance in determining the most optimal implementation, +// contact someone with SSE experience (Falcon, Richard, Davep, e.g.) +//////////////////////////// + +static LL_ALIGN_16(const U32 S_V4LOGICAL_MASK_TABLE[4*4]) = +{ + 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF +}; + +class LLVector4Logical +{ +public: + + enum { + MASK_X = 1, + MASK_Y = 1 << 1, + MASK_Z = 1 << 2, + MASK_W = 1 << 3, + MASK_XYZ = MASK_X | MASK_Y | MASK_Z, + MASK_XYZW = MASK_XYZ | MASK_W + }; + + // Empty default ctor + LLVector4Logical() {} + + LLVector4Logical( const LLQuad& quad ) + { + mQ = quad; + } + + // Create and return a mask consisting of the lowest order bit of each element + inline U32 getGatheredBits() const + { + return _mm_movemask_ps(mQ); + }; + + // Invert this mask + inline LLVector4Logical& invert() + { + static const LL_ALIGN_16(U32 allOnes[4]) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; + mQ = _mm_andnot_ps( mQ, *(LLQuad*)(allOnes) ); + return *this; + } + + inline LLBool32 areAllSet( U32 mask ) const + { + return ( getGatheredBits() & mask) == mask; + } + + inline LLBool32 areAllSet() const + { + return areAllSet( MASK_XYZW ); + } + + inline LLBool32 areAnySet( U32 mask ) const + { + return getGatheredBits() & mask; + } + + inline LLBool32 areAnySet() const + { + return areAnySet( MASK_XYZW ); + } + + inline operator LLQuad() const + { + return mQ; + } + + inline void clear() + { + mQ = _mm_setzero_ps(); + } + + template<int N> void setElement() + { + mQ = _mm_or_ps( mQ, *reinterpret_cast<const LLQuad*>(S_V4LOGICAL_MASK_TABLE + 4*N) ); + } + +private: + + LLQuad mQ; +}; + +#endif //LL_VECTOR4ALOGICAL_H diff --git a/indra/llmath/llvolume.cpp b/indra/llmath/llvolume.cpp index 71b92962fb..c504215ee5 100644 --- a/indra/llmath/llvolume.cpp +++ b/indra/llmath/llvolume.cpp @@ -1,4 +1,5 @@ /** + * @file llvolume.cpp * * $LicenseInfo:firstyear=2002&license=viewerlgpl$ @@ -24,9 +25,13 @@ */ #include "linden_common.h" +#include "llmemory.h" #include "llmath.h" #include <set> +#if !LL_WINDOWS +#include <stdint.h> +#endif #include "llerror.h" #include "llmemtype.h" @@ -37,9 +42,16 @@ #include "v4math.h" #include "m4math.h" #include "m3math.h" +#include "llmatrix3a.h" +#include "lloctree.h" #include "lldarray.h" #include "llvolume.h" +#include "llvolumeoctree.h" #include "llstl.h" +#include "llsdserialize.h" +#include "llvector4a.h" +#include "llmatrix4a.h" +#include "lltimer.h" #define DEBUG_SILHOUETTE_BINORMALS 0 #define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette @@ -80,7 +92,18 @@ const F32 SKEW_MAX = 0.95f; const F32 SCULPT_MIN_AREA = 0.002f; const S32 SCULPT_MIN_AREA_DETAIL = 1; -#define GEN_TRI_STRIP 0 +extern BOOL gDebugGL; + +void assert_aligned(void* ptr, uintptr_t alignment) +{ +#if 0 + uintptr_t t = (uintptr_t) ptr; + if (t%alignment != 0) + { + llerrs << "WTF?" << llendl; + } +#endif +} BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLVector3& pt3, const LLVector3& norm) { @@ -99,128 +122,262 @@ BOOL check_same_clock_dir( const LLVector3& pt1, const LLVector3& pt2, const LLV BOOL LLLineSegmentBoxIntersect(const LLVector3& start, const LLVector3& end, const LLVector3& center, const LLVector3& size) { - float fAWdU[3]; - LLVector3 dir; - LLVector3 diff; + return LLLineSegmentBoxIntersect(start.mV, end.mV, center.mV, size.mV); +} + +BOOL LLLineSegmentBoxIntersect(const F32* start, const F32* end, const F32* center, const F32* size) +{ + F32 fAWdU[3]; + F32 dir[3]; + F32 diff[3]; for (U32 i = 0; i < 3; i++) { - dir.mV[i] = 0.5f * (end.mV[i] - start.mV[i]); - diff.mV[i] = (0.5f * (end.mV[i] + start.mV[i])) - center.mV[i]; - fAWdU[i] = fabsf(dir.mV[i]); - if(fabsf(diff.mV[i])>size.mV[i] + fAWdU[i]) return false; + dir[i] = 0.5f * (end[i] - start[i]); + diff[i] = (0.5f * (end[i] + start[i])) - center[i]; + fAWdU[i] = fabsf(dir[i]); + if(fabsf(diff[i])>size[i] + fAWdU[i]) return false; } float f; - f = dir.mV[1] * diff.mV[2] - dir.mV[2] * diff.mV[1]; if(fabsf(f)>size.mV[1]*fAWdU[2] + size.mV[2]*fAWdU[1]) return false; - f = dir.mV[2] * diff.mV[0] - dir.mV[0] * diff.mV[2]; if(fabsf(f)>size.mV[0]*fAWdU[2] + size.mV[2]*fAWdU[0]) return false; - f = dir.mV[0] * diff.mV[1] - dir.mV[1] * diff.mV[0]; if(fabsf(f)>size.mV[0]*fAWdU[1] + size.mV[1]*fAWdU[0]) return false; + f = dir[1] * diff[2] - dir[2] * diff[1]; if(fabsf(f)>size[1]*fAWdU[2] + size[2]*fAWdU[1]) return false; + f = dir[2] * diff[0] - dir[0] * diff[2]; if(fabsf(f)>size[0]*fAWdU[2] + size[2]*fAWdU[0]) return false; + f = dir[0] * diff[1] - dir[1] * diff[0]; if(fabsf(f)>size[0]*fAWdU[1] + size[1]*fAWdU[0]) return false; return true; } + // intersect test between triangle vert0, vert1, vert2 and a ray from orig in direction dir. // returns TRUE if intersecting and returns barycentric coordinates in intersection_a, intersection_b, // and returns the intersection point along dir in intersection_t. // Moller-Trumbore algorithm -BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, - F32* intersection_a, F32* intersection_b, F32* intersection_t, BOOL two_sided) +BOOL LLTriangleRayIntersect(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, + F32& intersection_a, F32& intersection_b, F32& intersection_t) { - F32 u, v, t; /* find vectors for two edges sharing vert0 */ - LLVector3 edge1 = vert1 - vert0; + LLVector4a edge1; + edge1.setSub(vert1, vert0); - LLVector3 edge2 = vert2 - vert0;; + LLVector4a edge2; + edge2.setSub(vert2, vert0); /* begin calculating determinant - also used to calculate U parameter */ - LLVector3 pvec = dir % edge2; - - /* if determinant is near zero, ray lies in plane of triangle */ - F32 det = edge1 * pvec; + LLVector4a pvec; + pvec.setCross3(dir, edge2); - if (!two_sided) + /* if determinant is near zero, ray lies in plane of triangle */ + LLVector4a det; + det.setAllDot3(edge1, pvec); + + if (det.greaterEqual(LLVector4a::getEpsilon()).getGatheredBits() & 0x7) { - if (det < F_APPROXIMATELY_ZERO) - { - return FALSE; - } - /* calculate distance from vert0 to ray origin */ - LLVector3 tvec = orig - vert0; + LLVector4a tvec; + tvec.setSub(orig, vert0); /* calculate U parameter and test bounds */ - u = tvec * pvec; + LLVector4a u; + u.setAllDot3(tvec,pvec); - if (u < 0.f || u > det) + if ((u.greaterEqual(LLVector4a::getZero()).getGatheredBits() & 0x7) && + (u.lessEqual(det).getGatheredBits() & 0x7)) { - return FALSE; + /* prepare to test V parameter */ + LLVector4a qvec; + qvec.setCross3(tvec, edge1); + + /* calculate V parameter and test bounds */ + LLVector4a v; + v.setAllDot3(dir, qvec); + + + //if (!(v < 0.f || u + v > det)) + + LLVector4a sum_uv; + sum_uv.setAdd(u, v); + + S32 v_gequal = v.greaterEqual(LLVector4a::getZero()).getGatheredBits() & 0x7; + S32 sum_lequal = sum_uv.lessEqual(det).getGatheredBits() & 0x7; + + if (v_gequal && sum_lequal) + { + /* calculate t, scale parameters, ray intersects triangle */ + LLVector4a t; + t.setAllDot3(edge2,qvec); + + t.div(det); + u.div(det); + v.div(det); + + intersection_a = u[0]; + intersection_b = v[0]; + intersection_t = t[0]; + return TRUE; + } } - - /* prepare to test V parameter */ - LLVector3 qvec = tvec % edge1; + } - /* calculate V parameter and test bounds */ - v = dir * qvec; - if (v < 0.f || u + v > det) - { - return FALSE; - } + return FALSE; +} - /* calculate t, scale parameters, ray intersects triangle */ - t = edge2 * qvec; - F32 inv_det = 1.0 / det; - t *= inv_det; - u *= inv_det; - v *= inv_det; - } +BOOL LLTriangleRayIntersectTwoSided(const LLVector4a& vert0, const LLVector4a& vert1, const LLVector4a& vert2, const LLVector4a& orig, const LLVector4a& dir, + F32& intersection_a, F32& intersection_b, F32& intersection_t) +{ + F32 u, v, t; - else // two sided - { - if (det > -F_APPROXIMATELY_ZERO && det < F_APPROXIMATELY_ZERO) - { - return FALSE; - } - F32 inv_det = 1.0 / det; + /* find vectors for two edges sharing vert0 */ + LLVector4a edge1; + edge1.setSub(vert1, vert0); + + + LLVector4a edge2; + edge2.setSub(vert2, vert0); - /* calculate distance from vert0 to ray origin */ - LLVector3 tvec = orig - vert0; - - /* calculate U parameter and test bounds */ - u = (tvec * pvec) * inv_det; - if (u < 0.f || u > 1.f) - { - return FALSE; - } + /* begin calculating determinant - also used to calculate U parameter */ + LLVector4a pvec; + pvec.setCross3(dir, edge2); - /* prepare to test V parameter */ - LLVector3 qvec = tvec - edge1; - - /* calculate V parameter and test bounds */ - v = (dir * qvec) * inv_det; - - if (v < 0.f || u + v > 1.f) - { - return FALSE; - } + /* if determinant is near zero, ray lies in plane of triangle */ + F32 det = edge1.dot3(pvec).getF32(); + + + if (det > -F_APPROXIMATELY_ZERO && det < F_APPROXIMATELY_ZERO) + { + return FALSE; + } + + F32 inv_det = 1.f / det; - /* calculate t, ray intersects triangle */ - t = (edge2 * qvec) * inv_det; + /* calculate distance from vert0 to ray origin */ + LLVector4a tvec; + tvec.setSub(orig, vert0); + + /* calculate U parameter and test bounds */ + u = (tvec.dot3(pvec).getF32()) * inv_det; + if (u < 0.f || u > 1.f) + { + return FALSE; } + + /* prepare to test V parameter */ + tvec.sub(edge1); + + /* calculate V parameter and test bounds */ + v = (dir.dot3(tvec).getF32()) * inv_det; + + if (v < 0.f || u + v > 1.f) + { + return FALSE; + } + + /* calculate t, ray intersects triangle */ + t = (edge2.dot3(tvec).getF32()) * inv_det; - if (intersection_a != NULL) - *intersection_a = u; - if (intersection_b != NULL) - *intersection_b = v; - if (intersection_t != NULL) - *intersection_t = t; + intersection_a = u; + intersection_b = v; + intersection_t = t; return TRUE; } +//helper for non-aligned vectors +BOOL LLTriangleRayIntersect(const LLVector3& vert0, const LLVector3& vert1, const LLVector3& vert2, const LLVector3& orig, const LLVector3& dir, + F32& intersection_a, F32& intersection_b, F32& intersection_t, BOOL two_sided) +{ + LLVector4a vert0a, vert1a, vert2a, origa, dira; + vert0a.load3(vert0.mV); + vert1a.load3(vert1.mV); + vert2a.load3(vert2.mV); + origa.load3(orig.mV); + dira.load3(dir.mV); + + if (two_sided) + { + return LLTriangleRayIntersectTwoSided(vert0a, vert1a, vert2a, origa, dira, + intersection_a, intersection_b, intersection_t); + } + else + { + return LLTriangleRayIntersect(vert0a, vert1a, vert2a, origa, dira, + intersection_a, intersection_b, intersection_t); + } +} + +class LLVolumeOctreeRebound : public LLOctreeTravelerDepthFirst<LLVolumeTriangle> +{ +public: + const LLVolumeFace* mFace; + + LLVolumeOctreeRebound(const LLVolumeFace* face) + { + mFace = face; + } + + virtual void visit(const LLOctreeNode<LLVolumeTriangle>* branch) + { //this is a depth first traversal, so it's safe to assum all children have complete + //bounding data + + LLVolumeOctreeListener* node = (LLVolumeOctreeListener*) branch->getListener(0); + + LLVector4a& min = node->mExtents[0]; + LLVector4a& max = node->mExtents[1]; + + if (!branch->getData().empty()) + { //node has data, find AABB that binds data set + const LLVolumeTriangle* tri = *(branch->getData().begin()); + + //initialize min/max to first available vertex + min = *(tri->mV[0]); + max = *(tri->mV[0]); + + for (LLOctreeNode<LLVolumeTriangle>::const_element_iter iter = + branch->getData().begin(); iter != branch->getData().end(); ++iter) + { //for each triangle in node + + //stretch by triangles in node + tri = *iter; + + min.setMin(min, *tri->mV[0]); + min.setMin(min, *tri->mV[1]); + min.setMin(min, *tri->mV[2]); + + max.setMax(max, *tri->mV[0]); + max.setMax(max, *tri->mV[1]); + max.setMax(max, *tri->mV[2]); + } + } + else if (!branch->getChildren().empty()) + { //no data, but child nodes exist + LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(0)->getListener(0); + + //initialize min/max to extents of first child + min = child->mExtents[0]; + max = child->mExtents[1]; + } + else + { + llerrs << "WTF? Empty leaf" << llendl; + } + + for (S32 i = 0; i < branch->getChildCount(); ++i) + { //stretch by child extents + LLVolumeOctreeListener* child = (LLVolumeOctreeListener*) branch->getChild(i)->getListener(0); + min.setMin(min, child->mExtents[0]); + max.setMax(max, child->mExtents[1]); + } + + node->mBounds[0].setAdd(min, max); + node->mBounds[0].mul(0.5f); + + node->mBounds[1].setSub(max,min); + node->mBounds[1].mul(0.5f); + } +}; //------------------------------------------------------------------- // statics @@ -1669,7 +1826,13 @@ LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL ge mFaceMask = 0x0; mDetail = detail; mSculptLevel = -2; - + mIsTetrahedron = FALSE; + mLODScaleBias.setVec(1,1,1); + mHullPoints = NULL; + mHullIndices = NULL; + mNumHullPoints = 0; + mNumHullIndices = 0; + // set defaults if (mParams.getPathParams().getCurveType() == LL_PCODE_PATH_FLEXIBLE) { @@ -1684,7 +1847,8 @@ LLVolume::LLVolume(const LLVolumeParams ¶ms, const F32 detail, const BOOL ge mGenerateSingleFace = generate_single_face; generate(); - if (mParams.getSculptID().isNull() && params.getSculptType() == LL_SCULPT_TYPE_NONE) + + if (mParams.getSculptID().isNull() && mParams.getSculptType() == LL_SCULPT_TYPE_NONE) { createVolumeFaces(); } @@ -1719,6 +1883,11 @@ LLVolume::~LLVolume() mPathp = NULL; mProfilep = NULL; mVolumeFaces.clear(); + + ll_aligned_free_16(mHullPoints); + mHullPoints = NULL; + ll_aligned_free_16(mHullIndices); + mHullIndices = NULL; } BOOL LLVolume::generate() @@ -1835,6 +2004,577 @@ BOOL LLVolume::generate() return FALSE; } +void LLVolumeFace::VertexData::init() +{ + if (!mData) + { + mData = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*2); + } +} + +LLVolumeFace::VertexData::VertexData() +{ + mData = NULL; + init(); +} + +LLVolumeFace::VertexData::VertexData(const VertexData& rhs) +{ + mData = NULL; + *this = rhs; +} + +const LLVolumeFace::VertexData& LLVolumeFace::VertexData::operator=(const LLVolumeFace::VertexData& rhs) +{ + if (this != &rhs) + { + init(); + LLVector4a::memcpyNonAliased16((F32*) mData, (F32*) rhs.mData, 2*sizeof(LLVector4a)); + mTexCoord = rhs.mTexCoord; + } + return *this; +} + +LLVolumeFace::VertexData::~VertexData() +{ + ll_aligned_free_16(mData); + mData = NULL; +} + +LLVector4a& LLVolumeFace::VertexData::getPosition() +{ + return mData[POSITION]; +} + +LLVector4a& LLVolumeFace::VertexData::getNormal() +{ + return mData[NORMAL]; +} + +const LLVector4a& LLVolumeFace::VertexData::getPosition() const +{ + return mData[POSITION]; +} + +const LLVector4a& LLVolumeFace::VertexData::getNormal() const +{ + return mData[NORMAL]; +} + + +void LLVolumeFace::VertexData::setPosition(const LLVector4a& pos) +{ + mData[POSITION] = pos; +} + +void LLVolumeFace::VertexData::setNormal(const LLVector4a& norm) +{ + mData[NORMAL] = norm; +} + +bool LLVolumeFace::VertexData::operator<(const LLVolumeFace::VertexData& rhs)const +{ + const F32* lp = this->getPosition().getF32ptr(); + const F32* rp = rhs.getPosition().getF32ptr(); + + if (lp[0] != rp[0]) + { + return lp[0] < rp[0]; + } + + if (rp[1] != lp[1]) + { + return lp[1] < rp[1]; + } + + if (rp[2] != lp[2]) + { + return lp[2] < rp[2]; + } + + lp = getNormal().getF32ptr(); + rp = rhs.getNormal().getF32ptr(); + + if (lp[0] != rp[0]) + { + return lp[0] < rp[0]; + } + + if (rp[1] != lp[1]) + { + return lp[1] < rp[1]; + } + + if (rp[2] != lp[2]) + { + return lp[2] < rp[2]; + } + + if (mTexCoord.mV[0] != rhs.mTexCoord.mV[0]) + { + return mTexCoord.mV[0] < rhs.mTexCoord.mV[0]; + } + + return mTexCoord.mV[1] < rhs.mTexCoord.mV[1]; +} + +bool LLVolumeFace::VertexData::operator==(const LLVolumeFace::VertexData& rhs)const +{ + return mData[POSITION].equals3(rhs.getPosition()) && + mData[NORMAL].equals3(rhs.getNormal()) && + mTexCoord == rhs.mTexCoord; +} + +bool LLVolumeFace::VertexData::compareNormal(const LLVolumeFace::VertexData& rhs, F32 angle_cutoff) const +{ + bool retval = false; + if (rhs.mData[POSITION].equals3(mData[POSITION]) && rhs.mTexCoord == mTexCoord) + { + if (angle_cutoff > 1.f) + { + retval = (mData[NORMAL].equals3(rhs.mData[NORMAL])); + } + else + { + F32 cur_angle = rhs.mData[NORMAL].dot3(mData[NORMAL]).getF32(); + retval = cur_angle > angle_cutoff; + } + } + + return retval; +} + +bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size) +{ + //input stream is now pointing at a zlib compressed block of LLSD + //decompress block + LLSD mdl; + if (!unzip_llsd(mdl, is, size)) + { + llwarns << "not a valid mesh asset!" << llendl; + return false; + } + + { + U32 face_count = mdl.size(); + + if (face_count == 0) + { + llerrs << "WTF?" << llendl; + } + + mVolumeFaces.resize(face_count); + + for (U32 i = 0; i < face_count; ++i) + { + LLSD::Binary pos = mdl[i]["Position"]; + LLSD::Binary norm = mdl[i]["Normal"]; + LLSD::Binary tc = mdl[i]["TexCoord0"]; + LLSD::Binary idx = mdl[i]["TriangleList"]; + + LLVolumeFace& face = mVolumeFaces[i]; + + //copy out indices + face.resizeIndices(idx.size()/2); + + if (idx.empty() || face.mNumIndices < 3) + { //why is there an empty index list? + llerrs <<"WTF?" << llendl; + continue; + } + + U16* indices = (U16*) &(idx[0]); + U32 count = idx.size()/2; + for (U32 j = 0; j < count; ++j) + { + face.mIndices[j] = indices[j]; + } + + //copy out vertices + U32 num_verts = pos.size()/(3*2); + face.resizeVertices(num_verts); + + LLVector3 minp; + LLVector3 maxp; + LLVector2 min_tc; + LLVector2 max_tc; + + minp.setValue(mdl[i]["PositionDomain"]["Min"]); + maxp.setValue(mdl[i]["PositionDomain"]["Max"]); + LLVector4a min_pos, max_pos; + min_pos.load3(minp.mV); + max_pos.load3(maxp.mV); + + min_tc.setValue(mdl[i]["TexCoord0Domain"]["Min"]); + max_tc.setValue(mdl[i]["TexCoord0Domain"]["Max"]); + + LLVector4a pos_range; + pos_range.setSub(max_pos, min_pos); + LLVector2 tc_range2 = max_tc - min_tc; + LLVector4a tc_range; + tc_range.set(tc_range2[0], tc_range2[1], tc_range2[0], tc_range2[1]); + LLVector4a min_tc4(min_tc[0], min_tc[1], min_tc[0], min_tc[1]); + + LLVector4a* pos_out = face.mPositions; + LLVector4a* norm_out = face.mNormals; + LLVector4a* tc_out = (LLVector4a*) face.mTexCoords; + + { + U16* v = (U16*) &(pos[0]); + for (U32 j = 0; j < num_verts; ++j) + { + pos_out->set((F32) v[0], (F32) v[1], (F32) v[2]); + pos_out->div(65535.f); + pos_out->mul(pos_range); + pos_out->add(min_pos); + pos_out++; + v += 3; + } + + } + + { + U16* n = (U16*) &(norm[0]); + for (U32 j = 0; j < num_verts; ++j) + { + norm_out->set((F32) n[0], (F32) n[1], (F32) n[2]); + norm_out->div(65535.f); + norm_out->mul(2.f); + norm_out->sub(1.f); + norm_out++; + n += 3; + } + } + + { + U16* t = (U16*) &(tc[0]); + for (U32 j = 0; j < num_verts; j+=2) + { + if (j < num_verts-1) + { + tc_out->set((F32) t[0], (F32) t[1], (F32) t[2], (F32) t[3]); + } + else + { + tc_out->set((F32) t[0], (F32) t[1], 0.f, 0.f); + } + + t += 4; + + tc_out->div(65535.f); + tc_out->mul(tc_range); + tc_out->add(min_tc4); + + tc_out++; + } + } + + if (mdl[i].has("Weights")) + { + face.allocateWeights(num_verts); + + LLSD::Binary weights = mdl[i]["Weights"]; + + U32 idx = 0; + + U32 cur_vertex = 0; + while (idx < weights.size() && cur_vertex < num_verts) + { + const U8 END_INFLUENCES = 0xFF; + U8 joint = weights[idx++]; + + U32 cur_influence = 0; + LLVector4 wght(0,0,0,0); + + while (joint != END_INFLUENCES && idx < weights.size()) + { + U16 influence = weights[idx++]; + influence |= ((U16) weights[idx++] << 8); + + F32 w = llclamp((F32) influence / 65535.f, 0.f, 0.99999f); + wght.mV[cur_influence++] = (F32) joint + w; + + if (cur_influence >= 4) + { + joint = END_INFLUENCES; + } + else + { + joint = weights[idx++]; + } + } + + face.mWeights[cur_vertex].loadua(wght.mV); + + cur_vertex++; + } + + if (cur_vertex != num_verts || idx != weights.size()) + { + llwarns << "Vertex weight count does not match vertex count!" << llendl; + } + + } + + // modifier flags? + bool do_mirror = (mParams.getSculptType() & LL_SCULPT_FLAG_MIRROR); + bool do_invert = (mParams.getSculptType() &LL_SCULPT_FLAG_INVERT); + + + // translate to actions: + bool do_reflect_x = false; + bool do_reverse_triangles = false; + bool do_invert_normals = false; + + if (do_mirror) + { + do_reflect_x = true; + do_reverse_triangles = !do_reverse_triangles; + } + + if (do_invert) + { + do_invert_normals = true; + do_reverse_triangles = !do_reverse_triangles; + } + + // now do the work + + if (do_reflect_x) + { + LLVector4a* p = (LLVector4a*) face.mPositions; + LLVector4a* n = (LLVector4a*) face.mNormals; + + for (S32 i = 0; i < face.mNumVertices; i++) + { + p[i].mul(-1.0f); + n[i].mul(-1.0f); + } + } + + if (do_invert_normals) + { + LLVector4a* n = (LLVector4a*) face.mNormals; + + for (S32 i = 0; i < face.mNumVertices; i++) + { + n[i].mul(-1.0f); + } + } + + if (do_reverse_triangles) + { + for (U32 j = 0; j < face.mNumIndices; j += 3) + { + // swap the 2nd and 3rd index + S32 swap = face.mIndices[j+1]; + face.mIndices[j+1] = face.mIndices[j+2]; + face.mIndices[j+2] = swap; + } + } + + //calculate bounding box + LLVector4a& min = face.mExtents[0]; + LLVector4a& max = face.mExtents[1]; + + min.clear(); + max.clear(); + min = max = face.mPositions[0]; + + for (S32 i = 1; i < face.mNumVertices; ++i) + { + min.setMin(min, face.mPositions[i]); + max.setMax(max, face.mPositions[i]); + } + } + } + + mSculptLevel = 0; // success! + + cacheOptimize(); + + return true; +} + +void tetrahedron_set_normal(LLVolumeFace::VertexData* cv) +{ + LLVector4a v0; + v0.setSub(cv[1].getPosition(), cv[0].getNormal()); + LLVector4a v1; + v1.setSub(cv[2].getNormal(), cv[0].getPosition()); + + cv[0].getNormal().setCross3(v0,v1); + cv[0].getNormal().normalize3fast(); + cv[1].setNormal(cv[0].getNormal()); + cv[2].setNormal(cv[1].getNormal()); +} + +BOOL LLVolume::isTetrahedron() +{ + return mIsTetrahedron; +} + +void LLVolume::makeTetrahedron() +{ + mVolumeFaces.clear(); + + LLVolumeFace face; + + F32 x = 0.25f; + LLVector4a p[] = + { //unit tetrahedron corners + LLVector4a(x,x,x), + LLVector4a(-x,-x,x), + LLVector4a(-x,x,-x), + LLVector4a(x,-x,-x) + }; + + face.mExtents[0].splat(-x); + face.mExtents[1].splat(x); + + LLVolumeFace::VertexData cv[3]; + + //set texture coordinates + cv[0].mTexCoord = LLVector2(0,0); + cv[1].mTexCoord = LLVector2(1,0); + cv[2].mTexCoord = LLVector2(0.5f, 0.5f*F_SQRT3); + + + //side 1 + cv[0].setPosition(p[1]); + cv[1].setPosition(p[0]); + cv[2].setPosition(p[2]); + + tetrahedron_set_normal(cv); + + face.resizeVertices(12); + face.resizeIndices(12); + + LLVector4a* v = (LLVector4a*) face.mPositions; + LLVector4a* n = (LLVector4a*) face.mNormals; + LLVector2* tc = (LLVector2*) face.mTexCoords; + + v[0] = cv[0].getPosition(); + v[1] = cv[1].getPosition(); + v[2] = cv[2].getPosition(); + v += 3; + + n[0] = cv[0].getNormal(); + n[1] = cv[1].getNormal(); + n[2] = cv[2].getNormal(); + n += 3; + + tc[0] = cv[0].mTexCoord; + tc[1] = cv[1].mTexCoord; + tc[2] = cv[2].mTexCoord; + tc += 3; + + + //side 2 + cv[0].setPosition(p[3]); + cv[1].setPosition(p[0]); + cv[2].setPosition(p[1]); + + tetrahedron_set_normal(cv); + + v[0] = cv[0].getPosition(); + v[1] = cv[1].getPosition(); + v[2] = cv[2].getPosition(); + v += 3; + + n[0] = cv[0].getNormal(); + n[1] = cv[1].getNormal(); + n[2] = cv[2].getNormal(); + n += 3; + + tc[0] = cv[0].mTexCoord; + tc[1] = cv[1].mTexCoord; + tc[2] = cv[2].mTexCoord; + tc += 3; + + //side 3 + cv[0].setPosition(p[3]); + cv[1].setPosition(p[1]); + cv[2].setPosition(p[2]); + + tetrahedron_set_normal(cv); + + v[0] = cv[0].getPosition(); + v[1] = cv[1].getPosition(); + v[2] = cv[2].getPosition(); + v += 3; + + n[0] = cv[0].getNormal(); + n[1] = cv[1].getNormal(); + n[2] = cv[2].getNormal(); + n += 3; + + tc[0] = cv[0].mTexCoord; + tc[1] = cv[1].mTexCoord; + tc[2] = cv[2].mTexCoord; + tc += 3; + + //side 4 + cv[0].setPosition(p[2]); + cv[1].setPosition(p[0]); + cv[2].setPosition(p[3]); + + tetrahedron_set_normal(cv); + + v[0] = cv[0].getPosition(); + v[1] = cv[1].getPosition(); + v[2] = cv[2].getPosition(); + v += 3; + + n[0] = cv[0].getNormal(); + n[1] = cv[1].getNormal(); + n[2] = cv[2].getNormal(); + n += 3; + + tc[0] = cv[0].mTexCoord; + tc[1] = cv[1].mTexCoord; + tc[2] = cv[2].mTexCoord; + tc += 3; + + //set index buffer + for (U16 i = 0; i < 12; i++) + { + face.mIndices[i] = i; + } + + mVolumeFaces.push_back(face); + mSculptLevel = 0; + mIsTetrahedron = TRUE; +} + +void LLVolume::copyVolumeFaces(const LLVolume* volume) +{ + mVolumeFaces = volume->mVolumeFaces; + mSculptLevel = 0; + mIsTetrahedron = FALSE; +} + +void LLVolume::cacheOptimize() +{ + for (S32 i = 0; i < mVolumeFaces.size(); ++i) + { + mVolumeFaces[i].cacheOptimize(); + } +} + + +S32 LLVolume::getNumFaces() const +{ + U8 sculpt_type = (mParams.getSculptType() & LL_SCULPT_TYPE_MASK); + + if (sculpt_type == LL_SCULPT_TYPE_MESH) + { + return LL_SCULPT_MESH_MAX_FACES; + } + + return (S32)mProfilep->mFaces.size(); +} + void LLVolume::createVolumeFaces() { @@ -2188,7 +2928,7 @@ void sculpt_calc_mesh_resolution(U16 width, U16 height, U8 type, F32 detail, S32 ratio = (F32) width / (F32) height; - s = (S32)fsqrtf(((F32)vertices / ratio)); + s = (S32)(F32) sqrt(((F32)vertices / ratio)); s = llmax(s, 4); // no degenerate sizes, please t = vertices / s; @@ -2281,6 +3021,16 @@ BOOL LLVolume::isFlat(S32 face) } +bool LLVolumeParams::isSculpt() const +{ + return mSculptID.notNull(); +} + +bool LLVolumeParams::isMeshSculpt() const +{ + return isSculpt() && ((mSculptType & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH); +} + bool LLVolumeParams::operator==(const LLVolumeParams ¶ms) const { return ( (getPathParams() == params.getPathParams()) && @@ -2314,7 +3064,6 @@ bool LLVolumeParams::operator<(const LLVolumeParams ¶ms) const return mSculptID < params.mSculptID; } - return mSculptType < params.mSculptType; @@ -3372,34 +4121,62 @@ S32 LLVolume::getNumTriangleIndices() const return count; } + +S32 LLVolume::getNumTriangles() const +{ + U32 triangle_count = 0; + + for (S32 i = 0; i < getNumVolumeFaces(); ++i) + { + triangle_count += getVolumeFace(i).mNumIndices/3; + } + + return triangle_count; +} + + //----------------------------------------------------------------------------- // generateSilhouetteVertices() //----------------------------------------------------------------------------- void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, std::vector<LLVector3> &normals, - std::vector<S32> &segments, - const LLVector3& obj_cam_vec, - const LLMatrix4& mat, - const LLMatrix3& norm_mat, + const LLVector3& obj_cam_vec_in, + const LLMatrix4& mat_in, + const LLMatrix3& norm_mat_in, S32 face_mask) { LLMemType m1(LLMemType::MTYPE_VOLUME); - + + LLMatrix4a mat; + mat.loadu(mat_in); + + LLMatrix4a norm_mat; + norm_mat.loadu(norm_mat_in); + + LLVector4a obj_cam_vec; + obj_cam_vec.load3(obj_cam_vec_in.mV); + vertices.clear(); normals.clear(); - segments.clear(); + if ((mParams.getSculptType() & LL_SCULPT_TYPE_MASK) == LL_SCULPT_TYPE_MESH) + { + return; + } + S32 cur_index = 0; //for each face for (face_list_t::iterator iter = mVolumeFaces.begin(); iter != mVolumeFaces.end(); ++iter) { - const LLVolumeFace& face = *iter; + LLVolumeFace& face = *iter; - if (!(face_mask & (0x1 << cur_index++))) + if (!(face_mask & (0x1 << cur_index++)) || + face.mNumIndices == 0 || face.mEdge.empty()) { continue; } + if (face.mTypeMask & (LLVolumeFace::CAP_MASK)) { } @@ -3412,7 +4189,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, #if DEBUG_SILHOUETTE_EDGE_MAP //for each triangle - U32 count = face.mIndices.size(); + U32 count = face.mNumIndices; for (U32 j = 0; j < count/3; j++) { //get vertices S32 v1 = face.mIndices[j*3+0]; @@ -3420,9 +4197,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, S32 v3 = face.mIndices[j*3+2]; //get current face center - LLVector3 cCenter = (face.mVertices[v1].mPosition + - face.mVertices[v2].mPosition + - face.mVertices[v3].mPosition) / 3.0f; + LLVector3 cCenter = (face.mVertices[v1].getPosition() + + face.mVertices[v2].getPosition() + + face.mVertices[v3].getPosition()) / 3.0f; //for each edge for (S32 k = 0; k < 3; k++) { @@ -3440,9 +4217,9 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, v3 = face.mIndices[nIndex*3+2]; //get neighbor face center - LLVector3 nCenter = (face.mVertices[v1].mPosition + - face.mVertices[v2].mPosition + - face.mVertices[v3].mPosition) / 3.0f; + LLVector3 nCenter = (face.mVertices[v1].getPosition() + + face.mVertices[v2].getPosition() + + face.mVertices[v3].getPosition()) / 3.0f; //draw line vertices.push_back(cCenter); @@ -3465,15 +4242,15 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, #elif DEBUG_SILHOUETTE_NORMALS //for each vertex - for (U32 j = 0; j < face.mVertices.size(); j++) { - vertices.push_back(face.mVertices[j].mPosition); - vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mNormal*0.1f); + for (U32 j = 0; j < face.mNumVertices; j++) { + vertices.push_back(face.mVertices[j].getPosition()); + vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].getNormal()*0.1f); normals.push_back(LLVector3(0,0,1)); normals.push_back(LLVector3(0,0,1)); segments.push_back(vertices.size()); #if DEBUG_SILHOUETTE_BINORMALS - vertices.push_back(face.mVertices[j].mPosition); - vertices.push_back(face.mVertices[j].mPosition + face.mVertices[j].mBinormal*0.1f); + vertices.push_back(face.mVertices[j].getPosition()); + vertices.push_back(face.mVertices[j].getPosition() + face.mVertices[j].mBinormal*0.1f); normals.push_back(LLVector3(0,0,1)); normals.push_back(LLVector3(0,0,1)); segments.push_back(vertices.size()); @@ -3491,26 +4268,36 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, //for each triangle std::vector<U8> fFacing; - vector_append(fFacing, face.mIndices.size()/3); - for (U32 j = 0; j < face.mIndices.size()/3; j++) + vector_append(fFacing, face.mNumIndices/3); + + LLVector4a* v = (LLVector4a*) face.mPositions; + LLVector4a* n = (LLVector4a*) face.mNormals; + + for (U32 j = 0; j < face.mNumIndices/3; j++) { //approximate normal S32 v1 = face.mIndices[j*3+0]; S32 v2 = face.mIndices[j*3+1]; S32 v3 = face.mIndices[j*3+2]; - LLVector3 norm = (face.mVertices[v1].mPosition - face.mVertices[v2].mPosition) % - (face.mVertices[v2].mPosition - face.mVertices[v3].mPosition); - - if (norm.magVecSquared() < 0.00000001f) + LLVector4a c1,c2; + c1.setSub(v[v1], v[v2]); + c2.setSub(v[v2], v[v3]); + + LLVector4a norm; + + norm.setCross3(c1, c2); + + if (norm.dot3(norm) < 0.00000001f) { fFacing[j] = AWAY | TOWARDS; } else { //get view vector - LLVector3 view = (obj_cam_vec-face.mVertices[v1].mPosition); - bool away = view * norm > 0.0f; + LLVector4a view; + view.setSub(obj_cam_vec, v[v1]); + bool away = view.dot3(norm) > 0.0f; if (away) { fFacing[j] = AWAY; @@ -3523,7 +4310,7 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, } //for each triangle - for (U32 j = 0; j < face.mIndices.size()/3; j++) + for (U32 j = 0; j < face.mNumIndices/3; j++) { if (fFacing[j] == (AWAY | TOWARDS)) { //this is a degenerate triangle @@ -3556,17 +4343,21 @@ void LLVolume::generateSilhouetteVertices(std::vector<LLVector3> &vertices, S32 v1 = face.mIndices[j*3+k]; S32 v2 = face.mIndices[j*3+((k+1)%3)]; - vertices.push_back(face.mVertices[v1].mPosition*mat); - LLVector3 norm1 = face.mVertices[v1].mNormal * norm_mat; - norm1.normVec(); - normals.push_back(norm1); + LLVector4a t; + mat.affineTransform(v[v1], t); + vertices.push_back(LLVector3(t[0], t[1], t[2])); - vertices.push_back(face.mVertices[v2].mPosition*mat); - LLVector3 norm2 = face.mVertices[v2].mNormal * norm_mat; - norm2.normVec(); - normals.push_back(norm2); + norm_mat.rotate(n[v1], t); - segments.push_back(vertices.size()); + t.normalize3fast(); + normals.push_back(LLVector3(t[0], t[1], t[2])); + + mat.affineTransform(v[v2], t); + vertices.push_back(LLVector3(t[0], t[1], t[2])); + + norm_mat.rotate(n[v2], t); + t.normalize3fast(); + normals.push_back(LLVector3(t[0], t[1], t[2])); } } } @@ -3579,6 +4370,19 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, S32 face, LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) { + LLVector4a starta, enda; + starta.load3(start.mV); + enda.load3(end.mV); + + return lineSegmentIntersect(starta, enda, face, intersection, tex_coord, normal, bi_normal); + +} + + +S32 LLVolume::lineSegmentIntersect(const LLVector4a& start, const LLVector4a& end, + S32 face, + LLVector3* intersection,LLVector2* tex_coord, LLVector3* normal, LLVector3* bi_normal) +{ S32 hit_face = -1; S32 start_face; @@ -3595,16 +4399,23 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, end_face = face; } - LLVector3 dir = end - start; + LLVector4a dir; + dir.setSub(end, start); F32 closest_t = 2.f; // must be larger than 1 + end_face = llmin(end_face, getNumVolumeFaces()-1); + for (S32 i = start_face; i <= end_face; i++) { - const LLVolumeFace &face = getVolumeFace((U32)i); + LLVolumeFace &face = mVolumeFaces[i]; + + LLVector4a box_center; + box_center.setAdd(face.mExtents[0], face.mExtents[1]); + box_center.mul(0.5f); - LLVector3 box_center = (face.mExtents[0] + face.mExtents[1]) / 2.f; - LLVector3 box_size = face.mExtents[1] - face.mExtents[0]; + LLVector4a box_size; + box_size.setSub(face.mExtents[1], face.mExtents[0]); if (LLLineSegmentBoxIntersect(start, end, box_center, box_size)) { @@ -3612,56 +4423,19 @@ S32 LLVolume::lineSegmentIntersect(const LLVector3& start, const LLVector3& end, { genBinormals(i); } - - for (U32 tri = 0; tri < face.mIndices.size()/3; tri++) - { - S32 index1 = face.mIndices[tri*3+0]; - S32 index2 = face.mIndices[tri*3+1]; - S32 index3 = face.mIndices[tri*3+2]; - F32 a, b, t; + if (!face.mOctree) + { + face.createOctree(); + } - if (LLTriangleRayIntersect(face.mVertices[index1].mPosition, - face.mVertices[index2].mPosition, - face.mVertices[index3].mPosition, - start, dir, &a, &b, &t, FALSE)) - { - if ((t >= 0.f) && // if hit is after start - (t <= 1.f) && // and before end - (t < closest_t)) // and this hit is closer - { - closest_t = t; - hit_face = i; + //LLVector4a* p = (LLVector4a*) face.mPositions; - if (intersection != NULL) - { - *intersection = start + dir * closest_t; - } - - if (tex_coord != NULL) + LLOctreeTriangleRayIntersect intersect(start, dir, &face, &closest_t, intersection, tex_coord, normal, bi_normal); + intersect.traverse(face.mOctree); + if (intersect.mHitFace) { - *tex_coord = ((1.f - a - b) * face.mVertices[index1].mTexCoord + - a * face.mVertices[index2].mTexCoord + - b * face.mVertices[index3].mTexCoord); - - } - - if (normal != NULL) - { - *normal = ((1.f - a - b) * face.mVertices[index1].mNormal + - a * face.mVertices[index2].mNormal + - b * face.mVertices[index3].mNormal); - } - - if (bi_normal != NULL) - { - *bi_normal = ((1.f - a - b) * face.mVertices[index1].mBinormal + - a * face.mVertices[index2].mBinormal + - b * face.mVertices[index3].mBinormal); - } - - } - } + hit_face = i; } } } @@ -4109,11 +4883,28 @@ BOOL LLVolumeParams::exportLegacyStream(std::ostream& output_stream) const return TRUE; } +LLSD LLVolumeParams::sculptAsLLSD() const +{ + LLSD sd = LLSD(); + sd["id"] = getSculptID(); + sd["type"] = getSculptType(); + + return sd; +} + +bool LLVolumeParams::sculptFromLLSD(LLSD& sd) +{ + setSculptID(sd["id"].asUUID(), (U8)sd["type"].asInteger()); + return true; +} + LLSD LLVolumeParams::asLLSD() const { LLSD sd = LLSD(); sd["path"] = mPathParams; sd["profile"] = mProfileParams; + sd["sculpt"] = sculptAsLLSD(); + return sd; } @@ -4121,6 +4912,8 @@ bool LLVolumeParams::fromLLSD(LLSD& sd) { mPathParams.fromLLSD(sd["path"]); mProfileParams.fromLLSD(sd["profile"]); + sculptFromLLSD(sd["sculpt"]); + return true; } @@ -4163,6 +4956,12 @@ const F32 MIN_CONCAVE_PATH_WEDGE = 0.111111f; // 1/9 unity // for collison purposes BOOL LLVolumeParams::isConvex() const { + if (!getSculptID().isNull()) + { + // can't determine, be safe and say no: + return FALSE; + } + F32 path_length = mPathParams.getEnd() - mPathParams.getBegin(); F32 hollow = mProfileParams.getHollow(); @@ -4403,9 +5202,154 @@ std::ostream& operator<<(std::ostream &s, const LLVolume *volumep) return s; } +LLVolumeFace::LLVolumeFace() : + mID(0), + mTypeMask(0), + mBeginS(0), + mBeginT(0), + mNumS(0), + mNumT(0), + mNumVertices(0), + mNumIndices(0), + mPositions(NULL), + mNormals(NULL), + mBinormals(NULL), + mTexCoords(NULL), + mIndices(NULL), + mWeights(NULL), + mOctree(NULL) +{ + mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); + mCenter = mExtents+2; +} + +LLVolumeFace::LLVolumeFace(const LLVolumeFace& src) +: mID(0), + mTypeMask(0), + mBeginS(0), + mBeginT(0), + mNumS(0), + mNumT(0), + mNumVertices(0), + mNumIndices(0), + mPositions(NULL), + mNormals(NULL), + mBinormals(NULL), + mTexCoords(NULL), + mIndices(NULL), + mWeights(NULL), + mOctree(NULL) +{ + mExtents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*3); + mCenter = mExtents+2; + *this = src; +} + +LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src) +{ + if (&src == this) + { //self assignment, do nothing + return *this; + } + + mID = src.mID; + mTypeMask = src.mTypeMask; + mBeginS = src.mBeginS; + mBeginT = src.mBeginT; + mNumS = src.mNumS; + mNumT = src.mNumT; + + mExtents[0] = src.mExtents[0]; + mExtents[1] = src.mExtents[1]; + *mCenter = *src.mCenter; + + mNumVertices = 0; + mNumIndices = 0; + + freeData(); + + LLVector4a::memcpyNonAliased16((F32*) mExtents, (F32*) src.mExtents, 3*sizeof(LLVector4a)); + + resizeVertices(src.mNumVertices); + resizeIndices(src.mNumIndices); + + if (mNumVertices) + { + S32 vert_size = mNumVertices*sizeof(LLVector4a); + S32 tc_size = (mNumVertices*sizeof(LLVector2)+0xF) & ~0xF; + + LLVector4a::memcpyNonAliased16((F32*) mPositions, (F32*) src.mPositions, vert_size); + LLVector4a::memcpyNonAliased16((F32*) mNormals, (F32*) src.mNormals, vert_size); + LLVector4a::memcpyNonAliased16((F32*) mTexCoords, (F32*) src.mTexCoords, tc_size); + + + if (src.mBinormals) + { + allocateBinormals(src.mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mBinormals, (F32*) src.mBinormals, vert_size); + } + else + { + ll_aligned_free_16(mBinormals); + mBinormals = NULL; + } + + if (src.mWeights) + { + allocateWeights(src.mNumVertices); + LLVector4a::memcpyNonAliased16((F32*) mWeights, (F32*) src.mWeights, vert_size); + } + else + { + ll_aligned_free_16(mWeights); + mWeights = NULL; + } + } + + if (mNumIndices) + { + S32 idx_size = (mNumIndices*sizeof(U16)+0xF) & ~0xF; + + LLVector4a::memcpyNonAliased16((F32*) mIndices, (F32*) src.mIndices, idx_size); + } + + //delete + return *this; +} + +LLVolumeFace::~LLVolumeFace() +{ + ll_aligned_free_16(mExtents); + mExtents = NULL; + + freeData(); +} + +void LLVolumeFace::freeData() +{ + ll_aligned_free_16(mPositions); + mPositions = NULL; + ll_aligned_free_16( mNormals); + mNormals = NULL; + ll_aligned_free_16(mTexCoords); + mTexCoords = NULL; + ll_aligned_free_16(mIndices); + mIndices = NULL; + ll_aligned_free_16(mBinormals); + mBinormals = NULL; + ll_aligned_free_16(mWeights); + mWeights = NULL; + + delete mOctree; + mOctree = NULL; +} BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) { + //tree for this face is no longer valid + delete mOctree; + mOctree = NULL; + BOOL ret = FALSE ; if (mTypeMask & CAP_MASK) { @@ -4426,25 +5370,24 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) mTexCoordExtents[0].setVec(1.f, 1.f) ; mTexCoordExtents[1].setVec(0.f, 0.f) ; - U32 end = mVertices.size() ; - for(U32 i = 0 ; i < end ; i++) + for(U32 i = 0 ; i < mNumVertices ; i++) { - if(mTexCoordExtents[0].mV[0] > mVertices[i].mTexCoord.mV[0]) + if(mTexCoordExtents[0].mV[0] > mTexCoords[i].mV[0]) { - mTexCoordExtents[0].mV[0] = mVertices[i].mTexCoord.mV[0] ; + mTexCoordExtents[0].mV[0] = mTexCoords[i].mV[0] ; } - if(mTexCoordExtents[1].mV[0] < mVertices[i].mTexCoord.mV[0]) + if(mTexCoordExtents[1].mV[0] < mTexCoords[i].mV[0]) { - mTexCoordExtents[1].mV[0] = mVertices[i].mTexCoord.mV[0] ; + mTexCoordExtents[1].mV[0] = mTexCoords[i].mV[0] ; } - if(mTexCoordExtents[0].mV[1] > mVertices[i].mTexCoord.mV[1]) + if(mTexCoordExtents[0].mV[1] > mTexCoords[i].mV[1]) { - mTexCoordExtents[0].mV[1] = mVertices[i].mTexCoord.mV[1] ; + mTexCoordExtents[0].mV[1] = mTexCoords[i].mV[1] ; } - if(mTexCoordExtents[1].mV[1] < mVertices[i].mTexCoord.mV[1]) + if(mTexCoordExtents[1].mV[1] < mTexCoords[i].mV[1]) { - mTexCoordExtents[1].mV[1] = mVertices[i].mTexCoord.mV[1] ; + mTexCoordExtents[1].mV[1] = mTexCoords[i].mV[1] ; } } mTexCoordExtents[0].mV[0] = llmax(0.f, mTexCoordExtents[0].mV[0]) ; @@ -4456,6 +5399,610 @@ BOOL LLVolumeFace::create(LLVolume* volume, BOOL partial_build) return ret ; } +void LLVolumeFace::getVertexData(U16 index, LLVolumeFace::VertexData& cv) +{ + cv.setPosition(mPositions[index]); + cv.setNormal(mNormals[index]); + cv.mTexCoord = mTexCoords[index]; +} + +bool LLVolumeFace::VertexMapData::operator==(const LLVolumeFace::VertexData& rhs) const +{ + return getPosition().equals3(rhs.getPosition()) && + mTexCoord == rhs.mTexCoord && + getNormal().equals3(rhs.getNormal()); +} + +bool LLVolumeFace::VertexMapData::ComparePosition::operator()(const LLVector3& a, const LLVector3& b) const +{ + if (a.mV[0] != b.mV[0]) + { + return a.mV[0] < b.mV[0]; + } + + if (a.mV[1] != b.mV[1]) + { + return a.mV[1] < b.mV[1]; + } + + return a.mV[2] < b.mV[2]; +} + +void LLVolumeFace::optimize(F32 angle_cutoff) +{ + LLVolumeFace new_face; + + //map of points to vector of vertices at that point + VertexMapData::PointMap point_map; + + //remove redundant vertices + for (U32 i = 0; i < mNumIndices; ++i) + { + U16 index = mIndices[i]; + + LLVolumeFace::VertexData cv; + getVertexData(index, cv); + + BOOL found = FALSE; + VertexMapData::PointMap::iterator point_iter = point_map.find(LLVector3(cv.getPosition().getF32ptr())); + if (point_iter != point_map.end()) + { //duplicate point might exist + for (U32 j = 0; j < point_iter->second.size(); ++j) + { + LLVolumeFace::VertexData& tv = (point_iter->second)[j]; + if (tv.compareNormal(cv, angle_cutoff)) + { + found = TRUE; + new_face.pushIndex((point_iter->second)[j].mIndex); + break; + } + } + } + + if (!found) + { + new_face.pushVertex(cv); + U16 index = (U16) new_face.mNumVertices-1; + new_face.pushIndex(index); + + VertexMapData d; + d.setPosition(cv.getPosition()); + d.mTexCoord = cv.mTexCoord; + d.setNormal(cv.getNormal()); + d.mIndex = index; + if (point_iter != point_map.end()) + { + point_iter->second.push_back(d); + } + else + { + point_map[LLVector3(d.getPosition().getF32ptr())].push_back(d); + } + } + } + + swapData(new_face); +} + +class LLVCacheTriangleData; + +class LLVCacheVertexData +{ +public: + S32 mIdx; + S32 mCacheTag; + F32 mScore; + U32 mActiveTriangles; + std::vector<LLVCacheTriangleData*> mTriangles; + + LLVCacheVertexData() + { + mCacheTag = -1; + mScore = 0.f; + mActiveTriangles = 0; + mIdx = -1; + } +}; + +class LLVCacheTriangleData +{ +public: + bool mActive; + F32 mScore; + LLVCacheVertexData* mVertex[3]; + + LLVCacheTriangleData() + { + mActive = true; + mScore = 0.f; + mVertex[0] = mVertex[1] = mVertex[2] = NULL; + } + + void complete() + { + mActive = false; + for (S32 i = 0; i < 3; ++i) + { + if (mVertex[i]) + { + llassert_always(mVertex[i]->mActiveTriangles > 0); + mVertex[i]->mActiveTriangles--; + } + } + } + + bool operator<(const LLVCacheTriangleData& rhs) const + { //highest score first + return rhs.mScore < mScore; + } +}; + +const F32 FindVertexScore_CacheDecayPower = 1.5f; +const F32 FindVertexScore_LastTriScore = 0.75f; +const F32 FindVertexScore_ValenceBoostScale = 2.0f; +const F32 FindVertexScore_ValenceBoostPower = 0.5f; +const U32 MaxSizeVertexCache = 32; + +F32 find_vertex_score(LLVCacheVertexData& data) +{ + if (data.mActiveTriangles == 0) + { //no triangle references this vertex + return -1.f; + } + + F32 score = 0.f; + + S32 cache_idx = data.mCacheTag; + + if (cache_idx < 0) + { + //not in cache + } + else + { + if (cache_idx < 3) + { //vertex was in the last triangle + score = FindVertexScore_LastTriScore; + } + else + { //more points for being higher in the cache + F32 scaler = 1.f/(MaxSizeVertexCache-3); + score = 1.f-((cache_idx-3)*scaler); + score = powf(score, FindVertexScore_CacheDecayPower); + } + } + + //bonus points for having low valence + F32 valence_boost = powf(data.mActiveTriangles, -FindVertexScore_ValenceBoostPower); + score += FindVertexScore_ValenceBoostScale * valence_boost; + + return score; +} + +class LLVCacheFIFO +{ +public: + LLVCacheVertexData* mCache[MaxSizeVertexCache]; + U32 mMisses; + + LLVCacheFIFO() + { + mMisses = 0; + for (U32 i = 0; i < MaxSizeVertexCache; ++i) + { + mCache[i] = NULL; + } + } + + void addVertex(LLVCacheVertexData* data) + { + if (data->mCacheTag == -1) + { + mMisses++; + + S32 end = MaxSizeVertexCache-1; + + if (mCache[end]) + { + mCache[end]->mCacheTag = -1; + } + + for (S32 i = end; i > 0; --i) + { + mCache[i] = mCache[i-1]; + if (mCache[i]) + { + mCache[i]->mCacheTag = i; + } + } + + mCache[0] = data; + data->mCacheTag = 0; + } + } +}; + +class LLVCacheLRU +{ +public: + LLVCacheVertexData* mCache[MaxSizeVertexCache+3]; + + LLVCacheTriangleData* mBestTriangle; + + U32 mMisses; + + LLVCacheLRU() + { + for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + { + mCache[i] = NULL; + } + + mBestTriangle = NULL; + mMisses = 0; + } + + void addVertex(LLVCacheVertexData* data) + { + S32 end = MaxSizeVertexCache+2; + if (data->mCacheTag != -1) + { //just moving a vertex to the front of the cache + end = data->mCacheTag; + } + else + { + mMisses++; + if (mCache[end]) + { //adding a new vertex, vertex at end of cache falls off + mCache[end]->mCacheTag = -1; + } + } + + for (S32 i = end; i > 0; --i) + { //adjust cache pointers and tags + mCache[i] = mCache[i-1]; + + if (mCache[i]) + { + mCache[i]->mCacheTag = i; + } + } + + mCache[0] = data; + mCache[0]->mCacheTag = 0; + } + + void addTriangle(LLVCacheTriangleData* data) + { + addVertex(data->mVertex[0]); + addVertex(data->mVertex[1]); + addVertex(data->mVertex[2]); + } + + void updateScores() + { + for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) + { //trailing 3 vertices aren't actually in the cache for scoring purposes + if (mCache[i]) + { + mCache[i]->mCacheTag = -1; + } + } + + for (U32 i = 0; i < MaxSizeVertexCache; ++i) + { //update scores of vertices in cache + if (mCache[i]) + { + mCache[i]->mScore = find_vertex_score(*(mCache[i])); + llassert_always(mCache[i]->mCacheTag == i); + } + } + + mBestTriangle = NULL; + //update triangle scores + for (U32 i = 0; i < MaxSizeVertexCache+3; ++i) + { + if (mCache[i]) + { + for (U32 j = 0; j < mCache[i]->mTriangles.size(); ++j) + { + LLVCacheTriangleData* tri = mCache[i]->mTriangles[j]; + if (tri->mActive) + { + tri->mScore = tri->mVertex[0]->mScore; + tri->mScore += tri->mVertex[1]->mScore; + tri->mScore += tri->mVertex[2]->mScore; + + if (!mBestTriangle || mBestTriangle->mScore < tri->mScore) + { + mBestTriangle = tri; + } + } + } + } + } + + //knock trailing 3 vertices off the cache + for (U32 i = MaxSizeVertexCache; i < MaxSizeVertexCache+3; ++i) + { + if (mCache[i]) + { + llassert_always(mCache[i]->mCacheTag == -1); + mCache[i] = NULL; + } + } + } +}; + + +void LLVolumeFace::cacheOptimize() +{ //optimize for vertex cache according to Forsyth method: + // http://home.comcast.net/~tom_forsyth/papers/fast_vert_cache_opt.html + + LLVCacheLRU cache; + + //mapping of vertices to triangles and indices + std::vector<LLVCacheVertexData> vertex_data; + + //mapping of triangles do vertices + std::vector<LLVCacheTriangleData> triangle_data; + + triangle_data.resize(mNumIndices/3); + vertex_data.resize(mNumVertices); + + for (U32 i = 0; i < mNumIndices; i++) + { //populate vertex data and triangle data arrays + U16 idx = mIndices[i]; + U32 tri_idx = i/3; + + vertex_data[idx].mTriangles.push_back(&(triangle_data[tri_idx])); + vertex_data[idx].mIdx = idx; + triangle_data[tri_idx].mVertex[i%3] = &(vertex_data[idx]); + } + + /*F32 pre_acmr = 1.f; + //measure cache misses from before rebuild + { + LLVCacheFIFO test_cache; + for (U32 i = 0; i < mNumIndices; ++i) + { + test_cache.addVertex(&vertex_data[mIndices[i]]); + } + + for (U32 i = 0; i < mNumVertices; i++) + { + vertex_data[i].mCacheTag = -1; + } + + pre_acmr = (F32) test_cache.mMisses/(mNumIndices/3); + }*/ + + for (U32 i = 0; i < mNumVertices; i++) + { //initialize score values (no cache -- might try a fifo cache here) + vertex_data[i].mScore = find_vertex_score(vertex_data[i]); + vertex_data[i].mActiveTriangles = vertex_data[i].mTriangles.size(); + + for (U32 j = 0; j < vertex_data[i].mTriangles.size(); ++j) + { + vertex_data[i].mTriangles[j]->mScore += vertex_data[i].mScore; + } + } + + //sort triangle data by score + std::sort(triangle_data.begin(), triangle_data.end()); + + std::vector<U16> new_indices; + + LLVCacheTriangleData* tri; + + //prime pump by adding first triangle to cache; + tri = &(triangle_data[0]); + cache.addTriangle(tri); + new_indices.push_back(tri->mVertex[0]->mIdx); + new_indices.push_back(tri->mVertex[1]->mIdx); + new_indices.push_back(tri->mVertex[2]->mIdx); + tri->complete(); + + U32 breaks = 0; + for (U32 i = 1; i < mNumIndices/3; ++i) + { + cache.updateScores(); + tri = cache.mBestTriangle; + if (!tri) + { + breaks++; + for (U32 j = 0; j < triangle_data.size(); ++j) + { + if (triangle_data[j].mActive) + { + tri = &(triangle_data[j]); + break; + } + } + } + + cache.addTriangle(tri); + new_indices.push_back(tri->mVertex[0]->mIdx); + new_indices.push_back(tri->mVertex[1]->mIdx); + new_indices.push_back(tri->mVertex[2]->mIdx); + tri->complete(); + } + + for (U32 i = 0; i < mNumIndices; ++i) + { + mIndices[i] = new_indices[i]; + } + + /*F32 post_acmr = 1.f; + //measure cache misses from after rebuild + { + LLVCacheFIFO test_cache; + for (U32 i = 0; i < mNumVertices; i++) + { + vertex_data[i].mCacheTag = -1; + } + + for (U32 i = 0; i < mNumIndices; ++i) + { + test_cache.addVertex(&vertex_data[mIndices[i]]); + } + + post_acmr = (F32) test_cache.mMisses/(mNumIndices/3); + }*/ + + //optimize for pre-TnL cache + + //allocate space for new buffer + S32 num_verts = mNumVertices; + LLVector4a* pos = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + LLVector4a* norm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; + LLVector2* tc = (LLVector2*) ll_aligned_malloc_16(size); + + LLVector4a* wght = NULL; + if (mWeights) + { + wght = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + } + + LLVector4a* binorm = NULL; + if (mBinormals) + { + binorm = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + } + + //allocate mapping of old indices to new indices + std::vector<S32> new_idx; + new_idx.resize(mNumVertices, -1); + + S32 cur_idx = 0; + for (U32 i = 0; i < mNumIndices; ++i) + { + U16 idx = mIndices[i]; + if (new_idx[idx] == -1) + { //this vertex hasn't been added yet + new_idx[idx] = cur_idx; + + //copy vertex data + pos[cur_idx] = mPositions[idx]; + norm[cur_idx] = mNormals[idx]; + tc[cur_idx] = mTexCoords[idx]; + if (mWeights) + { + wght[cur_idx] = mWeights[idx]; + } + if (mBinormals) + { + binorm[cur_idx] = mBinormals[idx]; + } + + cur_idx++; + } + } + + for (U32 i = 0; i < mNumIndices; ++i) + { + mIndices[i] = new_idx[mIndices[i]]; + } + + ll_aligned_free_16(mPositions); + ll_aligned_free_16(mNormals); + ll_aligned_free_16(mTexCoords); + ll_aligned_free_16(mWeights); + ll_aligned_free_16(mBinormals); + + mPositions = pos; + mNormals = norm; + mTexCoords = tc; + mWeights = wght; + mBinormals = binorm; + + //std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks); + //llinfos << result << llendl; + +} + +void LLVolumeFace::createOctree(F32 scaler, const LLVector4a& center, const LLVector4a& size) +{ + if (mOctree) + { + return; + } + + mOctree = new LLOctreeRoot<LLVolumeTriangle>(center, size, NULL); + new LLVolumeOctreeListener(mOctree); + + for (U32 i = 0; i < mNumIndices; i+= 3) + { //for each triangle + LLPointer<LLVolumeTriangle> tri = new LLVolumeTriangle(); + + const LLVector4a& v0 = mPositions[mIndices[i]]; + const LLVector4a& v1 = mPositions[mIndices[i+1]]; + const LLVector4a& v2 = mPositions[mIndices[i+2]]; + + //store pointers to vertex data + tri->mV[0] = &v0; + tri->mV[1] = &v1; + tri->mV[2] = &v2; + + //store indices + tri->mIndex[0] = mIndices[i]; + tri->mIndex[1] = mIndices[i+1]; + tri->mIndex[2] = mIndices[i+2]; + + //get minimum point + LLVector4a min = v0; + min.setMin(min, v1); + min.setMin(min, v2); + + //get maximum point + LLVector4a max = v0; + max.setMax(max, v1); + max.setMax(max, v2); + + //compute center + LLVector4a center; + center.setAdd(min, max); + center.mul(0.5f); + + tri->mPositionGroup = center; + + //compute "radius" + LLVector4a size; + size.setSub(max,min); + + tri->mRadius = size.getLength3().getF32() * scaler; + + //insert + mOctree->insert(tri); + } + + //remove unneeded octree layers + while (!mOctree->balance()) { } + + //calculate AABB for each node + LLVolumeOctreeRebound rebound(this); + rebound.traverse(mOctree); + + if (gDebugGL) + { + LLVolumeOctreeValidate validate; + validate.traverse(mOctree); + } +} + + +void LLVolumeFace::swapData(LLVolumeFace& rhs) +{ + llswap(rhs.mPositions, mPositions); + llswap(rhs.mNormals, mNormals); + llswap(rhs.mBinormals, mBinormals); + llswap(rhs.mTexCoords, mTexCoords); + llswap(rhs.mIndices,mIndices); + llswap(rhs.mNumVertices, mNumVertices); + llswap(rhs.mNumIndices, mNumIndices); +} + void LerpPlanarVertex(LLVolumeFace::VertexData& v0, LLVolumeFace::VertexData& v1, LLVolumeFace::VertexData& v2, @@ -4463,10 +6010,21 @@ void LerpPlanarVertex(LLVolumeFace::VertexData& v0, F32 coef01, F32 coef02) { - vout.mPosition = v0.mPosition + ((v1.mPosition-v0.mPosition)*coef01)+((v2.mPosition-v0.mPosition)*coef02); + + LLVector4a lhs; + lhs.setSub(v1.getPosition(), v0.getPosition()); + lhs.mul(coef01); + LLVector4a rhs; + rhs.setSub(v2.getPosition(), v0.getPosition()); + rhs.mul(coef02); + + rhs.add(lhs); + rhs.add(v0.getPosition()); + + vout.setPosition(rhs); + vout.mTexCoord = v0.mTexCoord + ((v1.mTexCoord-v0.mTexCoord)*coef01)+((v2.mTexCoord-v0.mTexCoord)*coef02); - vout.mNormal = v0.mNormal; - vout.mBinormal = v0.mBinormal; + vout.setNormal(v0.getNormal()); } BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) @@ -4486,84 +6044,113 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) num_vertices = (grid_size+1)*(grid_size+1); num_indices = quad_count * 4; - LLVector3& min = mExtents[0]; - LLVector3& max = mExtents[1]; + LLVector4a& min = mExtents[0]; + LLVector4a& max = mExtents[1]; S32 offset = 0; if (mTypeMask & TOP_MASK) + { offset = (max_t-1) * max_s; + } else + { offset = mBeginS; - - VertexData corners[4]; - VertexData baseVert; - for(int t = 0; t < 4; t++){ - corners[t].mPosition = mesh[offset + (grid_size*t)].mPos; - corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; - corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; - } - baseVert.mNormal = - ((corners[1].mPosition-corners[0].mPosition) % - (corners[2].mPosition-corners[1].mPosition)); - baseVert.mNormal.normVec(); - if(!(mTypeMask & TOP_MASK)){ - baseVert.mNormal *= -1.0f; - }else{ - //Swap the UVs on the U(X) axis for top face - LLVector2 swap; - swap = corners[0].mTexCoord; - corners[0].mTexCoord=corners[3].mTexCoord; - corners[3].mTexCoord=swap; - swap = corners[1].mTexCoord; - corners[1].mTexCoord=corners[2].mTexCoord; - corners[2].mTexCoord=swap; } - baseVert.mBinormal = calc_binormal_from_triangle( - corners[0].mPosition, corners[0].mTexCoord, - corners[1].mPosition, corners[1].mTexCoord, - corners[2].mPosition, corners[2].mTexCoord); - for(int t = 0; t < 4; t++){ - corners[t].mBinormal = baseVert.mBinormal; - corners[t].mNormal = baseVert.mNormal; - } - mHasBinormals = TRUE; - if (partial_build) { - mVertices.clear(); - } + VertexData corners[4]; + VertexData baseVert; + for(S32 t = 0; t < 4; t++) + { + corners[t].getPosition().load3( mesh[offset + (grid_size*t)].mPos.mV); + corners[t].mTexCoord.mV[0] = profile[grid_size*t].mV[0]+0.5f; + corners[t].mTexCoord.mV[1] = 0.5f - profile[grid_size*t].mV[1]; + } - S32 vtop = mVertices.size(); - for(int gx = 0;gx<grid_size+1;gx++){ - for(int gy = 0;gy<grid_size+1;gy++){ - VertexData newVert; - LerpPlanarVertex( - corners[0], - corners[1], - corners[3], - newVert, - (F32)gx/(F32)grid_size, - (F32)gy/(F32)grid_size); - mVertices.push_back(newVert); + { + LLVector4a lhs; + lhs.setSub(corners[1].getPosition(), corners[0].getPosition()); + LLVector4a rhs; + rhs.setSub(corners[2].getPosition(), corners[1].getPosition()); + baseVert.getNormal().setCross3(lhs, rhs); + baseVert.getNormal().normalize3fast(); + } - if (gx == 0 && gy == 0) - { - min = max = newVert.mPosition; - } - else + if(!(mTypeMask & TOP_MASK)) + { + baseVert.getNormal().mul(-1.0f); + } + else + { + //Swap the UVs on the U(X) axis for top face + LLVector2 swap; + swap = corners[0].mTexCoord; + corners[0].mTexCoord=corners[3].mTexCoord; + corners[3].mTexCoord=swap; + swap = corners[1].mTexCoord; + corners[1].mTexCoord=corners[2].mTexCoord; + corners[2].mTexCoord=swap; + } + + LLVector4a binormal; + + calc_binormal_from_triangle( binormal, + corners[0].getPosition(), corners[0].mTexCoord, + corners[1].getPosition(), corners[1].mTexCoord, + corners[2].getPosition(), corners[2].mTexCoord); + + binormal.normalize3fast(); + + S32 size = (grid_size+1)*(grid_size+1); + resizeVertices(size); + allocateBinormals(size); + + LLVector4a* pos = (LLVector4a*) mPositions; + LLVector4a* norm = (LLVector4a*) mNormals; + LLVector4a* binorm = (LLVector4a*) mBinormals; + LLVector2* tc = (LLVector2*) mTexCoords; + + for(int gx = 0;gx<grid_size+1;gx++) + { + for(int gy = 0;gy<grid_size+1;gy++) { - update_min_max(min,max,newVert.mPosition); + VertexData newVert; + LerpPlanarVertex( + corners[0], + corners[1], + corners[3], + newVert, + (F32)gx/(F32)grid_size, + (F32)gy/(F32)grid_size); + + *pos++ = newVert.getPosition(); + *norm++ = baseVert.getNormal(); + *tc++ = newVert.mTexCoord; + *binorm++ = binormal; + + if (gx == 0 && gy == 0) + { + min = newVert.getPosition(); + max = min; + } + else + { + min.setMin(min, newVert.getPosition()); + max.setMax(max, newVert.getPosition()); + } } } - } - mCenter = (min + max) * 0.5f; + mCenter->setAdd(min, max); + mCenter->mul(0.5f); + } if (!partial_build) { -#if GEN_TRI_STRIP - mTriStrip.clear(); -#endif + resizeIndices(grid_size*grid_size*6); + + U16* out = mIndices; + S32 idxs[] = {0,1,(grid_size+1)+1,(grid_size+1)+1,(grid_size+1),0}; for(S32 gx = 0;gx<grid_size;gx++) { @@ -4574,61 +6161,18 @@ BOOL LLVolumeFace::createUnCutCubeCap(LLVolume* volume, BOOL partial_build) { for(S32 i=5;i>=0;i--) { - mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]); - } - -#if GEN_TRI_STRIP - if (gy == 0) - { - mTriStrip.push_back((gx+1)*(grid_size+1)); - mTriStrip.push_back((gx+1)*(grid_size+1)); - mTriStrip.push_back(gx*(grid_size+1)); - } - - mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); - mTriStrip.push_back(gy+1+gx*(grid_size+1)); - - - if (gy == grid_size-1) - { - mTriStrip.push_back(gy+1+gx*(grid_size+1)); - } -#endif + *out++ = ((gy*(grid_size+1))+gx+idxs[i]); + } } else { for(S32 i=0;i<6;i++) { - mIndices.push_back(vtop+(gy*(grid_size+1))+gx+idxs[i]); - } - -#if GEN_TRI_STRIP - if (gy == 0) - { - mTriStrip.push_back(gx*(grid_size+1)); - mTriStrip.push_back(gx*(grid_size+1)); - mTriStrip.push_back((gx+1)*(grid_size+1)); - } - - mTriStrip.push_back(gy+1+gx*(grid_size+1)); - mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); - - if (gy == grid_size-1) - { - mTriStrip.push_back(gy+1+(gx+1)*(grid_size+1)); + *out++ = ((gy*(grid_size+1))+gx+idxs[i]); } -#endif } - } - - } - -#if GEN_TRI_STRIP - if (mTriStrip.size()%2 == 1) - { - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); + } } -#endif } return TRUE; @@ -4658,17 +6202,31 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) num_vertices = profile.size(); num_indices = (profile.size() - 2)*3; - mVertices.resize(num_vertices); + if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) + { + resizeVertices(num_vertices+1); + allocateBinormals(num_vertices+1); - if (!partial_build) + if (!partial_build) + { + resizeIndices(num_indices+3); + } + } + else { - mIndices.resize(num_indices); + resizeVertices(num_vertices); + allocateBinormals(num_vertices); + + if (!partial_build) + { + resizeIndices(num_indices); + } } S32 max_s = volume->getProfile().getTotal(); S32 max_t = volume->getPath().mPath.size(); - mCenter.clearVec(); + mCenter->clear(); S32 offset = 0; if (mTypeMask & TOP_MASK) @@ -4686,82 +6244,91 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) LLVector2 cuv; LLVector2 min_uv, max_uv; - LLVector3& min = mExtents[0]; - LLVector3& max = mExtents[1]; + LLVector4a& min = mExtents[0]; + LLVector4a& max = mExtents[1]; + + LLVector2* tc = (LLVector2*) mTexCoords; + LLVector4a* pos = (LLVector4a*) mPositions; + LLVector4a* norm = (LLVector4a*) mNormals; + LLVector4a* binorm = (LLVector4a*) mBinormals; // Copy the vertices into the array for (S32 i = 0; i < num_vertices; i++) { if (mTypeMask & TOP_MASK) { - mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f; - mVertices[i].mTexCoord.mV[1] = profile[i].mV[1]+0.5f; + tc[i].mV[0] = profile[i].mV[0]+0.5f; + tc[i].mV[1] = profile[i].mV[1]+0.5f; } else { // Mirror for underside. - mVertices[i].mTexCoord.mV[0] = profile[i].mV[0]+0.5f; - mVertices[i].mTexCoord.mV[1] = 0.5f - profile[i].mV[1]; + tc[i].mV[0] = profile[i].mV[0]+0.5f; + tc[i].mV[1] = 0.5f - profile[i].mV[1]; } - mVertices[i].mPosition = mesh[i + offset].mPos; + pos[i].load3(mesh[i + offset].mPos.mV); if (i == 0) { - min = max = mVertices[i].mPosition; - min_uv = max_uv = mVertices[i].mTexCoord; + max = pos[i]; + min = max; + min_uv = max_uv = tc[i]; } else { - update_min_max(min,max, mVertices[i].mPosition); - update_min_max(min_uv, max_uv, mVertices[i].mTexCoord); + update_min_max(min,max,pos[i]); + update_min_max(min_uv, max_uv, tc[i]); } } - mCenter = (min+max)*0.5f; + mCenter->setAdd(min, max); + mCenter->mul(0.5f); + cuv = (min_uv + max_uv)*0.5f; - LLVector3 binormal = calc_binormal_from_triangle( - mCenter, cuv, - mVertices[0].mPosition, mVertices[0].mTexCoord, - mVertices[1].mPosition, mVertices[1].mTexCoord); - binormal.normVec(); + LLVector4a binormal; + calc_binormal_from_triangle(binormal, + *mCenter, cuv, + pos[0], tc[0], + pos[1], tc[1]); + binormal.normalize3fast(); + + LLVector4a normal; + LLVector4a d0, d1; + - LLVector3 d0; - LLVector3 d1; - LLVector3 normal; + d0.setSub(*mCenter, pos[0]); + d1.setSub(*mCenter, pos[1]); - d0 = mCenter-mVertices[0].mPosition; - d1 = mCenter-mVertices[1].mPosition; + if (mTypeMask & TOP_MASK) + { + normal.setCross3(d0, d1); + } + else + { + normal.setCross3(d1, d0); + } - normal = (mTypeMask & TOP_MASK) ? (d0%d1) : (d1%d0); - normal.normVec(); + normal.normalize3fast(); VertexData vd; - vd.mPosition = mCenter; - vd.mNormal = normal; - vd.mBinormal = binormal; + vd.setPosition(*mCenter); vd.mTexCoord = cuv; if (!(mTypeMask & HOLLOW_MASK) && !(mTypeMask & OPEN_MASK)) { - mVertices.push_back(vd); + pos[num_vertices] = *mCenter; + tc[num_vertices] = cuv; num_vertices++; - if (!partial_build) - { - vector_append(mIndices, 3); - } } - for (S32 i = 0; i < num_vertices; i++) { - mVertices[i].mBinormal = binormal; - mVertices[i].mNormal = normal; + binorm[i].load4a(binormal.getF32ptr()); + norm[i].load4a(normal.getF32ptr()); } - mHasBinormals = TRUE; - if (partial_build) { return TRUE; @@ -4869,8 +6436,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) pt2--; } } - - makeTriStrip(); } else { @@ -4975,8 +6540,6 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) pt2--; } } - - makeTriStrip(); } } else @@ -4998,131 +6561,277 @@ BOOL LLVolumeFace::createCap(LLVolume* volume, BOOL partial_build) mIndices[3*i+v2] = i + 1; } -#if GEN_TRI_STRIP - //make tri strip - if (mTypeMask & OPEN_MASK) - { - makeTriStrip(); - } - else - { - S32 j = num_vertices-2; - if (mTypeMask & TOP_MASK) - { - mTriStrip.push_back(0); - for (S32 i = 0; i <= j; ++i) - { - mTriStrip.push_back(i); - if (i != j) - { - mTriStrip.push_back(j); - } - --j; - } - } - else - { - mTriStrip.push_back(j); - for (S32 i = 0; i <= j; ++i) - { - if (i != j) - { - mTriStrip.push_back(j); - } - mTriStrip.push_back(i); - --j; - } - } - - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); - if (mTriStrip.size()%2 == 1) - { - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); - } - } -#endif } return TRUE; } -void LLVolumeFace::makeTriStrip() +void LLVolumeFace::createBinormals() { -#if GEN_TRI_STRIP - for (U32 i = 0; i < mIndices.size(); i+=3) + LLMemType m1(LLMemType::MTYPE_VOLUME); + + if (!mBinormals) { - U16 i0 = mIndices[i]; - U16 i1 = mIndices[i+1]; - U16 i2 = mIndices[i+2]; + allocateBinormals(mNumVertices); - if ((i/3)%2 == 1) - { - mTriStrip.push_back(i0); - mTriStrip.push_back(i0); - mTriStrip.push_back(i1); - mTriStrip.push_back(i2); - mTriStrip.push_back(i2); - } - else + //generate binormals + LLVector4a* pos = mPositions; + LLVector2* tc = (LLVector2*) mTexCoords; + LLVector4a* binorm = (LLVector4a*) mBinormals; + + LLVector4a* end = mBinormals+mNumVertices; + while (binorm < end) { - mTriStrip.push_back(i2); - mTriStrip.push_back(i2); - mTriStrip.push_back(i1); - mTriStrip.push_back(i0); - mTriStrip.push_back(i0); + (*binorm++).clear(); } - } - if (mTriStrip.size()%2 == 1) - { - mTriStrip.push_back(mTriStrip[mTriStrip.size()-1]); - } -#endif -} + binorm = mBinormals; -void LLVolumeFace::createBinormals() -{ - LLMemType m1(LLMemType::MTYPE_VOLUME); - - if (!mHasBinormals) - { - //generate binormals - for (U32 i = 0; i < mIndices.size()/3; i++) + for (U32 i = 0; i < mNumIndices/3; i++) { //for each triangle - const VertexData& v0 = mVertices[mIndices[i*3+0]]; - const VertexData& v1 = mVertices[mIndices[i*3+1]]; - const VertexData& v2 = mVertices[mIndices[i*3+2]]; + const U16& i0 = mIndices[i*3+0]; + const U16& i1 = mIndices[i*3+1]; + const U16& i2 = mIndices[i*3+2]; //calculate binormal - LLVector3 binorm = calc_binormal_from_triangle(v0.mPosition, v0.mTexCoord, - v1.mPosition, v1.mTexCoord, - v2.mPosition, v2.mTexCoord); + LLVector4a binormal; + calc_binormal_from_triangle(binormal, + pos[i0], tc[i0], + pos[i1], tc[i1], + pos[i2], tc[i2]); - for (U32 j = 0; j < 3; j++) - { //add triangle normal to vertices - mVertices[mIndices[i*3+j]].mBinormal += binorm; // * (weight_sum - d[j])/weight_sum; - } + + //add triangle normal to vertices + binorm[i0].add(binormal); + binorm[i1].add(binormal); + binorm[i2].add(binormal); //even out quad contributions if (i % 2 == 0) { - mVertices[mIndices[i*3+2]].mBinormal += binorm; + binorm[i2].add(binormal); } else { - mVertices[mIndices[i*3+1]].mBinormal += binorm; + binorm[i1].add(binormal); } } //normalize binormals - for (U32 i = 0; i < mVertices.size(); i++) + for (U32 i = 0; i < mNumVertices; i++) { - mVertices[i].mBinormal.normVec(); - mVertices[i].mNormal.normVec(); + binorm[i].normalize3fast(); + //bump map/planar projection code requires normals to be normalized + mNormals[i].normalize3fast(); } + } +} + +void LLVolumeFace::resizeVertices(S32 num_verts) +{ + ll_aligned_free_16(mPositions); + ll_aligned_free_16(mNormals); + ll_aligned_free_16(mBinormals); + ll_aligned_free_16(mTexCoords); - mHasBinormals = TRUE; + mBinormals = NULL; + + if (num_verts) + { + mPositions = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + assert_aligned(mPositions, 16); + mNormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); + assert_aligned(mNormals, 16); + + //pad texture coordinate block end to allow for QWORD reads + S32 size = ((num_verts*sizeof(LLVector2)) + 0xF) & ~0xF; + mTexCoords = (LLVector2*) ll_aligned_malloc_16(size); + assert_aligned(mTexCoords, 16); + } + else + { + mPositions = NULL; + mNormals = NULL; + mTexCoords = NULL; + } + + mNumVertices = num_verts; +} + +void LLVolumeFace::pushVertex(const LLVolumeFace::VertexData& cv) +{ + pushVertex(cv.getPosition(), cv.getNormal(), cv.mTexCoord); +} + +void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, const LLVector2& tc) +{ + S32 new_verts = mNumVertices+1; + S32 new_size = new_verts*16; +// S32 old_size = mNumVertices*16; + + //positions + mPositions = (LLVector4a*) realloc(mPositions, new_size); + + //normals + mNormals = (LLVector4a*) realloc(mNormals, new_size); + + //tex coords + new_size = ((new_verts*8)+0xF) & ~0xF; + mTexCoords = (LLVector2*) realloc(mTexCoords, new_size); + + + //just clear binormals + ll_aligned_free_16(mBinormals); + mBinormals = NULL; + + mPositions[mNumVertices] = pos; + mNormals[mNumVertices] = norm; + mTexCoords[mNumVertices] = tc; + + mNumVertices++; +} + +void LLVolumeFace::allocateBinormals(S32 num_verts) +{ + ll_aligned_free_16(mBinormals); + mBinormals = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +} + +void LLVolumeFace::allocateWeights(S32 num_verts) +{ + ll_aligned_free_16(mWeights); + mWeights = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts); +} + +void LLVolumeFace::resizeIndices(S32 num_indices) +{ + ll_aligned_free_16(mIndices); + + if (num_indices) + { + //pad index block end to allow for QWORD reads + S32 size = ((num_indices*sizeof(U16)) + 0xF) & ~0xF; + + mIndices = (U16*) ll_aligned_malloc_16(size); + } + else + { + mIndices = NULL; + } + + mNumIndices = num_indices; +} + +void LLVolumeFace::pushIndex(const U16& idx) +{ + S32 new_count = mNumIndices + 1; + S32 new_size = ((new_count*2)+0xF) & ~0xF; + + S32 old_size = ((mNumIndices*2)+0xF) & ~0xF; + if (new_size != old_size) + { + mIndices = (U16*) realloc(mIndices, new_size); + } + + mIndices[mNumIndices++] = idx; +} + +void LLVolumeFace::fillFromLegacyData(std::vector<LLVolumeFace::VertexData>& v, std::vector<U16>& idx) +{ + resizeVertices(v.size()); + resizeIndices(idx.size()); + + for (U32 i = 0; i < v.size(); ++i) + { + mPositions[i] = v[i].getPosition(); + mNormals[i] = v[i].getNormal(); + mTexCoords[i] = v[i].mTexCoord; + } + + for (U32 i = 0; i < idx.size(); ++i) + { + mIndices[i] = idx[i]; + } +} + +void LLVolumeFace::appendFace(const LLVolumeFace& face, LLMatrix4& mat_in, LLMatrix4& norm_mat_in) +{ + U16 offset = mNumVertices; + + S32 new_count = face.mNumVertices + mNumVertices; + + if (new_count > 65536) + { + llerrs << "Cannot append face -- 16-bit overflow will occur." << llendl; + } + + if (face.mNumVertices == 0) + { + llerrs << "Cannot append empty face." << llendl; + } + + //allocate new buffer space + mPositions = (LLVector4a*) realloc(mPositions, new_count*sizeof(LLVector4a)); + assert_aligned(mPositions, 16); + mNormals = (LLVector4a*) realloc(mNormals, new_count*sizeof(LLVector4a)); + assert_aligned(mNormals, 16); + mTexCoords = (LLVector2*) realloc(mTexCoords, (new_count*sizeof(LLVector2)+0xF) & ~0xF); + assert_aligned(mTexCoords, 16); + + mNumVertices = new_count; + + //get destination address of appended face + LLVector4a* dst_pos = mPositions+offset; + LLVector2* dst_tc = mTexCoords+offset; + LLVector4a* dst_norm = mNormals+offset; + + //get source addresses of appended face + const LLVector4a* src_pos = face.mPositions; + const LLVector2* src_tc = face.mTexCoords; + const LLVector4a* src_norm = face.mNormals; + + //load aligned matrices + LLMatrix4a mat, norm_mat; + mat.loadu(mat_in); + norm_mat.loadu(norm_mat_in); + + for (U32 i = 0; i < face.mNumVertices; ++i) + { + //transform appended face position and store + mat.affineTransform(src_pos[i], dst_pos[i]); + + //transform appended face normal and store + norm_mat.rotate(src_norm[i], dst_norm[i]); + dst_norm[i].normalize3fast(); + + //copy appended face texture coordinate + dst_tc[i] = src_tc[i]; + + if (offset == 0 && i == 0) + { //initialize bounding box + mExtents[0] = mExtents[1] = dst_pos[i]; + } + else + { + //stretch bounding box + update_min_max(mExtents[0], mExtents[1], dst_pos[i]); + } + } + + + new_count = mNumIndices + face.mNumIndices; + + //allocate new index buffer + mIndices = (U16*) realloc(mIndices, (new_count*sizeof(U16)+0xF) & ~0xF); + + //get destination address into new index buffer + U16* dst_idx = mIndices+mNumIndices; + mNumIndices = new_count; + + for (U32 i = 0; i < face.mNumIndices; ++i) + { //copy indices, offsetting by old vertex count + dst_idx[i] = face.mIndices[i]+offset; } } @@ -5152,18 +6861,20 @@ BOOL LLVolumeFace::createSide(LLVolume* volume, BOOL partial_build) num_vertices = mNumS*mNumT; num_indices = (mNumS-1)*(mNumT-1)*6; - mVertices.resize(num_vertices); - if (!partial_build) { - mIndices.resize(num_indices); |
