From 473ade269628d1fb6cbc7b96a91e1c0aec1b8e59 Mon Sep 17 00:00:00 2001 From: Henri Beauchamp Date: Tue, 31 Jan 2023 17:42:51 +0100 Subject: SL-19110 Fast hashing classes for use in place of the slow LLMD5, where speed matters. (#64) This commit adds the HBXX64 and HBXX128 classes for use as a drop-in replacement for the slow LLMD5 hashing class, where speed matters and backward compatibility (with standard hashing algorithms) and/or cryptographic hashing qualities are not required. It also replaces LLMD5 with HBXX* in a few existing hot (well, ok, just "warm" for some) paths meeting the above requirements, while paving the way for future use cases, such as in the DRTVWR-559 and sibling branches where the slow LLMD5 is used (e.g. to hash materials and vertex buffer cache entries), and could be use such a (way) faster algorithm with very significant benefits and no negative impact. Here is the comment I added in indra/llcommon/hbxx.h: // HBXXH* classes are to be used where speed matters and cryptographic quality // is not required (no "one-way" guarantee, though they are likely not worst in // this respect than MD5 which got busted and is now considered too weak). The // xxHash code they are built upon is vectorized and about 50 times faster than // MD5. A 64 bits hash class is also provided for when 128 bits of entropy are // not needed. The hashes collision rate is similar to MD5's. // See https://github.com/Cyan4973/xxHash#readme for details. --- indra/llcommon/CMakeLists.txt | 2 + indra/llcommon/hbxxh.cpp | 377 ++++++++++++++++++++++++++++++++++++++++++ indra/llcommon/hbxxh.h | 259 +++++++++++++++++++++++++++++ indra/llcommon/lluuid.cpp | 28 +--- 4 files changed, 646 insertions(+), 20 deletions(-) create mode 100644 indra/llcommon/hbxxh.cpp create mode 100644 indra/llcommon/hbxxh.h (limited to 'indra/llcommon') diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 991c3f70a1..fab7550c9a 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -119,6 +119,7 @@ set(llcommon_SOURCE_FILES lluriparser.cpp lluuid.cpp llworkerthread.cpp + hbxxh.cpp u64.cpp threadpool.cpp workqueue.cpp @@ -257,6 +258,7 @@ set(llcommon_HEADER_FILES llwin32headers.h llwin32headerslean.h llworkerthread.h + hbxxh.h lockstatic.h stdtypes.h stringize.h diff --git a/indra/llcommon/hbxxh.cpp b/indra/llcommon/hbxxh.cpp new file mode 100644 index 0000000000..e94581d415 --- /dev/null +++ b/indra/llcommon/hbxxh.cpp @@ -0,0 +1,377 @@ +/** + * @file hbxxh.cpp + * @brief High performances vectorized hashing based on xxHash. + * + * $LicenseInfo:firstyear=2023&license=viewergpl$ + * Second Life Viewer Source Code + * Copyright (c) 2023, Henri Beauchamp. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#include "linden_common.h" + +// This define ensures that xxHash will be compiled within this module, with +// vectorized (*) and inlined functions (with no exported API symbol); our +// xxhash "pre-built library" package actually only contains the xxhash.h +// header (no library needed at link time). +// (*) SSE2 is normally used for x86(_64) builds, unless you enabled AVX2 +// in your build, in which case the latter would be used instead. For ARM64 +// builds, this would also automatically enable NEON vectorization. +#define XXH_INLINE_ALL +#include "xxhash.h" + +#include "hbxxh.h" + +// How many bytes to grab at a time when hashing files or streams +constexpr size_t BLOCK_LEN = 4096; + +/////////////////////////////////////////////////////////////////////////////// +// HBXXH64 class +/////////////////////////////////////////////////////////////////////////////// + +//static +U64 HBXXH64::digest(const void* buffer, size_t len) +{ + return XXH3_64bits(buffer, len); +} + +//static +U64 HBXXH64::digest(const char* str) +{ + return XXH3_64bits((const void*)str, strlen(str)); +} + +//static +U64 HBXXH64::digest(const std::string& str) +{ + return XXH3_64bits((const void*)str.c_str(), str.size()); +} + +// Must be called by all constructors. +void HBXXH64::init() +{ + mDigest = 0; + mState = (void*)XXH3_createState(); + if (!mState || XXH3_64bits_reset((XXH3_state_t*)mState) != XXH_OK) + { + LL_WARNS() << "Failed to initialize state !" << LL_ENDL; + } +} + +HBXXH64::~HBXXH64() +{ + if (mState) + { + XXH3_freeState((XXH3_state_t*)mState); + } +} + +void HBXXH64::update(const void* buffer, size_t len) +{ + if (mState) + { + XXH3_64bits_update((XXH3_state_t*)mState, buffer, len); + } + else + { + LL_WARNS() << "Cannot update a finalized digest !" << LL_ENDL; + } +} + +void HBXXH64::update(const std::string& str) +{ + if (mState) + { + XXH3_64bits_update((XXH3_state_t*)mState, (const void*)str.c_str(), + str.length()); + } + else + { + LL_WARNS() << "Cannot update a finalized digest !" << LL_ENDL; + } +} + +void HBXXH64::update(std::istream& stream) +{ + if (!mState) + { + LL_WARNS() << "Cannot update a finalized digest !" << LL_ENDL; + return; + } + + char buffer[BLOCK_LEN]; + size_t len; + while (stream.good()) + { + stream.read(buffer, BLOCK_LEN); + len = stream.gcount(); + XXH3_64bits_update((XXH3_state_t*)mState, (const void*)buffer, len); + } +} + +void HBXXH64::update(FILE* file) +{ + if (!mState) + { + LL_WARNS() << "Cannot update a finalized digest !" << LL_ENDL; + return; + } + + char buffer[BLOCK_LEN]; + size_t len; + while ((len = fread((void*)buffer, 1, BLOCK_LEN, file))) + { + XXH3_64bits_update((XXH3_state_t*)mState, (const void*)buffer, len); + } + fclose(file); +} + +void HBXXH64::finalize() +{ + if (!mState) + { + LL_WARNS() << "Already finalized !" << LL_ENDL; + return; + } + mDigest = XXH3_64bits_digest((XXH3_state_t*)mState); + XXH3_freeState((XXH3_state_t*)mState); + mState = NULL; +} + +U64 HBXXH64::digest() const +{ + return mState ? XXH3_64bits_digest((XXH3_state_t*)mState) : mDigest; +} + +std::ostream& operator<<(std::ostream& stream, HBXXH64 context) +{ + stream << context.digest(); + return stream; +} + +/////////////////////////////////////////////////////////////////////////////// +// HBXXH128 class +/////////////////////////////////////////////////////////////////////////////// + +//static +LLUUID HBXXH128::digest(const void* buffer, size_t len) +{ + XXH128_hash_t hash = XXH3_128bits(buffer, len); + LLUUID id; + U64* data = (U64*)id.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; + return id; +} + +//static +LLUUID HBXXH128::digest(const char* str) +{ + XXH128_hash_t hash = XXH3_128bits((const void*)str, strlen(str)); + LLUUID id; + U64* data = (U64*)id.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; + return id; +} + +//static +LLUUID HBXXH128::digest(const std::string& str) +{ + XXH128_hash_t hash = XXH3_128bits((const void*)str.c_str(), str.size()); + LLUUID id; + U64* data = (U64*)id.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; + return id; +} + +//static +void HBXXH128::digest(LLUUID& result, const void* buffer, size_t len) +{ + XXH128_hash_t hash = XXH3_128bits(buffer, len); + U64* data = (U64*)result.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; +} + +//static +void HBXXH128::digest(LLUUID& result, const char* str) +{ + XXH128_hash_t hash = XXH3_128bits((const void*)str, strlen(str)); + U64* data = (U64*)result.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; +} + +//static +void HBXXH128::digest(LLUUID& result, const std::string& str) +{ + XXH128_hash_t hash = XXH3_128bits((const void*)str.c_str(), str.size()); + U64* data = (U64*)result.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; +} + +// Must be called by all constructors. +void HBXXH128::init() +{ + mState = (void*)XXH3_createState(); + if (!mState || XXH3_128bits_reset((XXH3_state_t*)mState) != XXH_OK) + { + LL_WARNS() << "Failed to initialize state !" << LL_ENDL; + } +} + +HBXXH128::~HBXXH128() +{ + if (mState) + { + XXH3_freeState((XXH3_state_t*)mState); + } +} + +void HBXXH128::update(const void* buffer, size_t len) +{ + if (mState) + { + XXH3_128bits_update((XXH3_state_t*)mState, buffer, len); + } + else + { + LL_WARNS() << "Cannot update a finalized digest !" << LL_ENDL; + } +} + +void HBXXH128::update(const std::string& str) +{ + if (mState) + { + XXH3_128bits_update((XXH3_state_t*)mState, (const void*)str.c_str(), + str.length()); + } + else + { + LL_WARNS() << "Cannot update a finalized digest !" << LL_ENDL; + } +} + +void HBXXH128::update(std::istream& stream) +{ + if (!mState) + { + LL_WARNS() << "Cannot update a finalized digest !" << LL_ENDL; + return; + } + + char buffer[BLOCK_LEN]; + size_t len; + while (stream.good()) + { + stream.read(buffer, BLOCK_LEN); + len = stream.gcount(); + XXH3_128bits_update((XXH3_state_t*)mState, (const void*)buffer, len); + } +} + +void HBXXH128::update(FILE* file) +{ + if (!mState) + { + LL_WARNS() << "Cannot update a finalized digest !" << LL_ENDL; + return; + } + + char buffer[BLOCK_LEN]; + size_t len; + while ((len = fread((void*)buffer, 1, BLOCK_LEN, file))) + { + XXH3_128bits_update((XXH3_state_t*)mState, (const void*)buffer, len); + } + fclose(file); +} + +void HBXXH128::finalize() +{ + if (!mState) + { + LL_WARNS() << "Already finalized !" << LL_ENDL; + return; + } + XXH128_hash_t hash = XXH3_128bits_digest((XXH3_state_t*)mState); + U64* data = (U64*)mDigest.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; + XXH3_freeState((XXH3_state_t*)mState); + mState = NULL; +} + +const LLUUID& HBXXH128::digest() const +{ + if (mState) + { + XXH128_hash_t hash = XXH3_128bits_digest((XXH3_state_t*)mState); + // We cheat the const-ness of the method here, but this is OK, since + // mDigest is private and cannot be accessed indirectly by other + // methods than digest() ones, that do check for mState to decide + // wether mDigest's current value may be provided as is or not. This + // cheat saves us a temporary LLLUID copy. + U64* data = (U64*)mDigest.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; + } + return mDigest; +} + +void HBXXH128::digest(LLUUID& result) const +{ + if (!mState) + { + result = mDigest; + return; + } + XXH128_hash_t hash = XXH3_128bits_digest((XXH3_state_t*)mState); + U64* data = (U64*)result.mData; + // Note: we do not check endianness here and we just store in the same + // order as XXH128_hash_t, that is low word "first". + data[0] = hash.low64; + data[1] = hash.high64; +} + +std::ostream& operator<<(std::ostream& stream, HBXXH128 context) +{ + stream << context.digest(); + return stream; +} diff --git a/indra/llcommon/hbxxh.h b/indra/llcommon/hbxxh.h new file mode 100644 index 0000000000..8a5f977648 --- /dev/null +++ b/indra/llcommon/hbxxh.h @@ -0,0 +1,259 @@ +/** + * @file hbxxh.h + * @brief High performances vectorized hashing based on xxHash. + * + * $LicenseInfo:firstyear=2023&license=viewergpl$ + * Second Life Viewer Source Code + * Copyright (c) 2023, Henri Beauchamp. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_HBXXH_H +#define LL_HBXXH_H + +#include "lluuid.h" + +// HBXXH* classes are to be used where speed matters and cryptographic quality +// is not required (no "one-way" guarantee, though they are likely not worst in +// this respect than MD5 which got busted and is now considered too weak). The +// xxHash code they are built upon is vectorized and about 50 times faster than +// MD5. A 64 bits hash class is also provided for when 128 bits of entropy are +// not needed. The hashes collision rate is similar to MD5's. +// See https://github.com/Cyan4973/xxHash#readme for details. + +// 64 bits hashing class + +class HBXXH64 +{ + friend std::ostream& operator<<(std::ostream&, HBXXH64); + +protected: + LOG_CLASS(HBXXH64); + +public: + inline HBXXH64() { init(); } + + // Constructors for special circumstances; they all digest the first passed + // parameter. Set 'do_finalize' to false if you do not want to finalize the + // context, which is useful/needed when you want to update() it afterwards. + // Ideally, the compiler should be smart enough to get our clue and + // optimize out the const bool test during inlining... + + inline HBXXH64(const void* buffer, size_t len, + const bool do_finalize = true) + { + init(); + update(buffer, len); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH64(const std::string& str, const bool do_finalize = true) + { + init(); + update(str); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH64(std::istream& s, const bool do_finalize = true) + { + init(); + update(s); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH64(FILE* file, const bool do_finalize = true) + { + init(); + update(file); + if (do_finalize) + { + finalize(); + } + } + + ~HBXXH64(); + + void update(const void* buffer, size_t len); + void update(const std::string& str); + void update(std::istream& s); + void update(FILE* file); + + // Note that unlike what happens with LLMD5, you do not need to finalize() + // HBXXH64 before using digest(), and you may keep updating() it even after + // you got a first digest() (the next digest would of course change after + // any update). It is still useful to use finalize() when you do not want + // to store a final digest() result in a separate U64; after this method + // has been called, digest() simply returns mDigest value. + void finalize(); + + U64 digest() const; + + // Fast static methods. Use them when hashing just one contiguous block of + // data. + static U64 digest(const void* buffer, size_t len); + static U64 digest(const char* str); // str must be NUL-terminated + static U64 digest(const std::string& str); + +private: + void init(); + +private: + // We use a void pointer to avoid including xxhash.h here for XXH3_state_t + // (which cannot either be trivially forward-declared, due to complex API + // related pre-processor macros in xxhash.h). + void* mState; + U64 mDigest; +}; + +inline bool operator==(const HBXXH64& a, const HBXXH64& b) +{ + return a.digest() == b.digest(); +} + +inline bool operator!=(const HBXXH64& a, const HBXXH64& b) +{ + return a.digest() != b.digest(); +} + +// 128 bits hashing class + +class HBXXH128 +{ + friend std::ostream& operator<<(std::ostream&, HBXXH128); + +protected: + LOG_CLASS(HBXXH128); + +public: + inline HBXXH128() { init(); } + + // Constructors for special circumstances; they all digest the first passed + // parameter. Set 'do_finalize' to false if you do not want to finalize the + // context, which is useful/needed when you want to update() it afterwards. + // Ideally, the compiler should be smart enough to get our clue and + // optimize out the const bool test during inlining... + + inline HBXXH128(const void* buffer, size_t len, + const bool do_finalize = true) + { + init(); + update(buffer, len); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH128(const std::string& str, const bool do_finalize = true) + { + init(); + update(str); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH128(std::istream& s, const bool do_finalize = true) + { + init(); + update(s); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH128(FILE* file, const bool do_finalize = true) + { + init(); + update(file); + if (do_finalize) + { + finalize(); + } + } + + ~HBXXH128(); + + void update(const void* buffer, size_t len); + void update(const std::string& str); + void update(std::istream& s); + void update(FILE* file); + + // Note that unlike what happens with LLMD5, you do not need to finalize() + // HBXXH128 before using digest(), and you may keep updating() it even + // after you got a first digest() (the next digest would of course change + // after any update). It is still useful to use finalize() when you do not + // want to store a final digest() result in a separate LLUUID; after this + // method has been called, digest() simply returns a reference on mDigest. + void finalize(); + + // We use an LLUUID for the digest, since this is a 128 bits wide native + // type available in the viewer code, making it easy to manipulate. It also + // allows to use HBXXH128 efficiently in LLUUID generate() and combine() + // methods. + const LLUUID& digest() const; + + // Here, we avoid an LLUUID copy whenever we already got one to store the + // result *and* we did not yet call finalize(). + void digest(LLUUID& result) const; + + // Fast static methods. Use them when hashing just one contiguous block of + // data. + static LLUUID digest(const void* buffer, size_t len); + static LLUUID digest(const char* str); // str must be NUL-terminated + static LLUUID digest(const std::string& str); + // Same as above, but saves you from an LLUUID copy when you already got + // one for storage use. + static void digest(LLUUID& result, const void* buffer, size_t len); + static void digest(LLUUID& result, const char* str); // str NUL-terminated + static void digest(LLUUID& result, const std::string& str); + +private: + void init(); + +private: + // We use a void pointer to avoid including xxhash.h here for XXH3_state_t + // (which cannot either be trivially forward-declared, due to complex API + // related pre-processor macros in xxhash.h). + void* mState; + LLUUID mDigest; +}; + +inline bool operator==(const HBXXH128& a, const HBXXH128& b) +{ + return a.digest() == b.digest(); +} + +inline bool operator!=(const HBXXH128& a, const HBXXH128& b) +{ + return a.digest() != b.digest(); +} + +#endif // LL_HBXXH_H diff --git a/indra/llcommon/lluuid.cpp b/indra/llcommon/lluuid.cpp index acce8366ea..8ff6c45760 100644 --- a/indra/llcommon/lluuid.cpp +++ b/indra/llcommon/lluuid.cpp @@ -40,11 +40,11 @@ #include "lluuid.h" #include "llerror.h" #include "llrand.h" -#include "llmd5.h" #include "llstring.h" #include "lltimer.h" #include "llthread.h" #include "llmutex.h" +#include "hbxxh.h" const LLUUID LLUUID::null; const LLTransactionID LLTransactionID::tnull; @@ -402,11 +402,9 @@ LLUUID LLUUID::operator^(const LLUUID& rhs) const void LLUUID::combine(const LLUUID& other, LLUUID& result) const { - LLMD5 md5_uuid; - md5_uuid.update((unsigned char*)mData, 16); - md5_uuid.update((unsigned char*)other.mData, 16); - md5_uuid.finalize(); - md5_uuid.raw_digest(result.mData); + HBXXH128 hash((const void*)mData, 16, false); // false = do not finalize + hash.update((const void*)other.mData, 16); + hash.digest(result); } LLUUID LLUUID::combine(const LLUUID &other) const @@ -857,17 +855,12 @@ void LLUUID::generate() tmp >>= 8; mData[8] = (unsigned char) tmp; - LLMD5 md5_uuid; - - md5_uuid.update(mData,16); - md5_uuid.finalize(); - md5_uuid.raw_digest(mData); + HBXXH128::digest(*this, (const void*)mData, 16); } void LLUUID::generate(const std::string& hash_string) { - LLMD5 md5_uuid((U8*)hash_string.c_str()); - md5_uuid.raw_digest(mData); + HBXXH128::digest(*this, hash_string); } U32 LLUUID::getRandomSeed() @@ -885,13 +878,8 @@ U32 LLUUID::getRandomSeed() seed[7]=(unsigned char)(pid); getSystemTime((uuid_time_t *)(&seed[8])); - LLMD5 md5_seed; - - md5_seed.update(seed,16); - md5_seed.finalize(); - md5_seed.raw_digest(seed); - - return(*(U32 *)seed); + U64 seed64 = HBXXH64((const void*)seed, 16).digest(); + return U32(seed64) ^ U32(seed64 >> 32); } BOOL LLUUID::parseUUID(const std::string& buf, LLUUID* value) -- cgit v1.2.3