diff options
author | Henri Beauchamp <sldevel@users.noreply.github.com> | 2023-01-31 17:42:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-31 18:42:51 +0200 |
commit | 9438ef5f79fdac11080c3fa10c518e335fd7d8d6 (patch) | |
tree | 9a97248bab30408815c4d242f1ce932e822853a3 /indra/llcommon/hbxxh.h | |
parent | 21b592865228bacf07ec8a526f1756bae69597e0 (diff) |
SL-19110 Fast hashing classes for use in place of the slow LLMD5, where speed matters. (#64)
This commit adds the HBXX64 and HBXX128 classes for use as a drop-in
replacement for the slow LLMD5 hashing class, where speed matters and
backward compatibility (with standard hashing algorithms) and/or
cryptographic hashing qualities are not required.
It also replaces LLMD5 with HBXX* in a few existing hot (well, ok, just
"warm" for some) paths meeting the above requirements, while paving the way for
future use cases, such as in the DRTVWR-559 and sibling branches where the slow
LLMD5 is used (e.g. to hash materials and vertex buffer cache entries), and
could be use such a (way) faster algorithm with very significant benefits and
no negative impact.
Here is the comment I added in indra/llcommon/hbxx.h:
// HBXXH* classes are to be used where speed matters and cryptographic quality
// is not required (no "one-way" guarantee, though they are likely not worst in
// this respect than MD5 which got busted and is now considered too weak). The
// xxHash code they are built upon is vectorized and about 50 times faster than
// MD5. A 64 bits hash class is also provided for when 128 bits of entropy are
// not needed. The hashes collision rate is similar to MD5's.
// See https://github.com/Cyan4973/xxHash#readme for details.
Diffstat (limited to 'indra/llcommon/hbxxh.h')
-rw-r--r-- | indra/llcommon/hbxxh.h | 259 |
1 files changed, 259 insertions, 0 deletions
diff --git a/indra/llcommon/hbxxh.h b/indra/llcommon/hbxxh.h new file mode 100644 index 0000000000..8a5f977648 --- /dev/null +++ b/indra/llcommon/hbxxh.h @@ -0,0 +1,259 @@ +/** + * @file hbxxh.h + * @brief High performances vectorized hashing based on xxHash. + * + * $LicenseInfo:firstyear=2023&license=viewergpl$ + * Second Life Viewer Source Code + * Copyright (c) 2023, Henri Beauchamp. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_HBXXH_H +#define LL_HBXXH_H + +#include "lluuid.h" + +// HBXXH* classes are to be used where speed matters and cryptographic quality +// is not required (no "one-way" guarantee, though they are likely not worst in +// this respect than MD5 which got busted and is now considered too weak). The +// xxHash code they are built upon is vectorized and about 50 times faster than +// MD5. A 64 bits hash class is also provided for when 128 bits of entropy are +// not needed. The hashes collision rate is similar to MD5's. +// See https://github.com/Cyan4973/xxHash#readme for details. + +// 64 bits hashing class + +class HBXXH64 +{ + friend std::ostream& operator<<(std::ostream&, HBXXH64); + +protected: + LOG_CLASS(HBXXH64); + +public: + inline HBXXH64() { init(); } + + // Constructors for special circumstances; they all digest the first passed + // parameter. Set 'do_finalize' to false if you do not want to finalize the + // context, which is useful/needed when you want to update() it afterwards. + // Ideally, the compiler should be smart enough to get our clue and + // optimize out the const bool test during inlining... + + inline HBXXH64(const void* buffer, size_t len, + const bool do_finalize = true) + { + init(); + update(buffer, len); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH64(const std::string& str, const bool do_finalize = true) + { + init(); + update(str); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH64(std::istream& s, const bool do_finalize = true) + { + init(); + update(s); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH64(FILE* file, const bool do_finalize = true) + { + init(); + update(file); + if (do_finalize) + { + finalize(); + } + } + + ~HBXXH64(); + + void update(const void* buffer, size_t len); + void update(const std::string& str); + void update(std::istream& s); + void update(FILE* file); + + // Note that unlike what happens with LLMD5, you do not need to finalize() + // HBXXH64 before using digest(), and you may keep updating() it even after + // you got a first digest() (the next digest would of course change after + // any update). It is still useful to use finalize() when you do not want + // to store a final digest() result in a separate U64; after this method + // has been called, digest() simply returns mDigest value. + void finalize(); + + U64 digest() const; + + // Fast static methods. Use them when hashing just one contiguous block of + // data. + static U64 digest(const void* buffer, size_t len); + static U64 digest(const char* str); // str must be NUL-terminated + static U64 digest(const std::string& str); + +private: + void init(); + +private: + // We use a void pointer to avoid including xxhash.h here for XXH3_state_t + // (which cannot either be trivially forward-declared, due to complex API + // related pre-processor macros in xxhash.h). + void* mState; + U64 mDigest; +}; + +inline bool operator==(const HBXXH64& a, const HBXXH64& b) +{ + return a.digest() == b.digest(); +} + +inline bool operator!=(const HBXXH64& a, const HBXXH64& b) +{ + return a.digest() != b.digest(); +} + +// 128 bits hashing class + +class HBXXH128 +{ + friend std::ostream& operator<<(std::ostream&, HBXXH128); + +protected: + LOG_CLASS(HBXXH128); + +public: + inline HBXXH128() { init(); } + + // Constructors for special circumstances; they all digest the first passed + // parameter. Set 'do_finalize' to false if you do not want to finalize the + // context, which is useful/needed when you want to update() it afterwards. + // Ideally, the compiler should be smart enough to get our clue and + // optimize out the const bool test during inlining... + + inline HBXXH128(const void* buffer, size_t len, + const bool do_finalize = true) + { + init(); + update(buffer, len); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH128(const std::string& str, const bool do_finalize = true) + { + init(); + update(str); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH128(std::istream& s, const bool do_finalize = true) + { + init(); + update(s); + if (do_finalize) + { + finalize(); + } + } + + inline HBXXH128(FILE* file, const bool do_finalize = true) + { + init(); + update(file); + if (do_finalize) + { + finalize(); + } + } + + ~HBXXH128(); + + void update(const void* buffer, size_t len); + void update(const std::string& str); + void update(std::istream& s); + void update(FILE* file); + + // Note that unlike what happens with LLMD5, you do not need to finalize() + // HBXXH128 before using digest(), and you may keep updating() it even + // after you got a first digest() (the next digest would of course change + // after any update). It is still useful to use finalize() when you do not + // want to store a final digest() result in a separate LLUUID; after this + // method has been called, digest() simply returns a reference on mDigest. + void finalize(); + + // We use an LLUUID for the digest, since this is a 128 bits wide native + // type available in the viewer code, making it easy to manipulate. It also + // allows to use HBXXH128 efficiently in LLUUID generate() and combine() + // methods. + const LLUUID& digest() const; + + // Here, we avoid an LLUUID copy whenever we already got one to store the + // result *and* we did not yet call finalize(). + void digest(LLUUID& result) const; + + // Fast static methods. Use them when hashing just one contiguous block of + // data. + static LLUUID digest(const void* buffer, size_t len); + static LLUUID digest(const char* str); // str must be NUL-terminated + static LLUUID digest(const std::string& str); + // Same as above, but saves you from an LLUUID copy when you already got + // one for storage use. + static void digest(LLUUID& result, const void* buffer, size_t len); + static void digest(LLUUID& result, const char* str); // str NUL-terminated + static void digest(LLUUID& result, const std::string& str); + +private: + void init(); + +private: + // We use a void pointer to avoid including xxhash.h here for XXH3_state_t + // (which cannot either be trivially forward-declared, due to complex API + // related pre-processor macros in xxhash.h). + void* mState; + LLUUID mDigest; +}; + +inline bool operator==(const HBXXH128& a, const HBXXH128& b) +{ + return a.digest() == b.digest(); +} + +inline bool operator!=(const HBXXH128& a, const HBXXH128& b) +{ + return a.digest() != b.digest(); +} + +#endif // LL_HBXXH_H |