summaryrefslogtreecommitdiff
path: root/indra/llcommon/hbxxh.h
diff options
context:
space:
mode:
authorHenri Beauchamp <sldevel@users.noreply.github.com>2023-01-31 17:42:51 +0100
committerGitHub <noreply@github.com>2023-01-31 18:42:51 +0200
commit9438ef5f79fdac11080c3fa10c518e335fd7d8d6 (patch)
tree9a97248bab30408815c4d242f1ce932e822853a3 /indra/llcommon/hbxxh.h
parent21b592865228bacf07ec8a526f1756bae69597e0 (diff)
SL-19110 Fast hashing classes for use in place of the slow LLMD5, where speed matters. (#64)
This commit adds the HBXXH64 and HBXXH128 classes for use as a drop-in replacement for the slow LLMD5 hashing class, where speed matters and backward compatibility (with standard hashing algorithms) and/or cryptographic hashing qualities are not required. It also replaces LLMD5 with HBXXH* in a few existing hot (well, ok, just "warm" for some) paths meeting the above requirements, while paving the way for future use cases, such as in the DRTVWR-559 and sibling branches where the slow LLMD5 is used (e.g. to hash materials and vertex buffer cache entries), and which could use such a (way) faster algorithm with very significant benefits and no negative impact. Here is the comment I added in indra/llcommon/hbxxh.h: // HBXXH* classes are to be used where speed matters and cryptographic quality // is not required (no "one-way" guarantee, though they are likely no worse in // this respect than MD5, which got busted and is now considered too weak). The // xxHash code they are built upon is vectorized and about 50 times faster than // MD5. A 64-bit hash class is also provided for when 128 bits of entropy are // not needed. The hash collision rate is similar to MD5's. // See https://github.com/Cyan4973/xxHash#readme for details.
Diffstat (limited to 'indra/llcommon/hbxxh.h')
-rw-r--r--indra/llcommon/hbxxh.h259
1 files changed, 259 insertions, 0 deletions
diff --git a/indra/llcommon/hbxxh.h b/indra/llcommon/hbxxh.h
new file mode 100644
index 0000000000..8a5f977648
--- /dev/null
+++ b/indra/llcommon/hbxxh.h
@@ -0,0 +1,259 @@
+/**
+ * @file hbxxh.h
+ * @brief High-performance vectorized hashing based on xxHash.
+ *
+ * $LicenseInfo:firstyear=2023&license=viewergpl$
+ * Second Life Viewer Source Code
+ * Copyright (c) 2023, Henri Beauchamp.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_HBXXH_H
+#define LL_HBXXH_H
+
+#include "lluuid.h"
+
+// HBXXH* classes are to be used where speed matters and cryptographic quality
+// is not required (no "one-way" guarantee, though they are likely no worse in
+// this respect than MD5, which got busted and is now considered too weak). The
+// xxHash code they are built upon is vectorized and about 50 times faster than
+// MD5. A 64-bit hash class is also provided for when 128 bits of entropy are
+// not needed. The hash collision rate is similar to MD5's.
+// See https://github.com/Cyan4973/xxHash#readme for details.
+
+// 64 bits hashing class
+
+// Incremental 64-bit hasher: data is fed through update() (or through one of
+// the convenience constructors) and the result is read with digest(). The
+// static digest() overloads hash one contiguous block in a single call.
+//
+// NOTE(review): this class holds a raw mState pointer and declares a
+// destructor, but declares no copy constructor or copy assignment operator, so
+// copies are implicit member-wise copies that share the same mState. If
+// ~HBXXH64() releases mState (it is defined outside this header - TODO
+// confirm), copying a non-finalized instance is unsafe.
+class HBXXH64
+{
+ // Stream insertion operator, defined outside this header.
+ // NOTE(review): the hasher is taken by value, so every call copies it (see
+ // the copy hazard noted on the class); a const HBXXH64& parameter, changed
+ // together with the out-of-line definition, would avoid that copy.
+ friend std::ostream& operator<<(std::ostream&, HBXXH64);
+
+protected:
+ // LOG_CLASS is a project macro defined elsewhere; presumably it tags log
+ // output with this class name - TODO confirm.
+ LOG_CLASS(HBXXH64);
+
+public:
+ // Default constructor: runs init() only; no data is hashed yet.
+ inline HBXXH64() { init(); }
+
+ // Constructors for special circumstances; they all digest the first passed
+ // parameter. Set 'do_finalize' to false if you do not want to finalize the
+ // context, which is useful/needed when you want to update() it afterwards.
+ // Ideally, the compiler should be smart enough to get our clue and
+ // optimize out the const bool test during inlining...
+
+ // Hashes the 'len' bytes starting at 'buffer'.
+ inline HBXXH64(const void* buffer, size_t len,
+ const bool do_finalize = true)
+ {
+ init();
+ update(buffer, len);
+ if (do_finalize)
+ {
+ finalize();
+ }
+ }
+
+ // Hashes the contents of 'str'.
+ inline HBXXH64(const std::string& str, const bool do_finalize = true)
+ {
+ init();
+ update(str);
+ if (do_finalize)
+ {
+ finalize();
+ }
+ }
+
+ // Hashes data read from the stream 's' (how much of the stream gets
+ // consumed is decided by update(std::istream&) - TODO confirm).
+ inline HBXXH64(std::istream& s, const bool do_finalize = true)
+ {
+ init();
+ update(s);
+ if (do_finalize)
+ {
+ finalize();
+ }
+ }
+
+ // Hashes data read from 'file' (how much of the file gets consumed, and
+ // whether the FILE is left open, is decided by update(FILE*) - TODO
+ // confirm).
+ inline HBXXH64(FILE* file, const bool do_finalize = true)
+ {
+ init();
+ update(file);
+ if (do_finalize)
+ {
+ finalize();
+ }
+ }
+
+ // Defined outside this header; presumably releases mState - TODO confirm.
+ ~HBXXH64();
+
+ // Feed more data into the hash, from a raw buffer of 'len' bytes, a
+ // string, an input stream or a C FILE. All four are defined outside this
+ // header.
+ void update(const void* buffer, size_t len);
+ void update(const std::string& str);
+ void update(std::istream& s);
+ void update(FILE* file);
+
+ // Note that unlike what happens with LLMD5, you do not need to finalize()
+ // HBXXH64 before using digest(), and you may keep updating() it even after
+ // you got a first digest() (the next digest would of course change after
+ // any update). It is still useful to use finalize() when you do not want
+ // to store a final digest() result in a separate U64; after this method
+ // has been called, digest() simply returns mDigest value.
+ void finalize();
+
+ // Returns the current 64-bit hash value (see the note on finalize() above
+ // for how finalization affects it).
+ U64 digest() const;
+
+ // Fast static methods. Use them when hashing just one contiguous block of
+ // data.
+ static U64 digest(const void* buffer, size_t len);
+ static U64 digest(const char* str); // str must be NUL-terminated
+ static U64 digest(const std::string& str);
+
+private:
+ // Called first by every constructor; defined outside this header
+ // (presumably sets up mState - TODO confirm).
+ void init();
+
+private:
+ // We use a void pointer to avoid including xxhash.h here for XXH3_state_t
+ // (which cannot either be trivially forward-declared, due to complex API
+ // related pre-processor macros in xxhash.h).
+ void* mState;
+ // Value returned by digest() once finalize() has been called.
+ U64 mDigest;
+};
+
+// Equality: two HBXXH64 hashers compare equal when their current digests are
+// equal.
+inline bool operator==(const HBXXH64& a, const HBXXH64& b)
+{
+ const U64 lhs = a.digest();
+ const U64 rhs = b.digest();
+ return lhs == rhs;
+}
+
+// Inequality: two HBXXH64 hashers differ when their current digests differ.
+inline bool operator!=(const HBXXH64& a, const HBXXH64& b)
+{
+ const U64 lhs = a.digest();
+ const U64 rhs = b.digest();
+ return lhs != rhs;
+}
+
+// 128 bits hashing class
+
+// Incremental 128-bit hasher: data is fed through update() (or through one of
+// the convenience constructors) and the result is read with digest(), as an
+// LLUUID. The static digest() overloads hash one contiguous block in a single
+// call.
+//
+// NOTE(review): this class holds a raw mState pointer and declares a
+// destructor, but declares no copy constructor or copy assignment operator, so
+// copies are implicit member-wise copies that share the same mState. If
+// ~HBXXH128() releases mState (it is defined outside this header - TODO
+// confirm), copying a non-finalized instance is unsafe.
+class HBXXH128
+{
+ // Stream insertion operator, defined outside this header.
+ // NOTE(review): the hasher is taken by value, so every call copies it (see
+ // the copy hazard noted on the class); a const HBXXH128& parameter, changed
+ // together with the out-of-line definition, would avoid that copy.
+ friend std::ostream& operator<<(std::ostream&, HBXXH128);
+
+protected:
+ // LOG_CLASS is a project macro defined elsewhere; presumably it tags log
+ // output with this class name - TODO confirm.
+ LOG_CLASS(HBXXH128);
+
+public:
+ // Default constructor: runs init() only; no data is hashed yet.
+ inline HBXXH128() { init(); }
+
+ // Constructors for special circumstances; they all digest the first passed
+ // parameter. Set 'do_finalize' to false if you do not want to finalize the
+ // context, which is useful/needed when you want to update() it afterwards.
+ // Ideally, the compiler should be smart enough to get our clue and
+ // optimize out the const bool test during inlining...
+
+ // Hashes the 'len' bytes starting at 'buffer'.
+ inline HBXXH128(const void* buffer, size_t len,
+ const bool do_finalize = true)
+ {
+ init();
+ update(buffer, len);
+ if (do_finalize)
+ {
+ finalize();
+ }
+ }
+
+ // Hashes the contents of 'str'.
+ inline HBXXH128(const std::string& str, const bool do_finalize = true)
+ {
+ init();
+ update(str);
+ if (do_finalize)
+ {
+ finalize();
+ }
+ }
+
+ // Hashes data read from the stream 's' (how much of the stream gets
+ // consumed is decided by update(std::istream&) - TODO confirm).
+ inline HBXXH128(std::istream& s, const bool do_finalize = true)
+ {
+ init();
+ update(s);
+ if (do_finalize)
+ {
+ finalize();
+ }
+ }
+
+ // Hashes data read from 'file' (how much of the file gets consumed, and
+ // whether the FILE is left open, is decided by update(FILE*) - TODO
+ // confirm).
+ inline HBXXH128(FILE* file, const bool do_finalize = true)
+ {
+ init();
+ update(file);
+ if (do_finalize)
+ {
+ finalize();
+ }
+ }
+
+ // Defined outside this header; presumably releases mState - TODO confirm.
+ ~HBXXH128();
+
+ // Feed more data into the hash, from a raw buffer of 'len' bytes, a
+ // string, an input stream or a C FILE. All four are defined outside this
+ // header.
+ void update(const void* buffer, size_t len);
+ void update(const std::string& str);
+ void update(std::istream& s);
+ void update(FILE* file);
+
+ // Note that unlike what happens with LLMD5, you do not need to finalize()
+ // HBXXH128 before using digest(), and you may keep updating() it even
+ // after you got a first digest() (the next digest would of course change
+ // after any update). It is still useful to use finalize() when you do not
+ // want to store a final digest() result in a separate LLUUID; after this
+ // method has been called, digest() simply returns a reference to mDigest.
+ void finalize();
+
+ // We use an LLUUID for the digest, since this is a 128 bits wide native
+ // type available in the viewer code, making it easy to manipulate. It also
+ // allows to use HBXXH128 efficiently in LLUUID generate() and combine()
+ // methods.
+ // NOTE(review): the result is returned by reference; its lifetime when
+ // finalize() has not been called yet is decided by the implementation -
+ // TODO confirm before holding on to the reference.
+ const LLUUID& digest() const;
+
+ // Here, we avoid an LLUUID copy whenever we already got one to store the
+ // result *and* we did not yet call finalize().
+ // The digest is written into 'result'.
+ void digest(LLUUID& result) const;
+
+ // Fast static methods. Use them when hashing just one contiguous block of
+ // data.
+ static LLUUID digest(const void* buffer, size_t len);
+ static LLUUID digest(const char* str); // str must be NUL-terminated
+ static LLUUID digest(const std::string& str);
+ // Same as above, but saves you from an LLUUID copy when you already got
+ // one for storage use.
+ static void digest(LLUUID& result, const void* buffer, size_t len);
+ static void digest(LLUUID& result, const char* str); // str NUL-terminated
+ static void digest(LLUUID& result, const std::string& str);
+
+private:
+ // Called first by every constructor; defined outside this header
+ // (presumably sets up mState - TODO confirm).
+ void init();
+
+private:
+ // We use a void pointer to avoid including xxhash.h here for XXH3_state_t
+ // (which cannot either be trivially forward-declared, due to complex API
+ // related pre-processor macros in xxhash.h).
+ void* mState;
+ // Value that digest() returns a reference to once finalize() has been
+ // called.
+ LLUUID mDigest;
+};
+
+// Equality: two HBXXH128 hashers compare equal when their current digests are
+// equal.
+inline bool operator==(const HBXXH128& a, const HBXXH128& b)
+{
+ const LLUUID& lhs = a.digest();
+ const LLUUID& rhs = b.digest();
+ return lhs == rhs;
+}
+
+// Inequality: two HBXXH128 hashers differ when their current digests differ.
+inline bool operator!=(const HBXXH128& a, const HBXXH128& b)
+{
+ const LLUUID& lhs = a.digest();
+ const LLUUID& rhs = b.digest();
+ return lhs != rhs;
+}
+
+#endif // LL_HBXXH_H