4 files changed, 199 insertions, 2 deletions
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt
index 60549d9d11..a504e71340 100644
--- a/indra/llcommon/CMakeLists.txt
+++ b/indra/llcommon/CMakeLists.txt
@@ -119,6 +119,7 @@ set(llcommon_HEADER_FILES
     commoncontrol.h
     ctype_workaround.h
     fix_macros.h
+    fsyspath.h
     function_types.h
     indra_constants.h
     lazyeventapi.h
diff --git a/indra/llcommon/fsyspath.h b/indra/llcommon/fsyspath.h
new file mode 100644
index 0000000000..5fa4c8ad1b
--- /dev/null
+++ b/indra/llcommon/fsyspath.h
@@ -0,0 +1,79 @@
+/**
+ * @file   fsyspath.h
+ * @author Nat Goodspeed
+ * @date   2024-04-03
+ * @brief  Adapt our UTF-8 std::strings for std::filesystem::path
+ *
+ * $LicenseInfo:firstyear=2024&license=viewerlgpl$
+ * Copyright (c) 2024, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_FSYSPATH_H)
+#define LL_FSYSPATH_H
+
+#include <filesystem>
+
+// While std::filesystem::path can be directly constructed from std::string on
+// both Posix and Windows, that's not what we want on Windows. Per
+// https://en.cppreference.com/w/cpp/filesystem/path/path:
+
+// ... the method of conversion to the native character set depends on the
+// character type used by source.
+// 
+// * If the source character type is char, the encoding of the source is
+//   assumed to be the native narrow encoding (so no conversion takes place on
+//   POSIX systems).
+// * If the source character type is char8_t, conversion from UTF-8 to native
+//   filesystem encoding is used. (since C++20)
+// * If the source character type is wchar_t, the input is assumed to be the
+//   native wide encoding (so no conversion takes place on Windows).
+
+// The trouble is that on Windows, from std::string ("source character type is
+// char"), the "native narrow encoding" isn't UTF-8, so file paths containing
+// non-ASCII characters get mangled.
+//
+// Once we're building with C++20, we could pass a UTF-8 std::string through a
+// vector<char8_t> to engage std::filesystem::path's own UTF-8 conversion. But
+// sigh, as of 2024-04-03 we're not yet there.
+//
+// Anyway, encapsulating the important UTF-8 conversions in our own subclass
+// allows us to migrate forward to C++20 conventions without changing
+// referencing code.
+
+// std::filesystem::path whose std::string interface is UTF-8 on every platform
+class fsyspath: public std::filesystem::path
+{
+    using super = std::filesystem::path;
+
+public:
+    // default: empty path
+    fsyspath() {}
+    // construct from UTF-8 encoded std::string
+    fsyspath(const std::string& path): super(std::filesystem::u8path(path)) {}
+    // construct from UTF-8 encoded, NUL-terminated const char*
+    fsyspath(const char* path): super(std::filesystem::u8path(path)) {}
+    // construct from existing path
+    fsyspath(const super& path): super(path) {}
+
+    // assign from existing path, or from UTF-8 std::string / const char*
+    fsyspath& operator=(const super& p) { super::operator=(p); return *this; }
+    fsyspath& operator=(const std::string& p)
+    { super::operator=(std::filesystem::u8path(p)); return *this; }
+    fsyspath& operator=(const char* p)
+    { super::operator=(std::filesystem::u8path(p)); return *this; }
+
+    // Shadow base-class string() method with UTF-8 aware method. Copy the
+    // result of u8string() by iterator range instead of returning it as is:
+    // from C++20 on u8string() returns std::u8string, which does not convert
+    // to std::string, so a direct return would no longer compile.
+    std::string string() const
+    { auto u8{ super::u8string() }; return std::string(u8.begin(), u8.end()); }
+    // On Posix systems, where value_type is already char, this operator
+    // std::string() method shadows the base class operator string_type()
+    // method. But on Windows, where value_type is wchar_t, the base class
+    // doesn't have operator std::string(). Provide it.
+    operator std::string() const { return string(); }
+};
+
+#endif /* ! defined(LL_FSYSPATH_H) */
diff --git a/indra/llcommon/hexdump.h b/indra/llcommon/hexdump.h
new file mode 100755
index 0000000000..234168cd61
--- /dev/null
+++ b/indra/llcommon/hexdump.h
@@ -0,0 +1,108 @@
+/**
+ * @file   hexdump.h
+ * @author Nat Goodspeed
+ * @date   2023-10-03
+ * @brief  iostream manipulators to stream hex, or string with nonprinting chars
+ * 
+ * $LicenseInfo:firstyear=2023&license=viewerlgpl$
+ * Copyright (c) 2023, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_HEXDUMP_H)
+#define LL_HEXDUMP_H
+
+#include <cctype>
+#include <iomanip>
+#include <iostream>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace LL
+{
+
+// Stream a byte string as consecutive 2-digit hex values, no separators.
+// The bytes are copied into the hexdump object when it is constructed.
+// Usage: std::cout << hexdump(somestring) << ...
+class hexdump
+{
+public:
+    // the real constructor: copy len bytes starting at data
+    hexdump(const unsigned char* data, size_t len): mData(data, data + len) {}
+
+    // every other constructor forwards to the (pointer, length) form above
+    hexdump(const std::vector<unsigned char>& data):
+        hexdump(data.data(), data.size())
+    {}
+    hexdump(const char* data, size_t len):
+        hexdump(reinterpret_cast<const unsigned char*>(data), len)
+    {}
+    hexdump(const std::string_view& data):
+        hexdump(data.data(), data.length())
+    {}
+
+    friend std::ostream& operator<<(std::ostream& out, const hexdump& self)
+    {
+        // setf() and fill() each return the previous setting; keep both so
+        // the caller's stream gets its own base and fill char back
+        const auto savedFlags{ out.setf(std::ios_base::hex, std::ios_base::basefield) };
+        const auto savedFill{ out.fill('0') };
+        for (const unsigned char byte : self.mData)
+        {
+            // widen to unsigned so the byte streams as a number, not a char
+            out << std::setw(2) << static_cast<unsigned>(byte);
+        }
+        out.setf(savedFlags, std::ios_base::basefield);
+        out.fill(savedFill);
+        return out;
+    }
+
+private:
+    std::vector<unsigned char> mData;
+};
+
+// Format a given byte string as a mix of printable characters and, for each
+// non-printable character, "\xnn". The bytes are copied at construction.
+// Usage: std::cout << hexmix(somestring) << ...
+class hexmix
+{
+public:
+    hexmix(const std::string_view& data): mData(data) {}
+    // data need not be NUL-terminated: exactly len chars are taken
+    hexmix(const char* data, size_t len): mData(data, len) {}
+
+    friend std::ostream& operator<<(std::ostream& out, const hexmix& self)
+    {
+        auto oldfmt{ out.flags() };
+        auto oldfill{ out.fill() };
+        out.setf(std::ios_base::hex, std::ios_base::basefield);
+        out.fill('0');
+        for (auto c : self.mData)
+        {
+            // Work with the byte as unsigned char: std::isprint() requires
+            // it, and widening a negative plain char straight to unsigned
+            // would sign-extend (0x80 would print as "\xffffff80").
+            auto byte{ static_cast<unsigned char>(c) };
+            if (std::isprint(byte))
+            {
+                out << c;
+            }
+            else
+            {
+                out << "\\x" << std::setw(2) << unsigned(byte);
+            }
+        }
+        // restore only what we changed: the numeric base and the fill char
+        out.setf(oldfmt, std::ios_base::basefield);
+        out.fill(oldfill);
+        return out;
+    }
+
+private:
+    std::string mData;
+};
+
+} // namespace LL
+
+#endif /* ! defined(LL_HEXDUMP_H) */
diff --git a/indra/llcommon/llsdjson.cpp b/indra/llcommon/llsdjson.cpp
index 5d38e55686..1df2a8f9eb 100644
--- a/indra/llcommon/llsdjson.cpp
+++ b/indra/llcommon/llsdjson.cpp
@@ -61,12 +61,20 @@ LLSD LlsdFromJson(const boost::json::value& val)
         result = LLSD(val.as_bool());
         break;
     case boost::json::kind::array:
+    {
         result = LLSD::emptyArray();
-        for (const auto &element : val.as_array())
+        auto& array = val.as_array();
+        // Allocate elements 0 .. (size() - 1) up front to avoid incremental
+        // allocation, then fill each slot by index. Calling append() here
+        // would instead add the converted values after the placeholders,
+        // yielding an array twice as long as the JSON one.
+        if (! array.empty()) { result[array.size() - 1] = LLSD(); }
+        for (size_t i = 0; i < array.size(); ++i)
         {
-            result.append(LlsdFromJson(element));
+            result[i] = LlsdFromJson(array[i]);
         }
         break;
+    }
     case boost::json::kind::object:
         result = LLSD::emptyMap();
         for (const auto& element : val.as_object())
@@ -106,6 +114,7 @@ boost::json::value LlsdToJson(const LLSD &val)
     case LLSD::TypeMap:
     {
         boost::json::object& obj = result.emplace_object();
+        obj.reserve(val.size());
         for (const auto& llsd_dat : llsd::inMap(val))
         {
             obj[llsd_dat.first] = LlsdToJson(llsd_dat.second);
@@ -115,6 +124,7 @@ boost::json::value LlsdToJson(const LLSD &val)
     case LLSD::TypeArray:
     {
         boost::json::array& json_array = result.emplace_array();
+        json_array.reserve(val.size());
         for (const auto& llsd_dat : llsd::inArray(val))
         {
             json_array.push_back(LlsdToJson(llsd_dat));
@@ -123,7 +133,8 @@ boost::json::value LlsdToJson(const LLSD &val)
     }
     case LLSD::TypeBinary:
     default:
-        LL_ERRS("LlsdToJson") << "Unsupported conversion to JSON from LLSD type (" << val.type() << ")." << LL_ENDL;
+        LL_ERRS("LlsdToJson") << "Unsupported conversion to JSON from LLSD type ("
+                              << val.type() << ")." << LL_ENDL;
         break;
     }