diff options
Diffstat (limited to 'indra/llcommon')
-rw-r--r-- | indra/llcommon/CMakeLists.txt | 1 | ||||
-rw-r--r-- | indra/llcommon/fsyspath.h | 79 | ||||
-rwxr-xr-x | indra/llcommon/hexdump.h | 106 | ||||
-rw-r--r-- | indra/llcommon/llsdjson.cpp | 15 |
4 files changed, 199 insertions, 2 deletions
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 60549d9d11..a504e71340 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -119,6 +119,7 @@ set(llcommon_HEADER_FILES commoncontrol.h ctype_workaround.h fix_macros.h + fsyspath.h function_types.h indra_constants.h lazyeventapi.h diff --git a/indra/llcommon/fsyspath.h b/indra/llcommon/fsyspath.h new file mode 100644 index 0000000000..1b4aec09b4 --- /dev/null +++ b/indra/llcommon/fsyspath.h @@ -0,0 +1,79 @@ +/** + * @file fsyspath.h + * @author Nat Goodspeed + * @date 2024-04-03 + * @brief Adapt our UTF-8 std::strings for std::filesystem::path + * + * $LicenseInfo:firstyear=2024&license=viewerlgpl$ + * Copyright (c) 2024, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_FSYSPATH_H) +#define LL_FSYSPATH_H + +#include <filesystem> + +// While std::filesystem::path can be directly constructed from std::string on +// both Posix and Windows, that's not what we want on Windows. Per +// https://en.cppreference.com/w/cpp/filesystem/path/path: + +// ... the method of conversion to the native character set depends on the +// character type used by source. +// +// * If the source character type is char, the encoding of the source is +// assumed to be the native narrow encoding (so no conversion takes place on +// POSIX systems). +// * If the source character type is char8_t, conversion from UTF-8 to native +// filesystem encoding is used. (since C++20) +// * If the source character type is wchar_t, the input is assumed to be the +// native wide encoding (so no conversion takes places on Windows). + +// The trouble is that on Windows, from std::string ("source character type is +// char"), the "native narrow encoding" isn't UTF-8, so file paths containing +// non-ASCII characters get mangled. +// +// Once we're building with C++20, we could pass a UTF-8 std::string through a +// vector<char8_t> to engage std::filesystem::path's own UTF-8 conversion. But +// sigh, as of 2024-04-03 we're not yet there. +// +// Anyway, encapsulating the important UTF-8 conversions in our own subclass +// allows us to migrate forward to C++20 conventions without changing +// referencing code. + +class fsyspath: public std::filesystem::path +{ + using super = std::filesystem::path; + +public: + // default + fsyspath() {} + // construct from UTF-8 encoded std::string + fsyspath(const std::string& path): super(std::filesystem::u8path(path)) {} + // construct from UTF-8 encoded const char* + fsyspath(const char* path): super(std::filesystem::u8path(path)) {} + // construct from existing path + fsyspath(const super& path): super(path) {} + + fsyspath& operator=(const super& p) { super::operator=(p); return *this; } + fsyspath& operator=(const std::string& p) + { + super::operator=(std::filesystem::u8path(p)); + return *this; + } + fsyspath& operator=(const char* p) + { + super::operator=(std::filesystem::u8path(p)); + return *this; + } + + // shadow base-class string() method with UTF-8 aware method + std::string string() const { return super::u8string(); } + // On Posix systems, where value_type is already char, this operator + // std::string() method shadows the base class operator string_type() + // method. But on Windows, where value_type is wchar_t, the base class + // doesn't have operator std::string(). Provide it. + operator std::string() const { return string(); } +}; + +#endif /* ! defined(LL_FSYSPATH_H) */ diff --git a/indra/llcommon/hexdump.h b/indra/llcommon/hexdump.h new file mode 100755 index 0000000000..ab5ba2b16d --- /dev/null +++ b/indra/llcommon/hexdump.h @@ -0,0 +1,106 @@ +/** + * @file hexdump.h + * @author Nat Goodspeed + * @date 2023-10-03 + * @brief iostream manipulators to stream hex, or string with nonprinting chars + * + * $LicenseInfo:firstyear=2023&license=viewerlgpl$ + * Copyright (c) 2023, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_HEXDUMP_H) +#define LL_HEXDUMP_H + +#include <cctype> +#include <iomanip> +#include <iostream> +#include <string_view> + +namespace LL +{ + +// Format a given byte string as 2-digit hex values, no separators +// Usage: std::cout << hexdump(somestring) << ... +class hexdump +{ +public: + hexdump(const std::string_view& data): + hexdump(data.data(), data.length()) + {} + + hexdump(const char* data, size_t len): + hexdump(reinterpret_cast<const unsigned char*>(data), len) + {} + + hexdump(const std::vector<unsigned char>& data): + hexdump(data.data(), data.size()) + {} + + hexdump(const unsigned char* data, size_t len): + mData(data, data + len) + {} + + friend std::ostream& operator<<(std::ostream& out, const hexdump& self) + { + auto oldfmt{ out.flags() }; + auto oldfill{ out.fill() }; + out.setf(std::ios_base::hex, std::ios_base::basefield); + out.fill('0'); + for (auto c : self.mData) + { + out << std::setw(2) << unsigned(c); + } + out.setf(oldfmt, std::ios_base::basefield); + out.fill(oldfill); + return out; + } + +private: + std::vector<unsigned char> mData; +}; + +// Format a given byte string as a mix of printable characters and, for each +// non-printable character, "\xnn" +// Usage: std::cout << hexmix(somestring) << ... +class hexmix +{ +public: + hexmix(const std::string_view& data): + mData(data) + {} + + hexmix(const char* data, size_t len): + mData(data, len) + {} + + friend std::ostream& operator<<(std::ostream& out, const hexmix& self) + { + auto oldfmt{ out.flags() }; + auto oldfill{ out.fill() }; + out.setf(std::ios_base::hex, std::ios_base::basefield); + out.fill('0'); + for (auto c : self.mData) + { + // std::isprint() must be passed an unsigned char! + if (std::isprint(static_cast<unsigned char>(c))) + { + out << c; + } + else + { + out << "\\x" << std::setw(2) << unsigned(c); + } + } + out.setf(oldfmt, std::ios_base::basefield); + out.fill(oldfill); + return out; + } + +private: + std::string mData; +}; + +} // namespace LL + +#endif /* ! defined(LL_HEXDUMP_H) */ diff --git a/indra/llcommon/llsdjson.cpp b/indra/llcommon/llsdjson.cpp index 5d38e55686..1df2a8f9eb 100644 --- a/indra/llcommon/llsdjson.cpp +++ b/indra/llcommon/llsdjson.cpp @@ -61,12 +61,20 @@ LLSD LlsdFromJson(const boost::json::value& val) result = LLSD(val.as_bool()); break; case boost::json::kind::array: + { result = LLSD::emptyArray(); - for (const auto &element : val.as_array()) + auto& array = val.as_array(); + // allocate elements 0 .. (size() - 1) to avoid incremental allocation + if (! array.empty()) + { + result[array.size() - 1] = LLSD(); + } + for (const auto &element : array) { result.append(LlsdFromJson(element)); } break; + } case boost::json::kind::object: result = LLSD::emptyMap(); for (const auto& element : val.as_object()) @@ -106,6 +114,7 @@ boost::json::value LlsdToJson(const LLSD &val) case LLSD::TypeMap: { boost::json::object& obj = result.emplace_object(); + obj.reserve(val.size()); for (const auto& llsd_dat : llsd::inMap(val)) { obj[llsd_dat.first] = LlsdToJson(llsd_dat.second); @@ -115,6 +124,7 @@ boost::json::value LlsdToJson(const LLSD &val) case LLSD::TypeArray: { boost::json::array& json_array = result.emplace_array(); + json_array.reserve(val.size()); for (const auto& llsd_dat : llsd::inArray(val)) { json_array.push_back(LlsdToJson(llsd_dat)); @@ -123,7 +133,8 @@ boost::json::value LlsdToJson(const LLSD &val) } case LLSD::TypeBinary: default: - LL_ERRS("LlsdToJson") << "Unsupported conversion to JSON from LLSD type (" << val.type() << ")." << LL_ENDL; + LL_ERRS("LlsdToJson") << "Unsupported conversion to JSON from LLSD type (" + << val.type() << ")." << LL_ENDL; break; } |