summaryrefslogtreecommitdiff
path: root/indra/llcommon
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2024-09-25 21:38:46 -0400
committerNat Goodspeed <nat@lindenlab.com>2024-09-25 21:38:46 -0400
commitdc0c6d396eb70392f294eea65975646dad662f6a (patch)
tree9a99cb4015a7199c624fe4d726b8ac5bf874fb6d /indra/llcommon
parent55df7328c6f8c864ea309c57d73e791e079b3c2c (diff)
Adapt `fsyspath` for C++20 conventions.
In C++20, `std::filesystem::u8path()` (that accepts a UTF-8 encoded `std::string` and returns a `std::filesystem::path`) is deprecated. Instead, to engage UTF-8 coding conversions, we're supposed to pass the `path` constructor a `std::u8string`, i.e. a `std::basic_string<char8_t>`. Since `char8_t` is a type distinct from both `char` and `unsigned char`, we must Do Something to pass a UTF-8 encoded `std::string` into `std::filesystem::path`. To avoid copying characters from a `std::string` into a temporary `std::u8string` and from there into the `std::filesystem::path`, make a `boost::transform_iterator` that accepts a `std::string_view::iterator` and adapts it to dereference `char8_t` characters. Make `fsyspath(std::string_view)` engage the base-class constructor accepting (iterator, iterator), adapting `string_view::begin()` and `end()` to deliver `char8_t` characters. Use the same tactic for `fsyspath::operator=(std::string_view)`, explicitly calling `std::filesystem::path::assign()` with the adapted iterators. To resolve ambiguities, provide both constructors and assignment operators accepting `(const std::string&)` and `(const char*)`, explicitly converting each to `std::string_view`. At the same time, `std::filesystem::path::u8string()` now returns `std::u8string` rather than `std::string`. Since `std::filesystem::path` delivers only that `std::u8string` rather than iterators into its internal representation, we can't avoid capturing it and copying to the returned `std::string`. Remove explicit `.u8string()` calls from a few existing `fsyspath` instances, now that `fsyspath` supports implicit conversion to `std::string`.
Diffstat (limited to 'indra/llcommon')
-rw-r--r--indra/llcommon/fsyspath.h56
-rw-r--r--indra/llcommon/lua_function.cpp8
2 files changed, 40 insertions, 24 deletions
diff --git a/indra/llcommon/fsyspath.h b/indra/llcommon/fsyspath.h
index 1b4aec09b4..f66970ed8f 100644
--- a/indra/llcommon/fsyspath.h
+++ b/indra/llcommon/fsyspath.h
@@ -12,7 +12,10 @@
#if ! defined(LL_FSYSPATH_H)
#define LL_FSYSPATH_H
+#include <boost/iterator/transform_iterator.hpp>
#include <filesystem>
+#include <string>
+#include <string_view>
// While std::filesystem::path can be directly constructed from std::string on
// both Posix and Windows, that's not what we want on Windows. Per
@@ -33,42 +36,55 @@
// char"), the "native narrow encoding" isn't UTF-8, so file paths containing
// non-ASCII characters get mangled.
//
-// Once we're building with C++20, we could pass a UTF-8 std::string through a
-// vector<char8_t> to engage std::filesystem::path's own UTF-8 conversion. But
-// sigh, as of 2024-04-03 we're not yet there.
-//
-// Anyway, encapsulating the important UTF-8 conversions in our own subclass
-// allows us to migrate forward to C++20 conventions without changing
-// referencing code.
+// Encapsulating the important UTF-8 conversions in our own subclass allows us
+// to migrate forward to C++20 conventions without changing referencing code.
class fsyspath: public std::filesystem::path
{
using super = std::filesystem::path;
+ // In C++20 (__cpp_lib_char8_t), std::filesystem::u8path() is deprecated.
+ // std::filesystem::path(iter, iter) performs UTF-8 conversions when the
+ // value_type of the iterators is char8_t. While we could copy into a
+ // temporary std::u8string and from there into std::filesystem::path, to
+ // minimize string copying we'll define a transform_iterator that accepts
+ // a std::string_view::iterator and dereferences to char8_t.
+ struct u8ify
+ {
+ char8_t operator()(char c) const { return char8_t(c); }
+ };
+ using u8iter = boost::transform_iterator<u8ify, std::string_view::iterator>;
+
public:
// default
fsyspath() {}
- // construct from UTF-8 encoded std::string
- fsyspath(const std::string& path): super(std::filesystem::u8path(path)) {}
- // construct from UTF-8 encoded const char*
- fsyspath(const char* path): super(std::filesystem::u8path(path)) {}
+ // construct from UTF-8 encoded string
+ fsyspath(const std::string& path): fsyspath(std::string_view(path)) {}
+ fsyspath(const char* path): fsyspath(std::string_view(path)) {}
+ fsyspath(std::string_view path):
+ super(u8iter(path.begin(), u8ify()), u8iter(path.end(), u8ify()))
+ {}
// construct from existing path
fsyspath(const super& path): super(path) {}
- fsyspath& operator=(const super& p) { super::operator=(p); return *this; }
- fsyspath& operator=(const std::string& p)
- {
- super::operator=(std::filesystem::u8path(p));
- return *this;
- }
- fsyspath& operator=(const char* p)
+ fsyspath& operator=(const super& p) { super::operator=(p); return *this; }
+ fsyspath& operator=(const std::string& p) { return (*this) = std::string_view(p); }
+ fsyspath& operator=(const char* p) { return (*this) = std::string_view(p); }
+ fsyspath& operator=(std::string_view p)
{
- super::operator=(std::filesystem::u8path(p));
+ assign(u8iter(p.begin(), u8ify()), u8iter(p.end(), u8ify()));
return *this;
}
// shadow base-class string() method with UTF-8 aware method
- std::string string() const { return super::u8string(); }
+ std::string string() const
+ {
+ // Short of forbidden type punning, I see no way to avoid copying this
+ // std::u8string to a std::string.
+ auto u8str{ super::u8string() };
+ // from https://github.com/tahonermann/char8_t-remediation/blob/master/char8_t-remediation.h#L180-L182
+ return { u8str.begin(), u8str.end() };
+ }
// On Posix systems, where value_type is already char, this operator
// std::string() method shadows the base class operator string_type()
// method. But on Windows, where value_type is wchar_t, the base class
diff --git a/indra/llcommon/lua_function.cpp b/indra/llcommon/lua_function.cpp
index a9f88f3170..e28c0caa27 100644
--- a/indra/llcommon/lua_function.cpp
+++ b/indra/llcommon/lua_function.cpp
@@ -170,7 +170,7 @@ fsyspath source_path(lua_State* L)
{
lua_getinfo(L, i, "s", &ar);
}
- return ar.source;
+ return { ar.source };
}
} // namespace lluau
@@ -1108,7 +1108,7 @@ lua_function(source_path, "source_path(): return the source path of the running
{
lua_checkdelta(L, 1);
lluau_checkstack(L, 1);
- lua_pushstdstring(L, lluau::source_path(L).u8string());
+ lua_pushstdstring(L, lluau::source_path(L));
return 1;
}
@@ -1119,7 +1119,7 @@ lua_function(source_dir, "source_dir(): return the source directory of the runni
{
lua_checkdelta(L, 1);
lluau_checkstack(L, 1);
- lua_pushstdstring(L, lluau::source_path(L).parent_path().u8string());
+ lua_pushstdstring(L, lluau::source_path(L).parent_path());
return 1;
}
@@ -1132,7 +1132,7 @@ lua_function(abspath, "abspath(path): "
lua_checkdelta(L);
auto path{ lua_tostdstring(L, 1) };
lua_pop(L, 1);
- lua_pushstdstring(L, (lluau::source_path(L).parent_path() / path).u8string());
+ lua_pushstdstring(L, (lluau::source_path(L).parent_path() / path));
return 1;
}