diff options
author | Nat Goodspeed <nat@lindenlab.com> | 2021-10-27 13:01:37 -0400 |
---|---|---|
committer | Nat Goodspeed <nat@lindenlab.com> | 2021-10-27 13:01:37 -0400 |
commit | af5c5a994b90a27e16ef6f2f5044e096269e4217 (patch) | |
tree | b8b1c55265a02da48b92e2ec412fd79045c836b5 | |
parent | cbaba2df56c66926e051d50b6cb02955c81c2a6c (diff) |
SL-16207: Update llstring.h handling of different string types.
In llpreprocessor.h, consider the case of clang on Windows: #define
LL_WCHAR_T_NATIVE there as well as for the Microsoft compiler with the
/Zc:wchar_t switch.
In stdtypes.h, inject an LLWCHAR_IS_WCHAR_T symbol to allow the preprocessor to
make decisions about when llwchar and wchar_t are the identical type.
llstring.h's conversion logic deals with three types of wide strings
(LLWString, std::wstring and utf16string) based on three types of wide char
(llwchar, wchar_t and U16, respectively). Sometimes they're three distinct
types, sometimes wchar_t is identical to llwchar and sometimes wchar_t is
identical to U16. Rationalize the three cases using ll_convert_u16_alias() and
new ll_convert_wstr_alias() macros.
stringize.h was directly calling wstring_to_utf8str() and utf8str_to_wstring(),
which was producing errors with VS 2019 clang since there isn't actually a
wstring_to_utf8str(std::wstring) overload. Use ll_convert<std::string>()
instead, since that redirects to the relevant ll_convert_wide_to_string()
function. (And now you see why we've been trying to migrate to the uniform
ll_convert<target>() wrapper!) Similarly, call ll_convert<std::wstring>()
instead of the old two-step conversion: utf8str_to_wstring(), producing LLWString,
followed by a character-by-character copy from LLWString to std::wstring. That
copy isn't even correct: on Windows, we should be encoding from UTF32 to UTF16.
-rw-r--r-- | indra/llcommon/llpreprocessor.h | 4 | ||||
-rw-r--r-- | indra/llcommon/llstring.h | 65 | ||||
-rw-r--r-- | indra/llcommon/stdtypes.h | 7 | ||||
-rw-r--r-- | indra/llcommon/stringize.h | 13 |
4 files changed, 50 insertions, 39 deletions
diff --git a/indra/llcommon/llpreprocessor.h b/indra/llcommon/llpreprocessor.h index b17a8e761a..dc586b0008 100644 --- a/indra/llcommon/llpreprocessor.h +++ b/indra/llcommon/llpreprocessor.h @@ -171,7 +171,9 @@ #define LL_DLLIMPORT #endif // LL_WINDOWS -#if ! defined(LL_WINDOWS) +#if __clang__ || ! defined(LL_WINDOWS) +// Only on Windows, and only with the Microsoft compiler (vs. clang) is +// wchar_t potentially not a distinct type. #define LL_WCHAR_T_NATIVE 1 #else // LL_WINDOWS // https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 4263122f36..89e95ef40a 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -535,6 +535,11 @@ struct ll_convert_impl<TO, FROM> \ TO operator()(const FROM& in) const { return EXPR; } \ } +// If all we're doing is copying characters, pass this as EXPR. Since it +// expands into the 'return EXPR' slot in the ll_convert_impl specialization +// above, it implies TO{ in.begin(), in.end() }. +#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } + // Make the incoming string a utf8 string. Replaces any unknown glyph // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest // of the data may not be recovered. @@ -571,30 +576,31 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw); // LL_WCHAR_T_NATIVE. typedef std::basic_string<U16> llutf16string; -#if ! defined(LL_WCHAR_T_NATIVE) -// wchar_t is identical to U16, and std::wstring is identical to llutf16string. -// Defining an ll_convert alias involving llutf16string would collide with the -// comparable preferred alias involving std::wstring. (In this scenario, if -// you pass llutf16string, it will engage the std::wstring specialization.) 
-#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing -#else // defined(LL_WCHAR_T_NATIVE) -// wchar_t is a distinct native type, so llutf16string is also a distinct -// type, and there IS a point to converting separately to/from llutf16string. -// (But why? Windows APIs are still defined in terms of wchar_t, and -// in this scenario llutf16string won't work for them!) -#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) - -#if LL_WINDOWS -// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact, -// wchar_t is native. Everywhere but Windows, we use it for llwchar (see -// stdtypes.h). That makes LLWString identical to std::wstring, so these -// aliases for std::wstring would collide with those for LLWString. Only -// define on Windows, where converting between std::wstring and llutf16string -// means copying chars. -ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end())); -ll_convert_alias(std::wstring, llutf16string, std::wstring(in.begin(), in.end())); -#endif // LL_WINDOWS -#endif // defined(LL_WCHAR_T_NATIVE) +// Considering wchar_t, llwchar and U16, there are three relevant cases: +#if LLWCHAR_IS_WCHAR_T // every which way but Windows +// llwchar is identical to wchar_t, LLWString is identical to std::wstring. +// U16 is distinct, llutf16string is distinct (though pretty useless). +// Given conversions to/from LLWString and to/from llutf16string, conversions +// involving std::wstring would collide. +#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving llutf16string without collisions +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#elif defined(LL_WCHAR_T_NATIVE) // Windows, either clang or MS /Zc:wchar_t +// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types. +// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string +// can all be defined. 
+#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#else // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t- +// wchar_t is identical to U16, std::wstring is identical to llutf16string. +// Given conversions to/from LLWString and to/from std::wstring, conversions +// involving llutf16string would collide. +#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving std::wstring without collisions +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#endif LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len); LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str); @@ -625,9 +631,8 @@ LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str); ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in)); -#if LL_WINDOWS +// an older alias for utf16str_to_utf8str(llutf16string) inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} -#endif // Length of this UTF32 string in bytes when transformed to UTF8 LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); @@ -715,7 +720,7 @@ inline std::string ll_convert_wide_to_string(const std::wstring& in) { return ll_convert_wide_to_string(in.c_str()); } -ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); +ll_convert_wstr_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); /** * Converts a string to wide string. 
@@ -724,19 +729,19 @@ LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page); LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in); // default CP_UTF8 -ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); +ll_convert_wstr_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); /** * Convert a Windows wide string to our LLWString */ LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in); -ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); +ll_convert_wstr_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); /** * Convert LLWString to Windows wide string */ LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in); -ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); +ll_convert_wstr_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); /** * Converts incoming string into utf8 string diff --git a/indra/llcommon/stdtypes.h b/indra/llcommon/stdtypes.h index 887f6ab733..b07805b628 100644 --- a/indra/llcommon/stdtypes.h +++ b/indra/llcommon/stdtypes.h @@ -42,10 +42,17 @@ typedef unsigned int U32; // Windows wchar_t is 16-bit, whichever way /Zc:wchar_t is set. In effect, // Windows wchar_t is always a typedef, either for unsigned short or __wchar_t. // (__wchar_t, available either way, is Microsoft's native 2-byte wchar_t type.) +// The version of clang available with VS 2019 also defines wchar_t as __wchar_t +// which is also 16 bits. // In any case, llwchar should be a UTF-32 type. typedef U32 llwchar; #else typedef wchar_t llwchar; +// What we'd actually want is a simple module-scope 'if constexpr' to test +// std::is_same<wchar_t, llwchar>::value and use that to define, or not +// define, string conversion specializations. Since we don't have that, we'll +// have to rely on #if instead. Sorry, Dr. Stroustrup. 
+#define LLWCHAR_IS_WCHAR_T 1 #endif #if LL_WINDOWS diff --git a/indra/llcommon/stringize.h b/indra/llcommon/stringize.h index 38dd198ad3..31a114f167 100644 --- a/indra/llcommon/stringize.h +++ b/indra/llcommon/stringize.h @@ -52,7 +52,7 @@ std::basic_string<CHARTYPE> gstringize(const T& item) */ inline std::string stringize(const std::wstring& item) { - return wstring_to_utf8str(item); + return ll_convert<std::string>(item); } /** @@ -72,8 +72,7 @@ inline std::wstring wstringize(const std::string& item) { // utf8str_to_wstring() returns LLWString, which isn't necessarily the // same as std::wstring - LLWString s(utf8str_to_wstring(item)); - return std::wstring(s.begin(), s.end()); + return ll_convert<std::wstring>(item); } /** @@ -146,11 +145,9 @@ void destringize_f(std::basic_string<CHARTYPE> const & str, Functor const & f) * std::istringstream in(str); * in >> item1 >> item2 >> item3 ... ; * @endcode - * @NOTE - once we get generic lambdas, we shouldn't need DEWSTRINGIZE() any - * more since DESTRINGIZE() should do the right thing with a std::wstring. But - * until then, the lambda we pass must accept the right std::basic_istream. */ -#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::istream& in){in >> EXPRESSION;})) -#define DEWSTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::wistream& in){in >> EXPRESSION;})) +#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](auto& in){in >> EXPRESSION;})) +// legacy name, just use DESTRINGIZE() going forward +#define DEWSTRINGIZE(STR, EXPRESSION) DESTRINGIZE(STR, EXPRESSION) #endif /* ! defined(LL_STRINGIZE_H) */ |