summaryrefslogtreecommitdiff
path: root/indra/llcommon/llstring.h
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2021-10-27 13:01:37 -0400
committerNat Goodspeed <nat@lindenlab.com>2021-10-27 13:01:37 -0400
commitaf5c5a994b90a27e16ef6f2f5044e096269e4217 (patch)
treeb8b1c55265a02da48b92e2ec412fd79045c836b5 /indra/llcommon/llstring.h
parentcbaba2df56c66926e051d50b6cb02955c81c2a6c (diff)
SL-16207: Update llstring.h handling of different string types.
In llpreprocessor.h, consider the case of clang on Windows: #define LL_WCHAR_T_NATIVE there as well as for the Microsoft compiler with /Zc:wchar_t switch. In stdtypes.h, inject a LLWCHAR_IS_WCHAR_T symbol to allow the preprocessor to make decisions about when the types are identical. llstring.h's conversion logic deals with three types of wide strings (LLWString, std::wstring and utf16string) based on three types of wide char (llwchar, wchar_t and U16, respectively). Sometimes they're three distinct types, sometimes wchar_t is identical to llwchar and sometimes wchar_t is identical to U16. Rationalize the three cases using ll_convert_u16_alias() and new ll_convert_wstr_alias() macros. stringize.h was directly calling wstring_to_utf8str() and utf8str_to_wstring(), which was producing errors with VS 2019 clang since there isn't actually a wstring_to_utf8str(std::wstring) overload. Use ll_convert<std::string>() instead, since that redirects to the relevant ll_convert_wide_to_string() function. (And now you see why we've been trying to migrate to the uniform ll_convert<target>() wrapper!) Similarly, call ll_convert<std::wstring>() instead of a two-step conversion from utf8str_to_wstring(), producing LLWString, then a character-by-character copy from LLWString to std::wstring. That isn't even correct: on Windows, we should be encoding from UTF32 to UTF16.
Diffstat (limited to 'indra/llcommon/llstring.h')
-rw-r--r--indra/llcommon/llstring.h65
1 files changed, 35 insertions, 30 deletions
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 4263122f36..89e95ef40a 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -535,6 +535,11 @@ struct ll_convert_impl<TO, FROM> \
TO operator()(const FROM& in) const { return EXPR; } \
}
+// If all we're doing is copying characters, pass this as EXPR. Since it
+// expands into the 'return EXPR' slot in the ll_convert_impl specialization
+// above, it implies TO{ in.begin(), in.end() }.
+#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() }
+
// Make the incoming string a utf8 string. Replaces any unknown glyph
// with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
// of the data may not be recovered.
@@ -571,30 +576,31 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);
// LL_WCHAR_T_NATIVE.
typedef std::basic_string<U16> llutf16string;
-#if ! defined(LL_WCHAR_T_NATIVE)
-// wchar_t is identical to U16, and std::wstring is identical to llutf16string.
-// Defining an ll_convert alias involving llutf16string would collide with the
-// comparable preferred alias involving std::wstring. (In this scenario, if
-// you pass llutf16string, it will engage the std::wstring specialization.)
-#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
-#else // defined(LL_WCHAR_T_NATIVE)
-// wchar_t is a distinct native type, so llutf16string is also a distinct
-// type, and there IS a point to converting separately to/from llutf16string.
-// (But why? Windows APIs are still defined in terms of wchar_t, and
-// in this scenario llutf16string won't work for them!)
-#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
-
-#if LL_WINDOWS
-// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact,
-// wchar_t is native. Everywhere but Windows, we use it for llwchar (see
-// stdtypes.h). That makes LLWString identical to std::wstring, so these
-// aliases for std::wstring would collide with those for LLWString. Only
-// define on Windows, where converting between std::wstring and llutf16string
-// means copying chars.
-ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end()));
-ll_convert_alias(std::wstring, llutf16string, std::wstring(in.begin(), in.end()));
-#endif // LL_WINDOWS
-#endif // defined(LL_WCHAR_T_NATIVE)
+// Considering wchar_t, llwchar and U16, there are three relevant cases:
+#if LLWCHAR_IS_WCHAR_T // every which way but Windows
+// llwchar is identical to wchar_t, LLWString is identical to std::wstring.
+// U16 is distinct, llutf16string is distinct (though pretty useless).
+// Given conversions to/from LLWString and to/from llutf16string, conversions
+// involving std::wstring would collide.
+#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing
+// but we can define conversions involving llutf16string without collisions
+#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+
+#elif defined(LL_WCHAR_T_NATIVE) // Windows, either clang or MS /Zc:wchar_t
+// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types.
+// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string
+// can all be defined.
+#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+
+#else // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t-
+// wchar_t is identical to U16, std::wstring is identical to llutf16string.
+// Given conversions to/from LLWString and to/from std::wstring, conversions
+// involving llutf16string would collide.
+#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
+// but we can define conversions involving std::wstring without collisions
+#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+#endif
LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
@@ -625,9 +631,8 @@ LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32
LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in));
-#if LL_WINDOWS
+// an older alias for utf16str_to_utf8str(llutf16string)
inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
-#endif
// Length of this UTF32 string in bytes when transformed to UTF8
LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr);
@@ -715,7 +720,7 @@ inline std::string ll_convert_wide_to_string(const std::wstring& in)
{
return ll_convert_wide_to_string(in.c_str());
}
-ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in));
+ll_convert_wstr_alias(std::string, std::wstring, ll_convert_wide_to_string(in));
/**
* Converts a string to wide string.
@@ -724,19 +729,19 @@ LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in,
unsigned int code_page);
LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in);
// default CP_UTF8
-ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in));
+ll_convert_wstr_alias(std::wstring, std::string, ll_convert_string_to_wide(in));
/**
* Convert a Windows wide string to our LLWString
*/
LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in);
-ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
+ll_convert_wstr_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
/**
* Convert LLWString to Windows wide string
*/
LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in);
-ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));
+ll_convert_wstr_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));
/**
* Converts incoming string into utf8 string