From af5c5a994b90a27e16ef6f2f5044e096269e4217 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Wed, 27 Oct 2021 13:01:37 -0400 Subject: SL-16207: Update llstring.h handling of different string types. In llpreprocessor.h, consider the case of clang on Windows: #define LL_WCHAR_T_NATIVE there as well as for the Microsoft compiler with /Zc:wchar_t switch. In stdtypes.h, inject a LLWCHAR_IS_WCHAR_T symbol to allow the preprocessor to make decisions about when the types are identical. llstring.h's conversion logic deals with three types of wide strings (LLWString, std::wstring and utf16string) based on three types of wide char (llwchar, wchar_t and U16, respectively). Sometimes they're three distinct types, sometimes wchar_t is identical to llwchar and sometimes wchar_t is identical to U16. Rationalize the three cases using ll_convert_u16_alias() and new ll_convert_wstr_alias() macros. stringize.h was directly calling wstring_to_utf8str() and utf8str_to_wstring(), which was producing errors with VS 2019 clang since there isn't actually a wstring_to_utf8str(std::wstring) overload. Use ll_convert() instead, since that redirects to the relevant ll_convert_wide_to_string() function. (And now you see why we've been trying to migrate to the uniform ll_convert() wrapper!) Similarly, call ll_convert() instead of a two-step conversion from utf8str_to_wstring(), producing LLWString, then a character-by-character copy from LLWString to std::wstring. That isn't even correct: on Windows, we should be encoding from UTF32 to UTF16. --- indra/llcommon/llpreprocessor.h | 4 ++- indra/llcommon/llstring.h | 65 ++++++++++++++++++++++------------------- indra/llcommon/stdtypes.h | 7 +++++ indra/llcommon/stringize.h | 13 ++++----- 4 files changed, 50 insertions(+), 39 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llpreprocessor.h b/indra/llcommon/llpreprocessor.h index b17a8e761a..dc586b0008 100644 --- a/indra/llcommon/llpreprocessor.h +++ b/indra/llcommon/llpreprocessor.h @@ -171,7 +171,9 @@ #define LL_DLLIMPORT #endif // LL_WINDOWS -#if ! defined(LL_WINDOWS) +#if __clang__ || ! defined(LL_WINDOWS) +// Only on Windows, and only with the Microsoft compiler (vs. clang) is +// wchar_t potentially not a distinct type. #define LL_WCHAR_T_NATIVE 1 #else // LL_WINDOWS // https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 4263122f36..89e95ef40a 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -535,6 +535,11 @@ struct ll_convert_impl \ TO operator()(const FROM& in) const { return EXPR; } \ } +// If all we're doing is copying characters, pass this as EXPR. Since it +// expands into the 'return EXPR' slot in the ll_convert_impl specialization +// above, it implies TO{ in.begin(), in.end() }. +#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } + // Make the incoming string a utf8 string. Replaces any unknown glyph // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest // of the data may not be recovered. @@ -571,30 +576,31 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw); // LL_WCHAR_T_NATIVE. typedef std::basic_string llutf16string; -#if ! defined(LL_WCHAR_T_NATIVE) -// wchar_t is identical to U16, and std::wstring is identical to llutf16string. -// Defining an ll_convert alias involving llutf16string would collide with the -// comparable preferred alias involving std::wstring. (In this scenario, if -// you pass llutf16string, it will engage the std::wstring specialization.) -#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing -#else // defined(LL_WCHAR_T_NATIVE) -// wchar_t is a distinct native type, so llutf16string is also a distinct -// type, and there IS a point to converting separately to/from llutf16string. -// (But why? Windows APIs are still defined in terms of wchar_t, and -// in this scenario llutf16string won't work for them!) -#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) - -#if LL_WINDOWS -// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact, -// wchar_t is native. Everywhere but Windows, we use it for llwchar (see -// stdtypes.h). That makes LLWString identical to std::wstring, so these -// aliases for std::wstring would collide with those for LLWString. Only -// define on Windows, where converting between std::wstring and llutf16string -// means copying chars. -ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end())); -ll_convert_alias(std::wstring, llutf16string, std::wstring(in.begin(), in.end())); -#endif // LL_WINDOWS -#endif // defined(LL_WCHAR_T_NATIVE) +// Considering wchar_t, llwchar and U16, there are three relevant cases: +#if LLWCHAR_IS_WCHAR_T // every which way but Windows +// llwchar is identical to wchar_t, LLWString is identical to std::wstring. +// U16 is distinct, llutf16string is distinct (though pretty useless). +// Given conversions to/from LLWString and to/from llutf16string, conversions +// involving std::wstring would collide. +#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving llutf16string without collisions +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#elif defined(LL_WCHAR_T_NATIVE) // Windows, either clang or MS /Zc:wchar_t +// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types. +// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string +// can all be defined. +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#else // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t- +// wchar_t is identical to U16, std::wstring is identical to llutf16string. +// Given conversions to/from LLWString and to/from std::wstring, conversions +// involving llutf16string would collide. +#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving std::wstring without collisions +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#endif LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len); LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str); @@ -625,9 +631,8 @@ LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str); ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in)); -#if LL_WINDOWS +// an older alias for utf16str_to_utf8str(llutf16string) inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} -#endif // Length of this UTF32 string in bytes when transformed to UTF8 LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); @@ -715,7 +720,7 @@ inline std::string ll_convert_wide_to_string(const std::wstring& in) { return ll_convert_wide_to_string(in.c_str()); } -ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); +ll_convert_wstr_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); /** * Converts a string to wide string. @@ -724,19 +729,19 @@ LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page); LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in); // default CP_UTF8 -ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); +ll_convert_wstr_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); /** * Convert a Windows wide string to our LLWString */ LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in); -ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); +ll_convert_wstr_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); /** * Convert LLWString to Windows wide string */ LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in); -ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); +ll_convert_wstr_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); /** * Converts incoming string into utf8 string diff --git a/indra/llcommon/stdtypes.h b/indra/llcommon/stdtypes.h index 887f6ab733..b07805b628 100644 --- a/indra/llcommon/stdtypes.h +++ b/indra/llcommon/stdtypes.h @@ -42,10 +42,17 @@ typedef unsigned int U32; // Windows wchar_t is 16-bit, whichever way /Zc:wchar_t is set. In effect, // Windows wchar_t is always a typedef, either for unsigned short or __wchar_t. // (__wchar_t, available either way, is Microsoft's native 2-byte wchar_t type.) +// The version of clang available with VS 2019 also defines wchar_t as __wchar_t +// which is also 16 bits. // In any case, llwchar should be a UTF-32 type. typedef U32 llwchar; #else typedef wchar_t llwchar; +// What we'd actually want is a simple module-scope 'if constexpr' to test +// std::is_same::value and use that to define, or not +// define, string conversion specializations. Since we don't have that, we'll +// have to rely on #if instead. Sorry, Dr. Stroustrup. +#define LLWCHAR_IS_WCHAR_T 1 #endif #if LL_WINDOWS diff --git a/indra/llcommon/stringize.h b/indra/llcommon/stringize.h index 38dd198ad3..31a114f167 100644 --- a/indra/llcommon/stringize.h +++ b/indra/llcommon/stringize.h @@ -52,7 +52,7 @@ std::basic_string gstringize(const T& item) */ inline std::string stringize(const std::wstring& item) { - return wstring_to_utf8str(item); + return ll_convert(item); } /** @@ -72,8 +72,7 @@ inline std::wstring wstringize(const std::string& item) { // utf8str_to_wstring() returns LLWString, which isn't necessarily the // same as std::wstring - LLWString s(utf8str_to_wstring(item)); - return std::wstring(s.begin(), s.end()); + return ll_convert(item); } /** @@ -146,11 +145,9 @@ void destringize_f(std::basic_string const & str, Functor const & f) * std::istringstream in(str); * in >> item1 >> item2 >> item3 ... ; * @endcode - * @NOTE - once we get generic lambdas, we shouldn't need DEWSTRINGIZE() any - * more since DESTRINGIZE() should do the right thing with a std::wstring. But - * until then, the lambda we pass must accept the right std::basic_istream. */ -#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::istream& in){in >> EXPRESSION;})) -#define DEWSTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::wistream& in){in >> EXPRESSION;})) +#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](auto& in){in >> EXPRESSION;})) +// legacy name, just use DESTRINGIZE() going forward +#define DEWSTRINGIZE(STR, EXPRESSION) DESTRINGIZE(STR, EXPRESSION) #endif /* ! defined(LL_STRINGIZE_H) */ -- cgit v1.2.3 From 10692ab4a4f999e1ee302675e4ffb84f37a52643 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 2 Nov 2021 10:35:34 -0400 Subject: SL-16207: Create uniform overload sets for wide-string conversions. Use new ll_convert_forms() macro in llstring.h to declare, for each wide-string conversion function of interest, four overloads. The real one, the nontrivial one, is (const char*, size_t len), implemented in llstring.cpp. Then (const string&, size_t len), (const char*) and (const string&) are each trivially implemented with an inline call to (const char*, size_t len). Notably, we change all S32 len parameters to size_t. Using S32 is old skool. Tweak each nontrivial implementation in llstring.cpp to accept (const char*, size_t len) instead of (const string&) with or without explicit length. Eliminate from llstring.cpp trivial overloads (deriving length from either a const char* or from a string), since those are now inline in the header. Of course three of those overloads will be unified once we enable C++17 and change each relevant parameter to std::string_view, but we're not yet there. Meanwhile, this suite of overloads minimizes, to the best of our ability, new string allocations solely for parameter passing. And use of a macro means we need only change the macro once we get std::string_view. We take this step because some use cases require (const char*), some require (const string&, size_t len), others (const char*, size_t len) ... We were missing some key overloads, and had to work around them by instantiating new string objects (necessitating both allocation and character copying) just to pass the desired parameter. Using the macro ensures this consistent set of overloads for every wide-string conversion function. Additionally, knowing that the ugly-name overloads exist, ll_convert_forms() implicitly defines corresponding ll_convert() overloads. Streamline declarations of utf16str_to_wstring(), wstring_to_utf16str(), utf8str_to_utf16str(), utf16str_to_utf8str(), utf8str_to_wstring(), wstring_to_utf8str(), ll_convert_wide_to_wstring() and ll_convert_wstring_to_wide() using ll_convert_forms(). Use corresponding new ll_convert_cp_forms() macro to declare consistent overloads for conversion functions accepting an optional unsigned int code_page parameter. We used to delegate to the .cpp file the implementation of each overload accepting code_page so llstring.h need not include the Windows header defining the CP_UTF8 default; this is more simply accomplished by introducing a small ll_wstring_default_code_page() function to retrieve it from the .cpp file. That lets us specify the code_page parameter as optional, using that function as its default value. Use ll_convert_cp_forms() to streamline declarations of ll_convert_wide_to_string() and ll_convert_string_to_wide(). Introduce real implementations of ll_convert_wide_to_wstring() and ll_convert_wstring_to_wide(). The previous implementations merely copied individual characters, which is wrong: when we convert UTF16LE to UTF32, we can and should fold multi-character UTF16LE encodings to the corresponding single UTF32 character. The real implemenations leverage our awareness that both llutf16string and Windows std::wstring (either variant) use UTF16LE encoding, so we can reuse the corresponding llutf16string conversions. Introduce generic ll_convert_length() function, specialized as either std::strlen() or std::wcslen() depending on parameter type. (Even if std::wcslen() is derived from classic C, why doesn't the C++ standard library define a std::strlen(const wchar_t*) overload to call it?) Fix ll_convert_alias()'s ll_convert_impl specialization's operator() to accept boost::call_traits::param_type, so we can pass (e.g.) const std::wstring& but also const wchar_t* instead of const wchar_t*&. --- indra/llcommon/llstring.cpp | 95 +++++++++-------------------- indra/llcommon/llstring.h | 143 ++++++++++++++++++++++++++------------------ 2 files changed, 113 insertions(+), 125 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 0290eea143..5f426e5dca 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -215,7 +215,7 @@ S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar) return inchars - base; } -llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len) +llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) { llutf16string out; @@ -237,27 +237,19 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len) return out; } -llutf16string wstring_to_utf16str(const LLWString &utf32str) +llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) { - const S32 len = (S32)utf32str.length(); - return wstring_to_utf16str(utf32str, len); -} - -llutf16string utf8str_to_utf16str ( const std::string& utf8str ) -{ - LLWString wstr = utf8str_to_wstring ( utf8str ); + LLWString wstr = utf8str_to_wstring ( utf8str, len ); return wstring_to_utf16str ( wstr ); } - -LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len) +LLWString utf16str_to_wstring(const U16* utf16str, size_t len) { LLWString wout; - if((len <= 0) || utf16str.empty()) return wout; + if (len == 0) return wout; S32 i = 0; - // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux): - const U16* chars16 = &(*(utf16str.begin())); + const U16* chars16 = utf16str; while (i < len) { llwchar cur_char; @@ -267,12 +259,6 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len) return wout; } -LLWString utf16str_to_wstring(const llutf16string &utf16str) -{ - const S32 len = (S32)utf16str.length(); - return utf16str_to_wstring(utf16str, len); -} - // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len) { @@ -392,8 +378,7 @@ S32 wstring_utf8_length(const LLWString& wstr) return len; } - -LLWString utf8str_to_wstring(const std::string& utf8str, S32 len) +LLWString utf8str_to_wstring(const char* utf8str, size_t len) { LLWString wout; @@ -481,13 +466,7 @@ LLWString utf8str_to_wstring(const std::string& utf8str, S32 len) return wout; } -LLWString utf8str_to_wstring(const std::string& utf8str) -{ - const S32 len = (S32)utf8str.length(); - return utf8str_to_wstring(utf8str, len); -} - -std::string wstring_to_utf8str(const LLWString& utf32str, S32 len) +std::string wstring_to_utf8str(const llwchar* utf32str, size_t len) { std::string out; @@ -503,20 +482,9 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len) return out; } -std::string wstring_to_utf8str(const LLWString& utf32str) -{ - const S32 len = (S32)utf32str.length(); - return wstring_to_utf8str(utf32str, len); -} - -std::string utf16str_to_utf8str(const llutf16string& utf16str) -{ - return wstring_to_utf8str(utf16str_to_wstring(utf16str)); -} - -std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len) +std::string utf16str_to_utf8str(const U16* utf16str, size_t len) { - return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len); + return wstring_to_utf8str(utf16str_to_wstring(utf16str, len)); } std::string utf8str_trim(const std::string& utf8str) @@ -657,17 +625,16 @@ std::string utf8str_removeCRLF(const std::string& utf8str) } #if LL_WINDOWS -std::string ll_convert_wide_to_string(const wchar_t* in) +unsigned int ll_wstring_default_code_page() { - return ll_convert_wide_to_string(in, CP_UTF8); + return CP_UTF8; } -std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) +std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page) { std::string out; if(in) { - int len_in = wcslen(in); int len_out = WideCharToMultiByte( code_page, 0, @@ -699,12 +666,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) return out; } -std::wstring ll_convert_string_to_wide(const std::string& in) -{ - return ll_convert_string_to_wide(in, CP_UTF8); -} - -std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page) +std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page) { // From review: // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, @@ -716,10 +678,10 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_ // reserve an output buffer that will be destroyed on exit, with a place // to put NULL terminator - std::vector w_out(in.length() + 1); + std::vector w_out(len + 1); memset(&w_out[0], 0, w_out.size()); - int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), + int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len, &w_out[0], w_out.size() - 1); //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858. @@ -729,22 +691,23 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_ return {&w_out[0]}; } -LLWString ll_convert_wide_to_wstring(const std::wstring& in) +LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len) { - // This function, like its converse, is a placeholder, encapsulating a - // guilty little hack: the only "official" way nat has found to convert - // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is - // by using iconv, which we've avoided so far. It kinda sorta works to - // just copy individual characters... - // The point is that if/when we DO introduce some more official way to - // perform such conversions, we should only have to call it here. - return { in.begin(), in.end() }; + // Whether or not std::wstring and llutf16string are distinct types, they + // both hold UTF-16LE characters. (See header file comments.) Pretend this + // wchar_t* sequence is really a U16* sequence and use the conversion we + // define above. + return utf16str_to_wstring(reinterpret_cast(in), len); } -std::wstring ll_convert_wstring_to_wide(const LLWString& in) +std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len) { - // See comments in ll_convert_wide_to_wstring() - return { in.begin(), in.end() }; + // first, convert to llutf16string, for which we have a real implementation + auto utf16str{ wstring_to_utf16str(in, len) }; + // then, because each U16 char must be UTF-16LE encoded, pretend the U16* + // string pointer is a wchar_t* and instantiate a std::wstring of the same + // length. + return { reinterpret_cast(utf16str.c_str()), utf16str.length() }; } std::string ll_convert_string_to_utf8_string(const std::string& in) diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 89e95ef40a..a0598e8a11 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -27,9 +27,11 @@ #ifndef LL_LLSTRING_H #define LL_LLSTRING_H +#include #include #include #include +#include // std::wcslen() //#include #include #include @@ -532,14 +534,59 @@ struct ll_convert_impl template<> \ struct ll_convert_impl \ { \ - TO operator()(const FROM& in) const { return EXPR; } \ + /* param_type optimally passes both char* and string */ \ + TO operator()(typename boost::call_traits::param_type in) const { return EXPR; } \ } -// If all we're doing is copying characters, pass this as EXPR. Since it -// expands into the 'return EXPR' slot in the ll_convert_impl specialization -// above, it implies TO{ in.begin(), in.end() }. +// If all we're doing is copying characters, pass this to ll_convert_alias as +// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl +// specialization above, it implies TO{ in.begin(), in.end() }. #define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } +// Generic name for strlen() / wcslen() - the default implementation should +// (!) work with U16 and llwchar, but we don't intend to engage it. +template +size_t ll_convert_length(const CHARTYPE* zstr) +{ + const CHARTYPE* zp; + // classic C string scan + for (zp = zstr; *zp; ++zp) + ; + return (zp - zstr); +} + +// specialize where we have a library function; may use intrinsic operations +template <> +inline size_t ll_convert_length(const wchar_t* zstr) { return std::wcslen(zstr); } +template <> +inline size_t ll_convert_length (const char* zstr) { return std::strlen(zstr); } + +// ll_convert_forms() is short for a bunch of boilerplate. It defines +// longname(const char*, len), longname(const char*), longname(const string&) +// and longname(const string&, len) so calls written pre-ll_convert() will +// work. Most of these overloads will be unified once we turn on C++17 and can +// use std::string_view. +// It also uses aliasmacro to ensure that both ll_convert(const char*) +// and ll_convert(const string&) will work. +#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \ +LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \ +inline auto longname(const INSTR& in, size_t len) \ +{ \ + return longname(in.c_str(), len); \ +} \ +inline auto longname(const INSTR::value_type* in) \ +{ \ + return longname(in, ll_convert_length(in)); \ +} \ +inline auto longname(const INSTR& in) \ +{ \ + return longname(in.c_str(), in.length()); \ +} \ +/* string param */ \ +aliasmacro(OUTSTR, INSTR, longname(in)); \ +/* char* param */ \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + // Make the incoming string a utf8 string. Replaces any unknown glyph // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest // of the data may not be recovered. @@ -602,34 +649,18 @@ typedef std::basic_string llutf16string; #define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) #endif -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len); -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str); -ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in)); - -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len); -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str); -ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in)); +ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring); +ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str); +ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str); +ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring); -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len); -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str ); -ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in)); - -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len); -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str); // Same function, better name. JC inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); } -// best name of all -ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in)); -// LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str); -ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in)); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str); -ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in)); +ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str); +ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str); // an older alias for utf16str_to_utf8str(llutf16string) inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} @@ -706,42 +737,36 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); //@{ /** - * @brief Convert a wide string to std::string + * @brief Convert a wide string to/from std::string + * Convert a Windows wide string to/from our LLWString * * This replaces the unsafe W2A macro from ATL. */ -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page); -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8 -inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page) -{ - return ll_convert_wide_to_string(in.c_str(), code_page); -} -inline std::string ll_convert_wide_to_string(const std::wstring& in) -{ - return ll_convert_wide_to_string(in.c_str()); -} -ll_convert_wstr_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); - -/** - * Converts a string to wide string. - */ -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in, - unsigned int code_page); -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in); - // default CP_UTF8 -ll_convert_wstr_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); - -/** - * Convert a Windows wide string to our LLWString - */ -LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in); -ll_convert_wstr_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); - -/** - * Convert LLWString to Windows wide string - */ -LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in); -ll_convert_wstr_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); +// Avoid requiring this header to #include the Windows header file declaring +// our actual default code_page by delegating this function to our .cpp file. +LL_COMMON_API unsigned int ll_wstring_default_code_page(); + +// This is like ll_convert_forms(), with the added complexity of a code page +// parameter that may or may not be passed. +#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \ +LL_COMMON_API OUTSTR longname( \ + const INSTR::value_type* in, \ + size_t len=ll_convert_length(in), \ + unsigned int code_page=ll_wstring_default_code_page()); \ +inline auto longname( \ + const INSTR& in, \ + size_t len=in.length(), \ + unsigned int code_page=ll_wstring_default_code_page()) \ +{ \ + return longname(in.c_str(), len, code_page); \ +} \ +aliasmacro(OUTSTR, INSTR, longname(in)); \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + +ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string); +ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide); + ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring); + ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide); /** * Converts incoming string into utf8 string -- cgit v1.2.3 From 95958bc8b2a5340bef93996f2ff0c04956bfb743 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 2 Nov 2021 11:31:47 -0400 Subject: SL-16207: Guess Microsoft compiler isn't smart about default params? clang allows us to specify, as a default function parameter, an expression involving a preceding parameter, e.g. (char* ptr, size_t len=strlen(ptr)). The Microsoft compiler produces errors, requiring more overloads to address that. Also #undef llstring.h's declaration helper macros at the bottom of the file. Once we've used them to declare stuff, they need not (should not) be visible to the consuming source file. --- indra/llcommon/llstring.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index a0598e8a11..54e3f9ee63 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -749,17 +749,29 @@ LL_COMMON_API unsigned int ll_wstring_default_code_page(); // This is like ll_convert_forms(), with the added complexity of a code page // parameter that may or may not be passed. #define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \ +/* declare the only nontrivial implementation (in .cpp file) */ \ LL_COMMON_API OUTSTR longname( \ const INSTR::value_type* in, \ - size_t len=ll_convert_length(in), \ + size_t len, \ unsigned int code_page=ll_wstring_default_code_page()); \ +/* if passed only a char pointer, scan for nul terminator */ \ +inline auto longname(const INSTR::value_type* in) \ +{ \ + return longname(in, ll_convert_length(in)); \ +} \ +/* if passed string and length, extract its char pointer */ \ inline auto longname( \ const INSTR& in, \ - size_t len=in.length(), \ + size_t len, \ unsigned int code_page=ll_wstring_default_code_page()) \ { \ return longname(in.c_str(), len, code_page); \ } \ +/* if passed only a string object, no scan, pass known length */ \ +inline auto longname(const INSTR& in) \ +{ \ + return longname(in.c_str(), in.length()); \ +} \ aliasmacro(OUTSTR, INSTR, longname(in)); \ aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) @@ -1967,4 +1979,14 @@ void LLStringUtilBase::truncate(string_type& string, size_type count) string.resize(count < cur_size ? count : cur_size); } +// The good thing about *declaration* macros, vs. usage macros, is that now +// we're done with them: we don't need them to bleed into the consuming source +// file. +#undef ll_convert_alias +#undef ll_convert_u16_alias +#undef ll_convert_wstr_alias +#undef LL_CONVERT_COPY_CHARS +#undef ll_convert_forms +#undef ll_convert_cp_forms + #endif // LL_STRING_H -- cgit v1.2.3 From a33718ee4ca4edbbc4c4034b29ec4c8d102f3a7e Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 2 Nov 2021 17:27:59 -0400 Subject: SL-16207: Fix bug in ll_convert_string_to_utf8_string(). That function wants to pass a code_page to ll_convert_string_to_wide(), but the code_page parameter was being mistaken for the length parameter, leading to access violations. --- indra/llcommon/llstring.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'indra/llcommon') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 5f426e5dca..03f706f5a5 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -712,10 +712,11 @@ std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len) std::string ll_convert_string_to_utf8_string(const std::string& in) { - auto w_mesg = ll_convert_string_to_wide(in, CP_ACP); - std::string out_utf8(ll_convert_wide_to_string(w_mesg.c_str(), CP_UTF8)); - - return out_utf8; + // If you pass code_page, you must also pass length, otherwise the code + // page parameter will be mistaken for length. + auto w_mesg = ll_convert_string_to_wide(in, in.length(), CP_ACP); + // CP_UTF8 is default -- see ll_wstring_default_code_page() above. + return ll_convert_wide_to_string(w_mesg); } namespace -- cgit v1.2.3