From 4e894eb2a7ed6651c54890cd20106bfacd61ef0a Mon Sep 17 00:00:00 2001
From: Nat Goodspeed
Date: Tue, 11 Dec 2018 20:48:20 -0500
Subject: SL-10153: Improve ll_convert_string_to_wide() and its converse.

Instead of returning a wchar_t* and requiring the caller to delete it later,
return a std::basic_string<wchar_t> that's self-cleaning. If the caller wants
a wchar_t*, s/he can call c_str() on the returned string.

Default the code_page parameter to CP_UTF8, since we try to be really
consistent about using UTF-8 encoding for all our internal std::strings.
---
 indra/llcommon/llstring.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'indra/llcommon/llstring.h')

diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 68ee9db46b..7c3e9f952d 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -635,16 +635,18 @@ using snprintf_hack::snprintf;
  * This replaces the unsafe W2A macro from ATL.
  */
 LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
+LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8

 /**
  * Converts a string to wide string.
- *
- * It will allocate memory for result string with "new []". Don't forget to release it with "delete []".
  */
-LL_COMMON_API wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page);
+LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in,
+                                                                   unsigned int code_page);
+LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in);
+                                                                   // default CP_UTF8

 /**
- * Converts incoming string into urf8 string
+ * Converts incoming string into utf8 string
  *
  */
 LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in);
--
cgit v1.2.3
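Aside: the patch above trades a raw owning pointer for a self-cleaning
std::basic_string<wchar_t>. For illustration, here is a minimal sketch of the
two-call Win32 pattern such a signature implies, assuming the implementation
is built on MultiByteToWideChar; convert_string_to_wide is a hypothetical
stand-in, not the viewer's actual llstring.cpp code:

    #include <string>
    #include <windows.h>

    // Hypothetical sketch; not the viewer's actual implementation.
    std::wstring convert_string_to_wide(const std::string& in,
                                        unsigned int code_page = CP_UTF8)
    {
        if (in.empty())
            return std::wstring();
        // First call: ask how many wide characters the result needs.
        int len = MultiByteToWideChar(code_page, 0, in.data(), int(in.size()),
                                      nullptr, 0);
        // The string owns its buffer, so the caller never calls delete[].
        std::wstring out(len, L'\0');
        MultiByteToWideChar(code_page, 0, in.data(), int(in.size()),
                            &out[0], len);
        return out;
    }

Calling code can still obtain a wchar_t* when a Windows API demands one, via
c_str() on the returned string, without taking ownership of anything.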
From 9ffcafb64b4483c315d00e88ffc1438bce1f7915 Mon Sep 17 00:00:00 2001
From: Nat Goodspeed
Date: Fri, 14 Dec 2018 10:48:43 -0500
Subject: SL-10153: Introduce ll_convert, windows_message() templates.

Add ll_convert<TO>() template, used as (e.g.):

    ll_convert<TO>(value_of_some_other_string_type);

There is no generic template implementation -- the template exists solely to
provide generic aliases for a bewildering family of llstring.h string-
conversion functions with highly-specific names. There's a generic
implementation, though, for the degenerate case where FROM and TO are
identical.

Add ll_convert<> specialization aliases for most of the string-conversion
functions declared in llstring.h, including the Windows-specific ones
involving llutf16string and std::wstring.

Add a mini-lecture in llstring.h about appropriate use of string types on
Windows.

Add LL_WCHAR_T_NATIVE llpreprocessor.h macro so we can detect whether to
provide separate conversions for llutf16string and std::wstring, or whether
those would collide because the types are identical.

Add inline ll_convert_wide_to_string(const std::wstring&) overloads so caller
isn't required to call arg.c_str(), which naturally permits an ll_convert
alias.

Add ll_convert_wide_to_wstring(), ll_convert_wstring_to_wide() as
placeholders for converting between Windows std::wstring and Linden
LLWString, with corresponding ll_convert aliases. We don't yet have library
code to perform such conversions officially; for now, just copy characters.

Add LLStringUtil::getenv(key) and getoptenv(key) functions. The latter
returns boost::optional<string_type> in case the caller needs to detect
absence of a given environment variable rather than simply accepting a
default value.

Naturally getenv(), which accepts a default, is implemented using
getoptenv(). getoptenv(), in turn, is implemented using an underlying
llstring_getoptenv(). On Windows, llstring_getoptenv() returns
boost::optional<std::wstring> (based on GetEnvironmentVariableW()), whereas
elsewhere, llstring_getoptenv() returns boost::optional<std::string> (based
on classic Posix getenv()).

The beauty of generic ll_convert is that the portable LLStringUtilBase::
getoptenv() template can call the platform-specific llstring_getoptenv() and
transparently perform whatever conversion is necessary to return the desired
string_type.

Add windows_message(error) template, with an overload that implicitly calls
GetLastError(). We provide a single concrete windows_message<std::wstring>()
implementation because that's what we get from Windows FormatMessageW() --
everything else is a generic conversion to the desired target string type.
This obviates llprocess.cpp's previous WindowsErrorString() implementation --
reimplement using windows_message().
---
 indra/llcommon/llstring.h | 176 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 171 insertions(+), 5 deletions(-)

(limited to 'indra/llcommon/llstring.h')

diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 7c3e9f952d..2dccc7cbdf 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -27,6 +27,7 @@
 #ifndef LL_LLSTRING_H
 #define LL_LLSTRING_H

+#include <boost/optional.hpp>
 #include <string>
 #include <cstdio>
 //#include <locale>
@@ -337,6 +338,19 @@ public:
         const string_type& string,
         const string_type& substr);

+    /**
+     * get environment string value with proper Unicode handling
+     * (key is always UTF-8)
+     * detect absence by return value == dflt
+     */
+    static string_type getenv(const std::string& key, const string_type& dflt="");
+    /**
+     * get optional environment string value with proper Unicode handling
+     * (key is always UTF-8)
+     * detect absence by (! return value)
+     */
+    static boost::optional<string_type> getoptenv(const std::string& key);
+
     static void addCRLF(string_type& string);
     static void removeCRLF(string_type& string);
     static void removeWindowsCR(string_type& string);
@@ -496,6 +510,37 @@ LL_COMMON_API bool iswindividual(llwchar elem);
  * Unicode support
  */

+/// generic conversion aliases
+template<typename TO, typename FROM>
+struct ll_convert_impl
+{
+    // Don't even provide a generic implementation. We specialize for every
+    // combination we do support.
+    TO operator()(const FROM& in) const;
+};
+
+// Use a function template to get the nice ll_convert<TO>(from_value) API.
+template<typename TO, typename FROM>
+TO ll_convert(const FROM& in)
+{
+    return ll_convert_impl<TO, FROM>()(in);
+}
+
+// degenerate case
+template<typename T>
+struct ll_convert_impl<T, T>
+{
+    T operator()(const T& in) const { return in; }
+};
+
+// specialize ll_convert_impl<TO, FROM> to return EXPR
+#define ll_convert_alias(TO, FROM, EXPR)                    \
+template<>                                                  \
+struct ll_convert_impl<TO, FROM>                            \
+{                                                           \
+    TO operator()(const FROM& in) const { return EXPR; }    \
+}
+
 // Make the incoming string a utf8 string. Replaces any unknown glyph
 // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
 // of the data may not be recovered.
 LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);

 //
 // We should never use UTF16 except when communicating with Win32!
+// https://docs.microsoft.com/en-us/cpp/cpp/char-wchar-t-char16-t-char32-t
+// nat 2018-12-14: I consider the whole llutf16string thing a mistake, because
+// the Windows APIs we want to call are all defined in terms of wchar_t*
+// (or worse, LPCTSTR).
+// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
+
+// While there is no point coding for an ASCII-only world (! defined(UNICODE)),
+// use of U16 and llutf16string for Windows APIs locks in /Zc:wchar_t-. Going
+// forward, we should code in terms of wchar_t and std::wstring so as to
+// support either setting of /Zc:wchar_t.
+
+// The first link above states that char can be used to hold ASCII or any
+// multi-byte character set, and distinguishes wchar_t (UTF-16LE), char16_t
+// (UTF-16) and char32_t (UTF-32). Nonetheless, within this code base:
+// * char and std::string always hold UTF-8 (of which ASCII is a subset). It
+//   is a BUG if they are used to pass strings in any other multi-byte
+//   encoding.
+// * wchar_t and std::wstring should be our interface to Windows wide-string
+//   APIs, and therefore hold UTF-16LE.
+// * U16 and llutf16string are the previous but DEPRECATED UTF-16LE type. Do
+//   not introduce new uses of U16 or llutf16string for string data.
+// * llwchar and LLWString hold UTF-32 strings.
+// * Do not introduce char16_t or std::u16string.
+// * Do not introduce char32_t or std::u32string.
 //
+// This typedef may or may not be identical to std::wstring, depending on
+// LL_WCHAR_T_NATIVE.
 typedef std::basic_string<U16> llutf16string;

+#if ! defined(LL_WCHAR_T_NATIVE)
+// wchar_t is identical to U16, and std::wstring is identical to llutf16string.
+// Defining an ll_convert alias involving llutf16string would collide with the
+// comparable preferred alias involving std::wstring. (In this scenario, if
+// you pass llutf16string, it will engage the std::wstring specialization.)
+#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
+#else
+// wchar_t is a distinct native type, so llutf16string is also a distinct
+// type, and there IS a point to converting separately to/from llutf16string.
+// (But why? Windows APIs are still defined in terms of wchar_t, and
+// in this scenario llutf16string won't work for them!)
+#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+
+// converting between std::wstring and llutf16string involves copying chars
+// enclose the brace-initialization expression in parens to protect the comma
+// from macro-argument parsing
+ll_convert_alias(llutf16string, std::wstring, ({ in.begin(), in.end() }));
+ll_convert_alias(std::wstring, llutf16string, ({ in.begin(), in.end() }));
+#endif
+
 LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
 LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
+ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in));

 LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
 LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
+ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in));

 LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
 LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
+ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in));

 LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
 LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);

 // Same function, better name. JC
 inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
+// best name of all
+ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in));

 //
 LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);

 LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
 LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
+ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in));

 LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
 LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
+ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in));

 #if LL_WINDOWS
 inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
@@ -636,14 +734,36 @@ using snprintf_hack::snprintf;
  */
 LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
 LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8
+inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page)
+{
+    return ll_convert_wide_to_string(in.c_str(), code_page);
+}
+inline std::string ll_convert_wide_to_string(const std::wstring& in)
+{
+    return ll_convert_wide_to_string(in.c_str());
+}
+ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in));

 /**
  * Converts a string to wide string.
  */
-LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in,
-                                                                   unsigned int code_page);
-LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in);
-                                                                   // default CP_UTF8
+LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in,
+                                                     unsigned int code_page);
+LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in);
+                                                     // default CP_UTF8
+ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in));
+
+/**
+ * Convert a Windows wide string to our LLWString
+ */
+LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in);
+ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
+
+/**
+ * Convert LLWString to Windows wide string
+ */
+LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in);
+ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));

 /**
  * Converts incoming string into utf8 string
@@ -651,8 +771,39 @@ LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::st
  */
 LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in);

+/// Get Windows message string for passed GetLastError() code
+// VS 2013 doesn't let us forward-declare this template, which is what we
+// started with, so the implementation could reference the specialization we
+// haven't yet declared. Somewhat weirdly, just stating the generic
+// implementation in terms of the specialization works, even in this order...
+
+// the general case is just a conversion from the sole implementation
+// Microsoft says DWORD is a typedef for unsigned long
+// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
+// so rather than drag windows.h into everybody's include space...
+template<typename STRING>
+STRING windows_message(unsigned long error)
+{
+    return ll_convert<STRING>(windows_message<std::wstring>(error));
+}
+
+/// There's only one real implementation
+template<>
+LL_COMMON_API std::wstring windows_message<std::wstring>(unsigned long error);
+
+/// Get Windows message string, implicitly calling GetLastError()
+template<typename STRING>
+STRING windows_message() { return windows_message<STRING>(GetLastError()); }
+
 //@}
-#endif // LL_WINDOWS
+
+LL_COMMON_API boost::optional<std::wstring> llstring_getoptenv(const std::string& key);
+
+#else // ! LL_WINDOWS
+
+LL_COMMON_API boost::optional<std::string> llstring_getoptenv(const std::string& key);
+
+#endif // ! LL_WINDOWS
@@ -1595,6 +1746,21 @@ bool LLStringUtilBase<T>::endsWith(
     return (idx == (string.size() - substr.size()));
 }

+// static
+template<class T>
+auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> boost::optional<string_type>
+{
+    auto found{llstring_getoptenv(key)};
+    return found? { ll_convert<string_type>(*found) } : {};
+}
+
+// static
+template<class T>
+auto LLStringUtilBase<T>::getenv(const std::string& key, const string_type& dflt) -> string_type
+{
+    auto found{getoptenv(key)};
+    return found? *found : dflt;
+}

 template<class T>
 BOOL LLStringUtilBase<T>::convertToBOOL(const string_type& string, BOOL& value)
--
cgit v1.2.3
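Aside: the ll_convert_impl/ll_convert_alias machinery added above is easiest
to see in a self-contained toy. The int-to-std::string alias below is
invented purely for illustration; the viewer only defines aliases between its
own string types:

    #include <iostream>
    #include <string>

    template<typename TO, typename FROM>
    struct ll_convert_impl
    {
        // No generic body: every supported pair is a specialization.
        TO operator()(const FROM& in) const;
    };

    template<typename TO, typename FROM>
    TO ll_convert(const FROM& in)
    {
        return ll_convert_impl<TO, FROM>()(in);
    }

    // degenerate case: FROM and TO identical
    template<typename T>
    struct ll_convert_impl<T, T>
    {
        T operator()(const T& in) const { return in; }
    };

    #define ll_convert_alias(TO, FROM, EXPR)                     \
    template<>                                                   \
    struct ll_convert_impl<TO, FROM>                             \
    {                                                            \
        TO operator()(const FROM& in) const { return EXPR; }    \
    }

    // One generic name fronts a specifically-named conversion:
    ll_convert_alias(std::string, int, std::to_string(in));

    int main()
    {
        std::cout << ll_convert<std::string>(42) << '\n'; // "42"
        std::cout << ll_convert<int>(7) << '\n';          // degenerate case: 7
    }

The payoff is what the commit message describes: callers write
ll_convert<TO>(value), the right specifically-named function is selected at
compile time, and an unsupported TO/FROM pair fails at link time (the primary
template declares operator() but never defines it) instead of converting
silently.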
From 132e708fec50fd756b822925313456c70a4ff27f Mon Sep 17 00:00:00 2001
From: Nat Goodspeed
Date: Fri, 14 Dec 2018 12:01:51 -0500
Subject: SL-10153: Fix previous commit for non-Windows systems.

Move Windows-flavored llstring_getoptenv() to Windows-specific section of
llstring.cpp.

boost::optional type must be stated explicitly to initialize with a value.

On platforms where llwchar is the same as wchar_t, LLWString is the same as
std::wstring, so ll_convert specializations for std::wstring would duplicate
those for LLWString. Defend against that.

The compilers we use don't like 'return condition? { expr } : {};', in which
we hope to construct and return an instance of the declared return type
without having to restate the type. It works to use an explicit 'if'
statement.
---
 indra/llcommon/llstring.h | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

(limited to 'indra/llcommon/llstring.h')

diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 2dccc7cbdf..87cb35c59c 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -583,19 +583,24 @@ typedef std::basic_string<U16> llutf16string;
 // comparable preferred alias involving std::wstring. (In this scenario, if
 // you pass llutf16string, it will engage the std::wstring specialization.)
 #define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
-#else
+#else // defined(LL_WCHAR_T_NATIVE)
 // wchar_t is a distinct native type, so llutf16string is also a distinct
 // type, and there IS a point to converting separately to/from llutf16string.
 // (But why? Windows APIs are still defined in terms of wchar_t, and
 // in this scenario llutf16string won't work for them!)
 #define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)

-// converting between std::wstring and llutf16string involves copying chars
-// enclose the brace-initialization expression in parens to protect the comma
-// from macro-argument parsing
-ll_convert_alias(llutf16string, std::wstring, ({ in.begin(), in.end() }));
-ll_convert_alias(std::wstring, llutf16string, ({ in.begin(), in.end() }));
-#endif
+#if LL_WINDOWS
+// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact,
+// wchar_t is native. Everywhere but Windows, we use it for llwchar (see
+// stdtypes.h). That makes LLWString identical to std::wstring, so these
+// aliases for std::wstring would collide with those for LLWString. Only
+// define on Windows, where converting between std::wstring and llutf16string
+// means copying chars.
+ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end()));
+ll_convert_alias(std::wstring, llutf16string, std::wstring(in.begin(), in.end()));
+#endif // LL_WINDOWS
+#endif // defined(LL_WCHAR_T_NATIVE)

 LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
 LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
@@ -1751,7 +1756,16 @@ template<class T>
 auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> boost::optional<string_type>
 {
     auto found{llstring_getoptenv(key)};
-    return found? { ll_convert<string_type>(*found) } : {};
+    if (found)
+    {
+        // return populated boost::optional
+        return { ll_convert<string_type>(*found) };
+    }
+    else
+    {
+        // empty boost::optional
+        return {};
+    }
 }

 // static
--
cgit v1.2.3
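Aside: the 'return condition? { expr } : {};' construct the commit backs away
from is in fact disallowed by the language, not just by these compilers: a
braced-init-list is not an expression, so it cannot be an operand of ?:. A
minimal self-contained illustration of the accepted fix, with an invented
lookup() used purely for demonstration:

    #include <boost/optional.hpp>
    #include <string>

    boost::optional<std::string> lookup(bool found)
    {
        // Ill-formed: a braced-init-list may not be a ?: operand.
        // return found? { std::string("hit") } : {};
        if (found)
        {
            // populated optional, without restating the optional type
            return { std::string("hit") };
        }
        else
        {
            // empty optional
            return {};
        }
    }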
From 3c53f8abded5da7e9743b743170538a1ede5635a Mon Sep 17 00:00:00 2001
From: Nat Goodspeed
Date: Fri, 14 Dec 2018 16:00:09 -0500
Subject: SL-10153: VS 2013 isn't so fond of ?: involving std::string.
---
 indra/llcommon/llstring.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'indra/llcommon/llstring.h')

diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 87cb35c59c..8d2c8d79d7 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -1773,7 +1773,14 @@ template<class T>
 auto LLStringUtilBase<T>::getenv(const std::string& key, const string_type& dflt) -> string_type
 {
     auto found{getoptenv(key)};
-    return found? *found : dflt;
+    if (found)
+    {
+        return *found;
+    }
+    else
+    {
+        return dflt;
+    }
 }

 template<class T>
--
cgit v1.2.3
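Aside: after this fix, getenv() sits on top of getoptenv() exactly as the
series describes. A self-contained sketch of that layering, using plain Posix
std::getenv and invented _demo names rather than the viewer's real functions:

    #include <boost/optional.hpp>
    #include <cstdlib>
    #include <string>

    // Underlying lookup: empty optional when the variable is unset.
    boost::optional<std::string> getoptenv_demo(const std::string& key)
    {
        if (const char* value = std::getenv(key.c_str()))
        {
            return std::string(value);
        }
        return {};
    }

    // Layered on top: absence collapses to a caller-supplied default.
    std::string getenv_demo(const std::string& key, const std::string& dflt)
    {
        auto found(getoptenv_demo(key));
        if (found)
        {
            return *found;
        }
        // VS 2013 balked at the terser: return found? *found : dflt;
        return dflt;
    }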
From 4a136572857fcf5d5fd21789a777bbde67c1076d Mon Sep 17 00:00:00 2001
From: Nat Goodspeed
Date: Sat, 15 Dec 2018 09:13:24 -0500
Subject: SL-10153: auto name{expression} declares an initializer_list instead
 of a variable of type decltype(expression).

Using SHGetKnownFolderPath(FOLDERID_Fonts) in LLFontGL::getFontPathSystem()
requires new Windows #include files.

A variable with a constructor can't be declared within the braces of a switch
statement, even outside any of its case clauses.
---
 indra/llcommon/llstring.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'indra/llcommon/llstring.h')
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 8d2c8d79d7..30bec3a6f8 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -1755,7 +1755,7 @@ bool LLStringUtilBase<T>::endsWith(
 template<class T>
 auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> boost::optional<string_type>
 {
-    auto found{llstring_getoptenv(key)};
+    auto found(llstring_getoptenv(key));
     if (found)
     {
         // return populated boost::optional
@@ -1772,7 +1772,7 @@ auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> boost::optional
 template<class T>
 auto LLStringUtilBase<T>::getenv(const std::string& key, const string_type& dflt) -> string_type
 {
-    auto found{getoptenv(key)};
+    auto found(getoptenv(key));
     if (found)
     {
         return *found;
--
cgit v1.2.3
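Aside: the auto-with-braces pitfall named in this final commit is the
C++11/14 deduction rule (later relaxed in C++17). A minimal illustration;
f() is invented for demonstration:

    #include <initializer_list>
    #include <string>
    #include <type_traits>

    std::string f() { return "hello"; }

    int main()
    {
        auto a{f()}; // C++11/14: deduces std::initializer_list<std::string>
        auto b(f()); // deduces std::string, the intended type, in any standard
        static_assert(std::is_same<decltype(b), std::string>::value,
                      "parenthesized init deduces the operand's type");
        (void)a;
        return 0;
    }

Hence the mechanical change above from auto found{...} to auto found(...),
which keeps the deduced type equal to the initializer's type regardless of
language version.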