diff options
author | Nat Goodspeed <nat@lindenlab.com> | 2018-12-14 10:48:43 -0500 |
---|---|---|
committer | Nat Goodspeed <nat@lindenlab.com> | 2018-12-14 10:48:43 -0500 |
commit | 9ffcafb64b4483c315d00e88ffc1438bce1f7915 (patch) | |
tree | 499aaaf46732ad439162e35462f54269cd33512f | |
parent | cbf4295b63fcc7128f928d1718efe069a9f3a789 (diff) |
SL-10153: Introduce ll_convert, windows_message() templates.
Add ll_convert<TO, FROM> template, used as (e.g.):
ll_convert<std::string>(value_of_some_other_string_type);
There is no generic template implementation -- the template exists solely to
provide generic aliases for a bewildering family of llstring.h string-
conversion functions with highly-specific names. There's a generic
implementation, though, for the degenerate case where FROM and TO are
identical.
Add ll_convert<> specialization aliases for most of the string-conversion
functions declared in llstring.h, including the Windows-specific ones
involving llutf16string and std::wstring.
Add a mini-lecture in llstring.h about appropriate use of string types on
Windows.
Add LL_WCHAR_T_NATIVE llpreprocessor.h macro so we can detect whether to
provide separate conversions for llutf16string and std::wstring, or whether
those would collide because the types are identical.
Add inline ll_convert_wide_to_string(const std::wstring&) overloads so caller
isn't required to call arg.c_str(), which naturally permits an ll_convert
alias.
Add ll_convert_wide_to_wstring(), ll_convert_wstring_to_wide() as placeholders
for converting between Windows std::wstring and Linden LLWString, with
corresponding ll_convert aliases. We don't yet have library code to perform
such conversions officially; for now, just copy characters.
Add LLStringUtil::getenv(key) and getoptenv(key) functions. The latter returns
boost::optional<string_type> in case the caller needs to detect absence of a
given environment variable rather than simply accepting a default value.
Naturally getenv(), which accepts a default, is implemented using getoptenv().
getoptenv(), in turn, is implemented using an underlying llstring_getoptenv().
On Windows, llstring_getoptenv() returns boost::optional<std::wstring> (based
on GetEnvironmentVariableW()), whereas elsewhere, llstring_getoptenv() returns
boost::optional<std::string> (based on classic Posix getenv()).
The beauty of generic ll_convert is that the portable LLStringUtilBase<T>::
getoptenv() template can call the platform-specific llstring_getoptenv() and
transparently perform whatever conversion is necessary to return the desired
string_type.
Add windows_message<T>(error) template, with an overload that implicitly calls
GetLastError(). We provide a single concrete windows_message<std::wstring>()
implementation because that's what we get from Windows FormatMessageW() --
everything else is a generic conversion to the desired target string type.
This obviates llprocess.cpp's previous WindowsErrorString() implementation --
reimplement using windows_message<std::string>().
-rw-r--r-- | indra/llcommon/llpreprocessor.h | 21 | ||||
-rw-r--r-- | indra/llcommon/llprocess.cpp | 27 | ||||
-rw-r--r-- | indra/llcommon/llstring.cpp | 125 | ||||
-rw-r--r-- | indra/llcommon/llstring.h | 176 | ||||
-rw-r--r-- | indra/llcommon/stdtypes.h | 7 |
5 files changed, 323 insertions, 33 deletions
diff --git a/indra/llcommon/llpreprocessor.h b/indra/llcommon/llpreprocessor.h index ef015fdce4..e8f9981437 100644 --- a/indra/llcommon/llpreprocessor.h +++ b/indra/llcommon/llpreprocessor.h @@ -101,6 +101,9 @@ #endif +// Although thread_local is now a standard storage class, we can't just +// #define LL_THREAD_LOCAL as thread_local because the *usage* is different. +// We'll have to take the time to change LL_THREAD_LOCAL declarations by hand. #if LL_WINDOWS # define LL_THREAD_LOCAL __declspec(thread) #else @@ -177,6 +180,24 @@ #define LL_DLLIMPORT #endif // LL_WINDOWS +#if ! defined(LL_WINDOWS) +#define LL_WCHAR_T_NATIVE 1 +#else // LL_WINDOWS +// https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros +// _WCHAR_T_DEFINED is defined if wchar_t is provided at all. +// Specifically, it has value 1 if wchar_t is an intrinsic type, else empty. +// _NATIVE_WCHAR_T_DEFINED has value 1 if wchar_t is intrinsic, else undefined. +// For years we have compiled with /Zc:wchar_t-, meaning that wchar_t is a +// typedef for unsigned short (in stddef.h). Lore has it that one of our +// proprietary binary-only libraries has traditionally been built that way and +// therefore EVERYTHING ELSE requires it. Therefore, in a typical Linden +// Windows build, _WCHAR_T_DEFINED is defined but empty, while +// _NATIVE_WCHAR_T_DEFINED is undefined. +# if defined(_NATIVE_WCHAR_T_DEFINED) +# define LL_WCHAR_T_NATIVE 1 +# endif // _NATIVE_WCHAR_T_DEFINED +#endif // LL_WINDOWS + #if LL_COMMON_LINK_SHARED // CMake automagically defines llcommon_EXPORTS only when building llcommon // sources, and only when llcommon is a shared library (i.e. when diff --git a/indra/llcommon/llprocess.cpp b/indra/llcommon/llprocess.cpp index 5753efdc59..1fa53f322b 100644 --- a/indra/llcommon/llprocess.cpp +++ b/indra/llcommon/llprocess.cpp @@ -1205,30 +1205,9 @@ static LLProcess::Status interpret_status(int status) /// GetLastError()/FormatMessage() boilerplate static std::string WindowsErrorString(const std::string& operation) { - int result = GetLastError(); - - LPTSTR error_str = 0; - if (FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, - NULL, - result, - 0, - (LPTSTR)&error_str, - 0, - NULL) - != 0) - { - // convert from wide-char string to multi-byte string - char message[256]; - wcstombs(message, error_str, sizeof(message)); - message[sizeof(message)-1] = 0; - LocalFree(error_str); - // convert to std::string to trim trailing whitespace - std::string mbsstr(message); - mbsstr.erase(mbsstr.find_last_not_of(" \t\r\n")); - return STRINGIZE(operation << " failed (" << result << "): " << mbsstr); - } - return STRINGIZE(operation << " failed (" << result - << "), but FormatMessage() did not explain"); + auto result = GetLastError(); + return STRINGIZE(operation << " failed (" << result << "): " + << windows_message<std::string>(result)); } /***************************************************************************** diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 42390c8a7b..f931103ba6 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -53,6 +53,40 @@ std::string ll_safe_string(const char* in, S32 maxlen) return std::string(); } +boost::optional<std::wstring> llstring_getoptenv(const std::string& key) +{ + auto wkey = ll_convert_string_to_wide(key); + // Take a wild guess as to how big the buffer should be. + std::vector<wchar_t> buffer(1024); + auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + // If our initial guess was too short, n will indicate the size (in + // wchar_t's) that buffer should have been, including the terminating nul. + if (n > (buffer.size() - 1)) + { + // make it big enough + buffer.resize(n); + // and try again + n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + } + // did that (ultimately) succeed? + if (n) + { + // great, return populated boost::optional + return { &buffer[0] }; + } + + // not successful + auto last_error = GetLastError(); + // Don't bother warning for NOT_FOUND; that's an expected case + if (last_error != ERROR_ENVVAR_NOT_FOUND) + { + LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: " + << windows_message<std::string>(last_error) << LL_ENDL; + } + // return empty boost::optional + return {}; +} + bool is_char_hex(char hex) { if((hex >= '0') && (hex <= '9')) @@ -715,12 +749,12 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) return out; } -std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in) +std::wstring ll_convert_string_to_wide(const std::string& in) { return ll_convert_string_to_wide(in, CP_UTF8); } -std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in, unsigned int code_page) +std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page) { // From review: // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, @@ -745,6 +779,24 @@ std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in, unsi return {&w_out[0]}; } +LLWString ll_convert_wide_to_wstring(const std::wstring& in) +{ + // This function, like its converse, is a placeholder, encapsulating a + // guilty little hack: the only "official" way nat has found to convert + // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is + // by using iconv, which we've avoided so far. It kinda sorta works to + // just copy individual characters... + // The point is that if/when we DO introduce some more official way to + // perform such conversions, we should only have to call it here. + return { in.begin(), in.end() }; +} + +std::wstring ll_convert_wstring_to_wide(const LLWString& in) +{ + // See comments in ll_convert_wide_to_wstring() + return { in.begin(), in.end() }; +} + std::string ll_convert_string_to_utf8_string(const std::string& in) { auto w_mesg = ll_convert_string_to_wide(in, CP_ACP); @@ -752,7 +804,74 @@ std::string ll_convert_string_to_utf8_string(const std::string& in) return out_utf8; } -#endif // LL_WINDOWS + +namespace +{ + +void HeapFree_deleter(void* ptr) +{ + // instead of LocalFree(), per https://stackoverflow.com/a/31541205 + HeapFree(GetProcessHeap(), NULL, ptr); +} + +} // anonymous namespace + +template<> +std::wstring windows_message<std::wstring>(DWORD error) +{ + // derived from https://stackoverflow.com/a/455533 + wchar_t* rawptr = nullptr; + auto okay = FormatMessageW( + // use system message tables for GetLastError() codes + FORMAT_MESSAGE_FROM_SYSTEM | + // internally allocate buffer and return its pointer + FORMAT_MESSAGE_ALLOCATE_BUFFER | + // you cannot pass insertion parameters (thanks Gandalf) + FORMAT_MESSAGE_IGNORE_INSERTS | + // ignore line breaks in message definition text + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM + error, // dwMessageId + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId + (LPWSTR)&rawptr, // lpBuffer: force-cast wchar_t** to wchar_t* + 0, // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER + NULL); // Arguments, unused + + // make a unique_ptr from rawptr so it gets cleaned up properly + std::unique_ptr<wchar_t, void(*)(void*)> bufferptr(rawptr, HeapFree_deleter); + + if (okay && bufferptr) + { + // got the message, return it ('okay' is length in characters) + return { bufferptr.get(), okay }; + } + + // did not get the message, synthesize one + auto format_message_error = GetLastError(); + std::wostringstream out; + out << L"GetLastError() " << error << L" (FormatMessageW() failed with " + << format_message_error << L")"; + return out.str(); +} + +#else // ! LL_WINDOWS + +boost::optional<std::string> llstring_getoptenv(const std::string& key) +{ + auto found = getenv(key.c_str()); + if (found) + { + // return populated boost::optional + return { found }; + } + else + { + // return empty boost::optional + return {}; + } +} + +#endif // ! LL_WINDOWS long LLStringOps::sPacificTimeOffset = 0; long LLStringOps::sLocalTimeOffset = 0; diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 7c3e9f952d..2dccc7cbdf 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -27,6 +27,7 @@ #ifndef LL_LLSTRING_H #define LL_LLSTRING_H +#include <boost/optional/optional.hpp> #include <string> #include <cstdio> //#include <locale> @@ -337,6 +338,19 @@ public: const string_type& string, const string_type& substr); + /** + * get environment string value with proper Unicode handling + * (key is always UTF-8) + * detect absence by return value == dflt + */ + static string_type getenv(const std::string& key, const string_type& dflt=""); + /** + * get optional environment string value with proper Unicode handling + * (key is always UTF-8) + * detect absence by (! return value) + */ + static boost::optional<string_type> getoptenv(const std::string& key); + static void addCRLF(string_type& string); static void removeCRLF(string_type& string); static void removeWindowsCR(string_type& string); @@ -496,6 +510,37 @@ LL_COMMON_API bool iswindividual(llwchar elem); * Unicode support */ +/// generic conversion aliases +template<typename TO, typename FROM, typename Enable=void> +struct ll_convert_impl +{ + // Don't even provide a generic implementation. We specialize for every + // combination we do support. + TO operator()(const FROM& in) const; +}; + +// Use a function template to get the nice ll_convert<TO>(from_value) API. +template<typename TO, typename FROM> +TO ll_convert(const FROM& in) +{ + return ll_convert_impl<TO, FROM>()(in); +} + +// degenerate case +template<typename T> +struct ll_convert_impl<T, T> +{ + T operator()(const T& in) const { return in; } +}; + +// specialize ll_convert_impl<TO, FROM> to return EXPR +#define ll_convert_alias(TO, FROM, EXPR) \ +template<> \ +struct ll_convert_impl<TO, FROM> \ +{ \ + TO operator()(const FROM& in) const { return EXPR; } \ +} + // Make the incoming string a utf8 string. Replaces any unknown glyph // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest // of the data may not be recovered. @@ -503,30 +548,83 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw); // // We should never use UTF16 except when communicating with Win32! +// https://docs.microsoft.com/en-us/cpp/cpp/char-wchar-t-char16-t-char32-t +// nat 2018-12-14: I consider the whole llutf16string thing a mistake, because +// the Windows APIs we want to call are all defined in terms of wchar_t* +// (or worse, LPCTSTR). +// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types + +// While there is no point coding for an ASCII-only world (! defined(UNICODE)), +// use of U16 and llutf16string for Windows APIs locks in /Zc:wchar_t-. Going +// forward, we should code in terms of wchar_t and std::wstring so as to +// support either setting of /Zc:wchar_t. + +// The first link above states that char can be used to hold ASCII or any +// multi-byte character set, and distinguishes wchar_t (UTF-16LE), char16_t +// (UTF-16) and char32_t (UTF-32). Nonetheless, within this code base: +// * char and std::string always hold UTF-8 (of which ASCII is a subset). It +// is a BUG if they are used to pass strings in any other multi-byte +// encoding. +// * wchar_t and std::wstring should be our interface to Windows wide-string +// APIs, and therefore hold UTF-16LE. +// * U16 and llutf16string are the previous but DEPRECATED UTF-16LE type. Do +// not introduce new uses of U16 or llutf16string for string data. +// * llwchar and LLWString hold UTF-32 strings. +// * Do not introduce char16_t or std::u16string. +// * Do not introduce char32_t or std::u32string. // +// This typedef may or may not be identical to std::wstring, depending on +// LL_WCHAR_T_NATIVE. typedef std::basic_string<U16> llutf16string; +#if ! defined(LL_WCHAR_T_NATIVE) +// wchar_t is identical to U16, and std::wstring is identical to llutf16string. +// Defining an ll_convert alias involving llutf16string would collide with the +// comparable preferred alias involving std::wstring. (In this scenario, if +// you pass llutf16string, it will engage the std::wstring specialization.) +#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing +#else +// wchar_t is a distinct native type, so llutf16string is also a distinct +// type, and there IS a point to converting separately to/from llutf16string. +// (But why? Windows APIs are still defined in terms of wchar_t, and +// in this scenario llutf16string won't work for them!) +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +// converting between std::wstring and llutf16string involves copying chars +// enclose the brace-initialization expression in parens to protect the comma +// from macro-argument parsing +ll_convert_alias(llutf16string, std::wstring, ({ in.begin(), in.end() })); +ll_convert_alias(std::wstring, llutf16string, ({ in.begin(), in.end() })); +#endif + LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len); LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str); +ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in)); LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len); LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str); +ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in)); LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len); LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str ); +ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in)); LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len); LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str); // Same function, better name. JC inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); } +// best name of all +ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in)); // LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars); LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len); LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str); +ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in)); LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len); LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str); +ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in)); #if LL_WINDOWS inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} @@ -636,14 +734,36 @@ using snprintf_hack::snprintf; */ LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page); LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8 +inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page) +{ + return ll_convert_wide_to_string(in.c_str(), code_page); +} +inline std::string ll_convert_wide_to_string(const std::wstring& in) +{ + return ll_convert_wide_to_string(in.c_str()); +} +ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); /** * Converts a string to wide string. */ -LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in, - unsigned int code_page); -LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in); - // default CP_UTF8 +LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in, + unsigned int code_page); +LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in); + // default CP_UTF8 +ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); + +/** + * Convert a Windows wide string to our LLWString + */ +LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in); +ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); + +/** + * Convert LLWString to Windows wide string + */ +LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in); +ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); /** * Converts incoming string into utf8 string @@ -651,8 +771,39 @@ LL_COMMON_API std::basic_string<wchar_t> ll_convert_string_to_wide(const std::st */ LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in); +/// Get Windows message string for passed GetLastError() code +// VS 2013 doesn't let us forward-declare this template, which is what we +// started with, so the implementation could reference the specialization we +// haven't yet declared. Somewhat weirdly, just stating the generic +// implementation in terms of the specialization works, even in this order... + +// the general case is just a conversion from the sole implementation +// Microsoft says DWORD is a typedef for unsigned long +// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types +// so rather than drag windows.h into everybody's include space... +template<typename STRING> +STRING windows_message(unsigned long error) +{ + return ll_convert<STRING>(windows_message<std::wstring>(error)); +} + +/// There's only one real implementation +template<> +LL_COMMON_API std::wstring windows_message<std::wstring>(unsigned long error); + +/// Get Windows message string, implicitly calling GetLastError() +template<typename STRING> +STRING windows_message() { return windows_message<STRING>(GetLastError()); } + //@} -#endif // LL_WINDOWS + +LL_COMMON_API boost::optional<std::wstring> llstring_getoptenv(const std::string& key); + +#else // ! LL_WINDOWS + +LL_COMMON_API boost::optional<std::string> llstring_getoptenv(const std::string& key); + +#endif // ! LL_WINDOWS /** * Many of the 'strip' and 'replace' methods of LLStringUtilBase need @@ -1595,6 +1746,21 @@ bool LLStringUtilBase<T>::endsWith( return (idx == (string.size() - substr.size())); } +// static +template<class T> +auto LLStringUtilBase<T>::getoptenv(const std::string& key) -> boost::optional<string_type> +{ + auto found{llstring_getoptenv(key)}; + return found? { ll_convert<string_type>(*found) } : {}; +} + +// static +template<class T> +auto LLStringUtilBase<T>::getenv(const std::string& key, const string_type& dflt) -> string_type +{ + auto found{getoptenv(key)}; + return found? *found : dflt; +} template<class T> BOOL LLStringUtilBase<T>::convertToBOOL(const string_type& string, BOOL& value) diff --git a/indra/llcommon/stdtypes.h b/indra/llcommon/stdtypes.h index bf3f3f9ee8..6c9871e76c 100644 --- a/indra/llcommon/stdtypes.h +++ b/indra/llcommon/stdtypes.h @@ -37,7 +37,12 @@ typedef signed int S32; typedef unsigned int U32; #if LL_WINDOWS -// Windows wchar_t is 16-bit +// https://docs.microsoft.com/en-us/cpp/build/reference/zc-wchar-t-wchar-t-is-native-type +// https://docs.microsoft.com/en-us/cpp/cpp/fundamental-types-cpp +// Windows wchar_t is 16-bit, whichever way /Zc:wchar_t is set. In effect, +// Windows wchar_t is always a typedef, either for unsigned short or __wchar_t. +// (__wchar_t, available either way, is Microsoft's native 2-byte wchar_t type.) +// In any case, llwchar should be a UTF-32 type. typedef U32 llwchar; #else typedef wchar_t llwchar; |