From 4e894eb2a7ed6651c54890cd20106bfacd61ef0a Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 11 Dec 2018 20:48:20 -0500 Subject: SL-10153: Improve ll_convert_string_to_wide() and its converse. Instead of returning a wchar_t* and requiring the caller to delete it later, return a std::basic_string that's self-cleaning. If the caller wants a wchar_t*, s/he can call c_str() on the returned string. Default the code_page parameter to CP_UTF8, since we try to be really consistent about using UTF-8 encoding for all our internal std::strings. --- indra/llcommon/llstring.cpp | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'indra/llcommon/llstring.cpp') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 9a02fecd72..42390c8a7b 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -30,6 +30,7 @@ #include "llerror.h" #include "llfasttimer.h" #include "llsd.h" +#include #if LL_WINDOWS #include "llwin32headerslean.h" @@ -672,6 +673,11 @@ namespace snprintf_hack } } +std::string ll_convert_wide_to_string(const wchar_t* in) +{ + return ll_convert_wide_to_string(in, CP_UTF8); +} + std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) { std::string out; @@ -709,7 +715,12 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) return out; } -wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page) +std::basic_string ll_convert_string_to_wide(const std::string& in) +{ + return ll_convert_string_to_wide(in, CP_UTF8); +} + +std::basic_string ll_convert_string_to_wide(const std::string& in, unsigned int code_page) { // From review: // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, @@ -719,24 +730,25 @@ wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page // but we *are* seeing string operations taking a bunch of time, especially when constructing widgets. // int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0); - // reserve place to NULL terminator - int output_str_len = in.length(); - wchar_t* w_out = new wchar_t[output_str_len + 1]; + // reserve an output buffer that will be destroyed on exit, with a place + // to put NULL terminator + std::vector w_out(in.length() + 1); - memset(w_out, 0, output_str_len + 1); - int real_output_str_len = MultiByteToWideChar (code_page, 0, in.c_str(), in.length(), w_out, output_str_len); + memset(&w_out[0], 0, w_out.size()); + int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), + &w_out[0], w_out.size() - 1); //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858. w_out[real_output_str_len] = 0; - return w_out; + // construct string from our temporary output buffer + return {&w_out[0]}; } std::string ll_convert_string_to_utf8_string(const std::string& in) { - wchar_t* w_mesg = ll_convert_string_to_wide(in, CP_ACP); - std::string out_utf8(ll_convert_wide_to_string(w_mesg, CP_UTF8)); - delete[] w_mesg; + auto w_mesg = ll_convert_string_to_wide(in, CP_ACP); + std::string out_utf8(ll_convert_wide_to_string(w_mesg.c_str(), CP_UTF8)); return out_utf8; } -- cgit v1.2.3 From 9ffcafb64b4483c315d00e88ffc1438bce1f7915 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 14 Dec 2018 10:48:43 -0500 Subject: SL-10153: Introduce ll_convert, windows_message() templates. Add ll_convert template, used as (e.g.): ll_convert(value_of_some_other_string_type); There is no generic template implementation -- the template exists solely to provide generic aliases for a bewildering family of llstring.h string- conversion functions with highly-specific names. There's a generic implementation, though, for the degenerate case where FROM and TO are identical. Add ll_convert<> specialization aliases for most of the string-conversion functions declared in llstring.h, including the Windows-specific ones involving llutf16string and std::wstring. Add a mini-lecture in llstring.h about appropriate use of string types on Windows. Add LL_WCHAR_T_NATIVE llpreprocessor.h macro so we can detect whether to provide separate conversions for llutf16string and std::wstring, or whether those would collide because the types are identical. Add inline ll_convert_wide_to_string(const std::wstring&) overloads so caller isn't required to call arg.c_str(), which naturally permits an ll_convert alias. Add ll_convert_wide_to_wstring(), ll_convert_wstring_to_wide() as placeholders for converting between Windows std::wstring and Linden LLWString, with corresponding ll_convert aliases. We don't yet have library code to perform such conversions officially; for now, just copy characters. Add LLStringUtil::getenv(key) and getoptenv(key) functions. The latter returns boost::optional in case the caller needs to detect absence of a given environment variable rather than simply accepting a default value. Naturally getenv(), which accepts a default, is implemented using getoptenv(). getoptenv(), in turn, is implemented using an underlying llstring_getoptenv(). On Windows, llstring_getoptenv() returns boost::optional (based on GetEnvironmentVariableW()), whereas elsewhere, llstring_getoptenv() returns boost::optional (based on classic Posix getenv()). The beauty of generic ll_convert is that the portable LLStringUtilBase:: getoptenv() template can call the platform-specific llstring_getoptenv() and transparently perform whatever conversion is necessary to return the desired string_type. Add windows_message(error) template, with an overload that implicitly calls GetLastError(). We provide a single concrete windows_message() implementation because that's what we get from Windows FormatMessageW() -- everything else is a generic conversion to the desired target string type. This obviates llprocess.cpp's previous WindowsErrorString() implementation -- reimplement using windows_message(). --- indra/llcommon/llstring.cpp | 125 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 3 deletions(-) (limited to 'indra/llcommon/llstring.cpp') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 42390c8a7b..f931103ba6 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -53,6 +53,40 @@ std::string ll_safe_string(const char* in, S32 maxlen) return std::string(); } +boost::optional llstring_getoptenv(const std::string& key) +{ + auto wkey = ll_convert_string_to_wide(key); + // Take a wild guess as to how big the buffer should be. + std::vector buffer(1024); + auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + // If our initial guess was too short, n will indicate the size (in + // wchar_t's) that buffer should have been, including the terminating nul. + if (n > (buffer.size() - 1)) + { + // make it big enough + buffer.resize(n); + // and try again + n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + } + // did that (ultimately) succeed? + if (n) + { + // great, return populated boost::optional + return { &buffer[0] }; + } + + // not successful + auto last_error = GetLastError(); + // Don't bother warning for NOT_FOUND; that's an expected case + if (last_error != ERROR_ENVVAR_NOT_FOUND) + { + LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: " + << windows_message(last_error) << LL_ENDL; + } + // return empty boost::optional + return {}; +} + bool is_char_hex(char hex) { if((hex >= '0') && (hex <= '9')) @@ -715,12 +749,12 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) return out; } -std::basic_string ll_convert_string_to_wide(const std::string& in) +std::wstring ll_convert_string_to_wide(const std::string& in) { return ll_convert_string_to_wide(in, CP_UTF8); } -std::basic_string ll_convert_string_to_wide(const std::string& in, unsigned int code_page) +std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page) { // From review: // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, @@ -745,6 +779,24 @@ std::basic_string ll_convert_string_to_wide(const std::string& in, unsi return {&w_out[0]}; } +LLWString ll_convert_wide_to_wstring(const std::wstring& in) +{ + // This function, like its converse, is a placeholder, encapsulating a + // guilty little hack: the only "official" way nat has found to convert + // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is + // by using iconv, which we've avoided so far. It kinda sorta works to + // just copy individual characters... + // The point is that if/when we DO introduce some more official way to + // perform such conversions, we should only have to call it here. + return { in.begin(), in.end() }; +} + +std::wstring ll_convert_wstring_to_wide(const LLWString& in) +{ + // See comments in ll_convert_wide_to_wstring() + return { in.begin(), in.end() }; +} + std::string ll_convert_string_to_utf8_string(const std::string& in) { auto w_mesg = ll_convert_string_to_wide(in, CP_ACP); @@ -752,7 +804,74 @@ std::string ll_convert_string_to_utf8_string(const std::string& in) return out_utf8; } -#endif // LL_WINDOWS + +namespace +{ + +void HeapFree_deleter(void* ptr) +{ + // instead of LocalFree(), per https://stackoverflow.com/a/31541205 + HeapFree(GetProcessHeap(), NULL, ptr); +} + +} // anonymous namespace + +template<> +std::wstring windows_message(DWORD error) +{ + // derived from https://stackoverflow.com/a/455533 + wchar_t* rawptr = nullptr; + auto okay = FormatMessageW( + // use system message tables for GetLastError() codes + FORMAT_MESSAGE_FROM_SYSTEM | + // internally allocate buffer and return its pointer + FORMAT_MESSAGE_ALLOCATE_BUFFER | + // you cannot pass insertion parameters (thanks Gandalf) + FORMAT_MESSAGE_IGNORE_INSERTS | + // ignore line breaks in message definition text + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, // lpSource, unused with FORMAT_MESSAGE_FROM_SYSTEM + error, // dwMessageId + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // dwLanguageId + (LPWSTR)&rawptr, // lpBuffer: force-cast wchar_t** to wchar_t* + 0, // nSize, unused with FORMAT_MESSAGE_ALLOCATE_BUFFER + NULL); // Arguments, unused + + // make a unique_ptr from rawptr so it gets cleaned up properly + std::unique_ptr bufferptr(rawptr, HeapFree_deleter); + + if (okay && bufferptr) + { + // got the message, return it ('okay' is length in characters) + return { bufferptr.get(), okay }; + } + + // did not get the message, synthesize one + auto format_message_error = GetLastError(); + std::wostringstream out; + out << L"GetLastError() " << error << L" (FormatMessageW() failed with " + << format_message_error << L")"; + return out.str(); +} + +#else // ! LL_WINDOWS + +boost::optional llstring_getoptenv(const std::string& key) +{ + auto found = getenv(key.c_str()); + if (found) + { + // return populated boost::optional + return { found }; + } + else + { + // return empty boost::optional + return {}; + } +} + +#endif // ! LL_WINDOWS long LLStringOps::sPacificTimeOffset = 0; long LLStringOps::sLocalTimeOffset = 0; -- cgit v1.2.3 From 132e708fec50fd756b822925313456c70a4ff27f Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 14 Dec 2018 12:01:51 -0500 Subject: SL-10153: Fix previous commit for non-Windows systems. Move Windows-flavored llstring_getoptenv() to Windows-specific section of llstring.cpp. boost::optional type must be stated explicitly to initialize with a value. On platforms where llwchar is the same as wchar_t, LLWString is the same as std::wstring, so ll_convert specializations for std::wstring would duplicate those for LLWString. Defend against that. The compilers we use don't like 'return condition? { expr } : {}', in which we hope to construct and return an instance of the declared return type without having to restate the type. It works to use an explicit 'if' statement. --- indra/llcommon/llstring.cpp | 70 ++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'indra/llcommon/llstring.cpp') diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index f931103ba6..0174c411b4 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -53,40 +53,6 @@ std::string ll_safe_string(const char* in, S32 maxlen) return std::string(); } -boost::optional llstring_getoptenv(const std::string& key) -{ - auto wkey = ll_convert_string_to_wide(key); - // Take a wild guess as to how big the buffer should be. - std::vector buffer(1024); - auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); - // If our initial guess was too short, n will indicate the size (in - // wchar_t's) that buffer should have been, including the terminating nul. - if (n > (buffer.size() - 1)) - { - // make it big enough - buffer.resize(n); - // and try again - n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); - } - // did that (ultimately) succeed? - if (n) - { - // great, return populated boost::optional - return { &buffer[0] }; - } - - // not successful - auto last_error = GetLastError(); - // Don't bother warning for NOT_FOUND; that's an expected case - if (last_error != ERROR_ENVVAR_NOT_FOUND) - { - LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: " - << windows_message(last_error) << LL_ENDL; - } - // return empty boost::optional - return {}; -} - bool is_char_hex(char hex) { if((hex >= '0') && (hex <= '9')) @@ -854,6 +820,40 @@ std::wstring windows_message(DWORD error) return out.str(); } +boost::optional llstring_getoptenv(const std::string& key) +{ + auto wkey = ll_convert_string_to_wide(key); + // Take a wild guess as to how big the buffer should be. + std::vector buffer(1024); + auto n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + // If our initial guess was too short, n will indicate the size (in + // wchar_t's) that buffer should have been, including the terminating nul. + if (n > (buffer.size() - 1)) + { + // make it big enough + buffer.resize(n); + // and try again + n = GetEnvironmentVariableW(wkey.c_str(), &buffer[0], buffer.size()); + } + // did that (ultimately) succeed? + if (n) + { + // great, return populated boost::optional + return boost::optional(&buffer[0]); + } + + // not successful + auto last_error = GetLastError(); + // Don't bother warning for NOT_FOUND; that's an expected case + if (last_error != ERROR_ENVVAR_NOT_FOUND) + { + LL_WARNS() << "GetEnvironmentVariableW('" << key << "') failed: " + << windows_message(last_error) << LL_ENDL; + } + // return empty boost::optional + return {}; +} + #else // ! LL_WINDOWS boost::optional llstring_getoptenv(const std::string& key) @@ -862,7 +862,7 @@ boost::optional llstring_getoptenv(const std::string& key) if (found) { // return populated boost::optional - return { found }; + return boost::optional(found); } else { -- cgit v1.2.3