summaryrefslogtreecommitdiff
path: root/indra/llcommon/llstring.cpp
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2021-11-02 10:35:34 -0400
committerNat Goodspeed <nat@lindenlab.com>2021-11-02 10:35:34 -0400
commit10692ab4a4f999e1ee302675e4ffb84f37a52643 (patch)
tree94d592433f35f45b2d2417e4c5891412ee972f80 /indra/llcommon/llstring.cpp
parenta88da4ca169a01f45a71513e599e154e0c64b615 (diff)
SL-16207: Create uniform overload sets for wide-string conversions.
Use new ll_convert_forms() macro in llstring.h to declare, for each wide-string conversion function of interest, four overloads. The real one, the nontrivial one, is (const char*, size_t len), implemented in llstring.cpp. Then (const string&, size_t len), (const char*) and (const string&) are each trivially implemented with an inline call to (const char*, size_t len). Notably, we change all S32 len parameters to size_t. Using S32 is old skool. Tweak each nontrivial implementation in llstring.cpp to accept (const char*, size_t len) instead of (const string&) with or without explicit length. Eliminate from llstring.cpp trivial overloads (deriving length from either a const char* or from a string), since those are now inline in the header. Of course three of those overloads will be unified once we enable C++17 and change each relevant parameter to std::string_view, but we're not yet there. Meanwhile, this suite of overloads minimizes, to the best of our ability, new string allocations solely for parameter passing. And use of a macro means we need only change the macro once we get std::string_view. We take this step because some use cases require (const char*), some require (const string&, size_t len), others (const char*, size_t len) ... We were missing some key overloads, and had to work around them by instantiating new string objects (necessitating both allocation and character copying) just to pass the desired parameter. Using the macro ensures this consistent set of overloads for every wide-string conversion function. Additionally, knowing that the ugly-name overloads exist, ll_convert_forms() implicitly defines corresponding ll_convert<TARGET>() overloads. Streamline declarations of utf16str_to_wstring(), wstring_to_utf16str(), utf8str_to_utf16str(), utf16str_to_utf8str(), utf8str_to_wstring(), wstring_to_utf8str(), ll_convert_wide_to_wstring() and ll_convert_wstring_to_wide() using ll_convert_forms(). Use corresponding new ll_convert_cp_forms() macro to declare consistent overloads for conversion functions accepting an optional unsigned int code_page parameter. We used to delegate to the .cpp file the implementation of each overload accepting code_page so llstring.h need not include the Windows header defining the CP_UTF8 default; this is more simply accomplished by introducing a small ll_wstring_default_code_page() function to retrieve it from the .cpp file. That lets us specify the code_page parameter as optional, using that function as its default value. Use ll_convert_cp_forms() to streamline declarations of ll_convert_wide_to_string() and ll_convert_string_to_wide(). Introduce real implementations of ll_convert_wide_to_wstring() and ll_convert_wstring_to_wide(). The previous implementations merely copied individual characters, which is wrong: when we convert UTF16LE to UTF32, we can and should fold multi-character UTF16LE encodings to the corresponding single UTF32 character. The real implemenations leverage our awareness that both llutf16string and Windows std::wstring (either variant) use UTF16LE encoding, so we can reuse the corresponding llutf16string conversions. Introduce generic ll_convert_length() function, specialized as either std::strlen() or std::wcslen() depending on parameter type. (Even if std::wcslen() is derived from classic C, why doesn't the C++ standard library define a std::strlen(const wchar_t*) overload to call it?) Fix ll_convert_alias()'s ll_convert_impl specialization's operator() to accept boost::call_traits::param_type, so we can pass (e.g.) const std::wstring& but also const wchar_t* instead of const wchar_t*&.
Diffstat (limited to 'indra/llcommon/llstring.cpp')
-rw-r--r--indra/llcommon/llstring.cpp95
1 files changed, 29 insertions, 66 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index 0290eea143..5f426e5dca 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -215,7 +215,7 @@ S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
return inchars - base;
}
-llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
+llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
{
llutf16string out;
@@ -237,27 +237,19 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
return out;
}
-llutf16string wstring_to_utf16str(const LLWString &utf32str)
+llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
{
- const S32 len = (S32)utf32str.length();
- return wstring_to_utf16str(utf32str, len);
-}
-
-llutf16string utf8str_to_utf16str ( const std::string& utf8str )
-{
- LLWString wstr = utf8str_to_wstring ( utf8str );
+ LLWString wstr = utf8str_to_wstring ( utf8str, len );
return wstring_to_utf16str ( wstr );
}
-
-LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
+LLWString utf16str_to_wstring(const U16* utf16str, size_t len)
{
LLWString wout;
- if((len <= 0) || utf16str.empty()) return wout;
+ if (len == 0) return wout;
S32 i = 0;
- // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
- const U16* chars16 = &(*(utf16str.begin()));
+ const U16* chars16 = utf16str;
while (i < len)
{
llwchar cur_char;
@@ -267,12 +259,6 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
return wout;
}
-LLWString utf16str_to_wstring(const llutf16string &utf16str)
-{
- const S32 len = (S32)utf16str.length();
- return utf16str_to_wstring(utf16str, len);
-}
-
// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
{
@@ -392,8 +378,7 @@ S32 wstring_utf8_length(const LLWString& wstr)
return len;
}
-
-LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
+LLWString utf8str_to_wstring(const char* utf8str, size_t len)
{
LLWString wout;
@@ -481,13 +466,7 @@ LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
return wout;
}
-LLWString utf8str_to_wstring(const std::string& utf8str)
-{
- const S32 len = (S32)utf8str.length();
- return utf8str_to_wstring(utf8str, len);
-}
-
-std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
+std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)
{
std::string out;
@@ -503,20 +482,9 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
return out;
}
-std::string wstring_to_utf8str(const LLWString& utf32str)
-{
- const S32 len = (S32)utf32str.length();
- return wstring_to_utf8str(utf32str, len);
-}
-
-std::string utf16str_to_utf8str(const llutf16string& utf16str)
-{
- return wstring_to_utf8str(utf16str_to_wstring(utf16str));
-}
-
-std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
+std::string utf16str_to_utf8str(const U16* utf16str, size_t len)
{
- return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
+ return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));
}
std::string utf8str_trim(const std::string& utf8str)
@@ -657,17 +625,16 @@ std::string utf8str_removeCRLF(const std::string& utf8str)
}
#if LL_WINDOWS
-std::string ll_convert_wide_to_string(const wchar_t* in)
+unsigned int ll_wstring_default_code_page()
{
- return ll_convert_wide_to_string(in, CP_UTF8);
+ return CP_UTF8;
}
-std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
+std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page)
{
std::string out;
if(in)
{
- int len_in = wcslen(in);
int len_out = WideCharToMultiByte(
code_page,
0,
@@ -699,12 +666,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
return out;
}
-std::wstring ll_convert_string_to_wide(const std::string& in)
-{
- return ll_convert_string_to_wide(in, CP_UTF8);
-}
-
-std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
+std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page)
{
// From review:
// We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
@@ -716,10 +678,10 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_
// reserve an output buffer that will be destroyed on exit, with a place
// to put NULL terminator
- std::vector<wchar_t> w_out(in.length() + 1);
+ std::vector<wchar_t> w_out(len + 1);
memset(&w_out[0], 0, w_out.size());
- int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(),
+ int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len,
&w_out[0], w_out.size() - 1);
//looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
@@ -729,22 +691,23 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_
return {&w_out[0]};
}
-LLWString ll_convert_wide_to_wstring(const std::wstring& in)
+LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len)
{
- // This function, like its converse, is a placeholder, encapsulating a
- // guilty little hack: the only "official" way nat has found to convert
- // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is
- // by using iconv, which we've avoided so far. It kinda sorta works to
- // just copy individual characters...
- // The point is that if/when we DO introduce some more official way to
- // perform such conversions, we should only have to call it here.
- return { in.begin(), in.end() };
+ // Whether or not std::wstring and llutf16string are distinct types, they
+ // both hold UTF-16LE characters. (See header file comments.) Pretend this
+ // wchar_t* sequence is really a U16* sequence and use the conversion we
+ // define above.
+ return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len);
}
-std::wstring ll_convert_wstring_to_wide(const LLWString& in)
+std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len)
{
- // See comments in ll_convert_wide_to_wstring()
- return { in.begin(), in.end() };
+ // first, convert to llutf16string, for which we have a real implementation
+ auto utf16str{ wstring_to_utf16str(in, len) };
+ // then, because each U16 char must be UTF-16LE encoded, pretend the U16*
+ // string pointer is a wchar_t* and instantiate a std::wstring of the same
+ // length.
+ return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() };
}
std::string ll_convert_string_to_utf8_string(const std::string& in)