summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2021-11-02 10:35:34 -0400
committerNat Goodspeed <nat@lindenlab.com>2021-11-02 10:35:34 -0400
commit10692ab4a4f999e1ee302675e4ffb84f37a52643 (patch)
tree94d592433f35f45b2d2417e4c5891412ee972f80
parenta88da4ca169a01f45a71513e599e154e0c64b615 (diff)
SL-16207: Create uniform overload sets for wide-string conversions.
Use new ll_convert_forms() macro in llstring.h to declare, for each wide-string conversion function of interest, four overloads. The real one, the nontrivial one, is (const char*, size_t len), implemented in llstring.cpp. Then (const string&, size_t len), (const char*) and (const string&) are each trivially implemented with an inline call to (const char*, size_t len). Notably, we change all S32 len parameters to size_t. Using S32 is old skool. Tweak each nontrivial implementation in llstring.cpp to accept (const char*, size_t len) instead of (const string&) with or without explicit length. Eliminate from llstring.cpp trivial overloads (deriving length from either a const char* or from a string), since those are now inline in the header. Of course three of those overloads will be unified once we enable C++17 and change each relevant parameter to std::string_view, but we're not yet there. Meanwhile, this suite of overloads minimizes, to the best of our ability, new string allocations solely for parameter passing. And use of a macro means we need only change the macro once we get std::string_view. We take this step because some use cases require (const char*), some require (const string&, size_t len), others (const char*, size_t len) ... We were missing some key overloads, and had to work around them by instantiating new string objects (necessitating both allocation and character copying) just to pass the desired parameter. Using the macro ensures this consistent set of overloads for every wide-string conversion function. Additionally, knowing that the ugly-name overloads exist, ll_convert_forms() implicitly defines corresponding ll_convert<TARGET>() overloads. Streamline declarations of utf16str_to_wstring(), wstring_to_utf16str(), utf8str_to_utf16str(), utf16str_to_utf8str(), utf8str_to_wstring(), wstring_to_utf8str(), ll_convert_wide_to_wstring() and ll_convert_wstring_to_wide() using ll_convert_forms(). Use corresponding new ll_convert_cp_forms() macro to declare consistent overloads for conversion functions accepting an optional unsigned int code_page parameter. We used to delegate to the .cpp file the implementation of each overload accepting code_page so llstring.h need not include the Windows header defining the CP_UTF8 default; this is more simply accomplished by introducing a small ll_wstring_default_code_page() function to retrieve it from the .cpp file. That lets us specify the code_page parameter as optional, using that function as its default value. Use ll_convert_cp_forms() to streamline declarations of ll_convert_wide_to_string() and ll_convert_string_to_wide(). Introduce real implementations of ll_convert_wide_to_wstring() and ll_convert_wstring_to_wide(). The previous implementations merely copied individual characters, which is wrong: when we convert UTF16LE to UTF32, we can and should fold multi-character UTF16LE encodings to the corresponding single UTF32 character. The real implemenations leverage our awareness that both llutf16string and Windows std::wstring (either variant) use UTF16LE encoding, so we can reuse the corresponding llutf16string conversions. Introduce generic ll_convert_length() function, specialized as either std::strlen() or std::wcslen() depending on parameter type. (Even if std::wcslen() is derived from classic C, why doesn't the C++ standard library define a std::strlen(const wchar_t*) overload to call it?) Fix ll_convert_alias()'s ll_convert_impl specialization's operator() to accept boost::call_traits::param_type, so we can pass (e.g.) const std::wstring& but also const wchar_t* instead of const wchar_t*&.
-rw-r--r--indra/llcommon/llstring.cpp95
-rw-r--r--indra/llcommon/llstring.h143
2 files changed, 113 insertions, 125 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index 0290eea143..5f426e5dca 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -215,7 +215,7 @@ S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
return inchars - base;
}
-llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
+llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
{
llutf16string out;
@@ -237,27 +237,19 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
return out;
}
-llutf16string wstring_to_utf16str(const LLWString &utf32str)
+llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
{
- const S32 len = (S32)utf32str.length();
- return wstring_to_utf16str(utf32str, len);
-}
-
-llutf16string utf8str_to_utf16str ( const std::string& utf8str )
-{
- LLWString wstr = utf8str_to_wstring ( utf8str );
+ LLWString wstr = utf8str_to_wstring ( utf8str, len );
return wstring_to_utf16str ( wstr );
}
-
-LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
+LLWString utf16str_to_wstring(const U16* utf16str, size_t len)
{
LLWString wout;
- if((len <= 0) || utf16str.empty()) return wout;
+ if (len == 0) return wout;
S32 i = 0;
- // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
- const U16* chars16 = &(*(utf16str.begin()));
+ const U16* chars16 = utf16str;
while (i < len)
{
llwchar cur_char;
@@ -267,12 +259,6 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
return wout;
}
-LLWString utf16str_to_wstring(const llutf16string &utf16str)
-{
- const S32 len = (S32)utf16str.length();
- return utf16str_to_wstring(utf16str, len);
-}
-
// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
{
@@ -392,8 +378,7 @@ S32 wstring_utf8_length(const LLWString& wstr)
return len;
}
-
-LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
+LLWString utf8str_to_wstring(const char* utf8str, size_t len)
{
LLWString wout;
@@ -481,13 +466,7 @@ LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
return wout;
}
-LLWString utf8str_to_wstring(const std::string& utf8str)
-{
- const S32 len = (S32)utf8str.length();
- return utf8str_to_wstring(utf8str, len);
-}
-
-std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
+std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)
{
std::string out;
@@ -503,20 +482,9 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
return out;
}
-std::string wstring_to_utf8str(const LLWString& utf32str)
-{
- const S32 len = (S32)utf32str.length();
- return wstring_to_utf8str(utf32str, len);
-}
-
-std::string utf16str_to_utf8str(const llutf16string& utf16str)
-{
- return wstring_to_utf8str(utf16str_to_wstring(utf16str));
-}
-
-std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
+std::string utf16str_to_utf8str(const U16* utf16str, size_t len)
{
- return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
+ return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));
}
std::string utf8str_trim(const std::string& utf8str)
@@ -657,17 +625,16 @@ std::string utf8str_removeCRLF(const std::string& utf8str)
}
#if LL_WINDOWS
-std::string ll_convert_wide_to_string(const wchar_t* in)
+unsigned int ll_wstring_default_code_page()
{
- return ll_convert_wide_to_string(in, CP_UTF8);
+ return CP_UTF8;
}
-std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
+std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page)
{
std::string out;
if(in)
{
- int len_in = wcslen(in);
int len_out = WideCharToMultiByte(
code_page,
0,
@@ -699,12 +666,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
return out;
}
-std::wstring ll_convert_string_to_wide(const std::string& in)
-{
- return ll_convert_string_to_wide(in, CP_UTF8);
-}
-
-std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
+std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page)
{
// From review:
// We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
@@ -716,10 +678,10 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_
// reserve an output buffer that will be destroyed on exit, with a place
// to put NULL terminator
- std::vector<wchar_t> w_out(in.length() + 1);
+ std::vector<wchar_t> w_out(len + 1);
memset(&w_out[0], 0, w_out.size());
- int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(),
+ int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len,
&w_out[0], w_out.size() - 1);
//looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
@@ -729,22 +691,23 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_
return {&w_out[0]};
}
-LLWString ll_convert_wide_to_wstring(const std::wstring& in)
+LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len)
{
- // This function, like its converse, is a placeholder, encapsulating a
- // guilty little hack: the only "official" way nat has found to convert
- // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is
- // by using iconv, which we've avoided so far. It kinda sorta works to
- // just copy individual characters...
- // The point is that if/when we DO introduce some more official way to
- // perform such conversions, we should only have to call it here.
- return { in.begin(), in.end() };
+ // Whether or not std::wstring and llutf16string are distinct types, they
+ // both hold UTF-16LE characters. (See header file comments.) Pretend this
+ // wchar_t* sequence is really a U16* sequence and use the conversion we
+ // define above.
+ return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len);
}
-std::wstring ll_convert_wstring_to_wide(const LLWString& in)
+std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len)
{
- // See comments in ll_convert_wide_to_wstring()
- return { in.begin(), in.end() };
+ // first, convert to llutf16string, for which we have a real implementation
+ auto utf16str{ wstring_to_utf16str(in, len) };
+ // then, because each U16 char must be UTF-16LE encoded, pretend the U16*
+ // string pointer is a wchar_t* and instantiate a std::wstring of the same
+ // length.
+ return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() };
}
std::string ll_convert_string_to_utf8_string(const std::string& in)
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 89e95ef40a..a0598e8a11 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -27,9 +27,11 @@
#ifndef LL_LLSTRING_H
#define LL_LLSTRING_H
+#include <boost/call_traits.hpp>
#include <boost/optional/optional.hpp>
#include <string>
#include <cstdio>
+#include <cwchar> // std::wcslen()
//#include <locale>
#include <iomanip>
#include <algorithm>
@@ -532,14 +534,59 @@ struct ll_convert_impl<T, T>
template<> \
struct ll_convert_impl<TO, FROM> \
{ \
- TO operator()(const FROM& in) const { return EXPR; } \
+ /* param_type optimally passes both char* and string */ \
+ TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \
}
-// If all we're doing is copying characters, pass this as EXPR. Since it
-// expands into the 'return EXPR' slot in the ll_convert_impl specialization
-// above, it implies TO{ in.begin(), in.end() }.
+// If all we're doing is copying characters, pass this to ll_convert_alias as
+// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl
+// specialization above, it implies TO{ in.begin(), in.end() }.
#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() }
+// Generic name for strlen() / wcslen() - the default implementation should
+// (!) work with U16 and llwchar, but we don't intend to engage it.
+template <typename CHARTYPE>
+size_t ll_convert_length(const CHARTYPE* zstr)
+{
+ const CHARTYPE* zp;
+ // classic C string scan
+ for (zp = zstr; *zp; ++zp)
+ ;
+ return (zp - zstr);
+}
+
+// specialize where we have a library function; may use intrinsic operations
+template <>
+inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); }
+template <>
+inline size_t ll_convert_length<char> (const char* zstr) { return std::strlen(zstr); }
+
+// ll_convert_forms() is short for a bunch of boilerplate. It defines
+// longname(const char*, len), longname(const char*), longname(const string&)
+// and longname(const string&, len) so calls written pre-ll_convert() will
+// work. Most of these overloads will be unified once we turn on C++17 and can
+// use std::string_view.
+// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*)
+// and ll_convert<OUTSTR>(const string&) will work.
+#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \
+LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \
+inline auto longname(const INSTR& in, size_t len) \
+{ \
+ return longname(in.c_str(), len); \
+} \
+inline auto longname(const INSTR::value_type* in) \
+{ \
+ return longname(in, ll_convert_length(in)); \
+} \
+inline auto longname(const INSTR& in) \
+{ \
+ return longname(in.c_str(), in.length()); \
+} \
+/* string param */ \
+aliasmacro(OUTSTR, INSTR, longname(in)); \
+/* char* param */ \
+aliasmacro(OUTSTR, const INSTR::value_type*, longname(in))
+
// Make the incoming string a utf8 string. Replaces any unknown glyph
// with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
// of the data may not be recovered.
@@ -602,34 +649,18 @@ typedef std::basic_string<U16> llutf16string;
#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
#endif
-LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
-LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
-ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in));
-
-LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
-LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
-ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in));
+ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring);
+ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str);
+ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str);
+ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring);
-LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
-LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
-ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in));
-
-LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
-LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);
// Same function, better name. JC
inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
-// best name of all
-ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in));
-//
LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);
-LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
-LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
-ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in));
-LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
-LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
-ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in));
+ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str);
+ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str);
// an older alias for utf16str_to_utf8str(llutf16string)
inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
@@ -706,42 +737,36 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
//@{
/**
- * @brief Convert a wide string to std::string
+ * @brief Convert a wide string to/from std::string
+ * Convert a Windows wide string to/from our LLWString
*
* This replaces the unsafe W2A macro from ATL.
*/
-LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
-LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8
-inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page)
-{
- return ll_convert_wide_to_string(in.c_str(), code_page);
-}
-inline std::string ll_convert_wide_to_string(const std::wstring& in)
-{
- return ll_convert_wide_to_string(in.c_str());
-}
-ll_convert_wstr_alias(std::string, std::wstring, ll_convert_wide_to_string(in));
-
-/**
- * Converts a string to wide string.
- */
-LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in,
- unsigned int code_page);
-LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in);
- // default CP_UTF8
-ll_convert_wstr_alias(std::wstring, std::string, ll_convert_string_to_wide(in));
-
-/**
- * Convert a Windows wide string to our LLWString
- */
-LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in);
-ll_convert_wstr_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
-
-/**
- * Convert LLWString to Windows wide string
- */
-LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in);
-ll_convert_wstr_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));
+// Avoid requiring this header to #include the Windows header file declaring
+// our actual default code_page by delegating this function to our .cpp file.
+LL_COMMON_API unsigned int ll_wstring_default_code_page();
+
+// This is like ll_convert_forms(), with the added complexity of a code page
+// parameter that may or may not be passed.
+#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \
+LL_COMMON_API OUTSTR longname( \
+ const INSTR::value_type* in, \
+ size_t len=ll_convert_length(in), \
+ unsigned int code_page=ll_wstring_default_code_page()); \
+inline auto longname( \
+ const INSTR& in, \
+ size_t len=in.length(), \
+ unsigned int code_page=ll_wstring_default_code_page()) \
+{ \
+ return longname(in.c_str(), len, code_page); \
+} \
+aliasmacro(OUTSTR, INSTR, longname(in)); \
+aliasmacro(OUTSTR, const INSTR::value_type*, longname(in))
+
+ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string);
+ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide);
+ ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring);
+ ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide);
/**
* Converts incoming string into utf8 string