diff options
-rw-r--r-- | indra/llcommon/llstring.cpp | 95 | ||||
-rw-r--r-- | indra/llcommon/llstring.h | 143 |
2 files changed, 113 insertions, 125 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 0290eea143..5f426e5dca 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -215,7 +215,7 @@ S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar) return inchars - base; } -llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len) +llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len) { llutf16string out; @@ -237,27 +237,19 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len) return out; } -llutf16string wstring_to_utf16str(const LLWString &utf32str) +llutf16string utf8str_to_utf16str( const char* utf8str, size_t len ) { - const S32 len = (S32)utf32str.length(); - return wstring_to_utf16str(utf32str, len); -} - -llutf16string utf8str_to_utf16str ( const std::string& utf8str ) -{ - LLWString wstr = utf8str_to_wstring ( utf8str ); + LLWString wstr = utf8str_to_wstring ( utf8str, len ); return wstring_to_utf16str ( wstr ); } - -LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len) +LLWString utf16str_to_wstring(const U16* utf16str, size_t len) { LLWString wout; - if((len <= 0) || utf16str.empty()) return wout; + if (len == 0) return wout; S32 i = 0; - // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux): - const U16* chars16 = &(*(utf16str.begin())); + const U16* chars16 = utf16str; while (i < len) { llwchar cur_char; @@ -267,12 +259,6 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len) return wout; } -LLWString utf16str_to_wstring(const llutf16string &utf16str) -{ - const S32 len = (S32)utf16str.length(); - return utf16str_to_wstring(utf16str, len); -} - // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len) { @@ -392,8 +378,7 @@ S32 wstring_utf8_length(const LLWString& wstr) return len; } - -LLWString utf8str_to_wstring(const std::string& utf8str, S32 len) +LLWString utf8str_to_wstring(const char* utf8str, size_t len) { LLWString wout; @@ -481,13 +466,7 @@ LLWString utf8str_to_wstring(const std::string& utf8str, S32 len) return wout; } -LLWString utf8str_to_wstring(const std::string& utf8str) -{ - const S32 len = (S32)utf8str.length(); - return utf8str_to_wstring(utf8str, len); -} - -std::string wstring_to_utf8str(const LLWString& utf32str, S32 len) +std::string wstring_to_utf8str(const llwchar* utf32str, size_t len) { std::string out; @@ -503,20 +482,9 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len) return out; } -std::string wstring_to_utf8str(const LLWString& utf32str) -{ - const S32 len = (S32)utf32str.length(); - return wstring_to_utf8str(utf32str, len); -} - -std::string utf16str_to_utf8str(const llutf16string& utf16str) -{ - return wstring_to_utf8str(utf16str_to_wstring(utf16str)); -} - -std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len) +std::string utf16str_to_utf8str(const U16* utf16str, size_t len) { - return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len); + return wstring_to_utf8str(utf16str_to_wstring(utf16str, len)); } std::string utf8str_trim(const std::string& utf8str) @@ -657,17 +625,16 @@ std::string utf8str_removeCRLF(const std::string& utf8str) } #if LL_WINDOWS -std::string ll_convert_wide_to_string(const wchar_t* in) +unsigned int ll_wstring_default_code_page() { - return ll_convert_wide_to_string(in, CP_UTF8); + return CP_UTF8; } -std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) +std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page) { std::string out; if(in) { - int len_in = wcslen(in); int len_out = WideCharToMultiByte( code_page, 0, @@ -699,12 +666,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page) return out; } -std::wstring ll_convert_string_to_wide(const std::string& in) -{ - return ll_convert_string_to_wide(in, CP_UTF8); -} - -std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page) +std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page) { // From review: // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input, @@ -716,10 +678,10 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_ // reserve an output buffer that will be destroyed on exit, with a place // to put NULL terminator - std::vector<wchar_t> w_out(in.length() + 1); + std::vector<wchar_t> w_out(len + 1); memset(&w_out[0], 0, w_out.size()); - int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), + int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len, &w_out[0], w_out.size() - 1); //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858. @@ -729,22 +691,23 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_ return {&w_out[0]}; } -LLWString ll_convert_wide_to_wstring(const std::wstring& in) +LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len) { - // This function, like its converse, is a placeholder, encapsulating a - // guilty little hack: the only "official" way nat has found to convert - // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is - // by using iconv, which we've avoided so far. It kinda sorta works to - // just copy individual characters... - // The point is that if/when we DO introduce some more official way to - // perform such conversions, we should only have to call it here. - return { in.begin(), in.end() }; + // Whether or not std::wstring and llutf16string are distinct types, they + // both hold UTF-16LE characters. (See header file comments.) Pretend this + // wchar_t* sequence is really a U16* sequence and use the conversion we + // define above. + return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len); } -std::wstring ll_convert_wstring_to_wide(const LLWString& in) +std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len) { - // See comments in ll_convert_wide_to_wstring() - return { in.begin(), in.end() }; + // first, convert to llutf16string, for which we have a real implementation + auto utf16str{ wstring_to_utf16str(in, len) }; + // then, because each U16 char must be UTF-16LE encoded, pretend the U16* + // string pointer is a wchar_t* and instantiate a std::wstring of the same + // length. + return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() }; } std::string ll_convert_string_to_utf8_string(const std::string& in) diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 89e95ef40a..a0598e8a11 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -27,9 +27,11 @@ #ifndef LL_LLSTRING_H #define LL_LLSTRING_H +#include <boost/call_traits.hpp> #include <boost/optional/optional.hpp> #include <string> #include <cstdio> +#include <cwchar> // std::wcslen() //#include <locale> #include <iomanip> #include <algorithm> @@ -532,14 +534,59 @@ struct ll_convert_impl<T, T> template<> \ struct ll_convert_impl<TO, FROM> \ { \ - TO operator()(const FROM& in) const { return EXPR; } \ + /* param_type optimally passes both char* and string */ \ + TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \ } -// If all we're doing is copying characters, pass this as EXPR. Since it -// expands into the 'return EXPR' slot in the ll_convert_impl specialization -// above, it implies TO{ in.begin(), in.end() }. +// If all we're doing is copying characters, pass this to ll_convert_alias as +// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl +// specialization above, it implies TO{ in.begin(), in.end() }. #define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } +// Generic name for strlen() / wcslen() - the default implementation should +// (!) work with U16 and llwchar, but we don't intend to engage it. +template <typename CHARTYPE> +size_t ll_convert_length(const CHARTYPE* zstr) +{ + const CHARTYPE* zp; + // classic C string scan + for (zp = zstr; *zp; ++zp) + ; + return (zp - zstr); +} + +// specialize where we have a library function; may use intrinsic operations +template <> +inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); } +template <> +inline size_t ll_convert_length<char> (const char* zstr) { return std::strlen(zstr); } + +// ll_convert_forms() is short for a bunch of boilerplate. It defines +// longname(const char*, len), longname(const char*), longname(const string&) +// and longname(const string&, len) so calls written pre-ll_convert() will +// work. Most of these overloads will be unified once we turn on C++17 and can +// use std::string_view. +// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*) +// and ll_convert<OUTSTR>(const string&) will work. +#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \ +LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \ +inline auto longname(const INSTR& in, size_t len) \ +{ \ + return longname(in.c_str(), len); \ +} \ +inline auto longname(const INSTR::value_type* in) \ +{ \ + return longname(in, ll_convert_length(in)); \ +} \ +inline auto longname(const INSTR& in) \ +{ \ + return longname(in.c_str(), in.length()); \ +} \ +/* string param */ \ +aliasmacro(OUTSTR, INSTR, longname(in)); \ +/* char* param */ \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + // Make the incoming string a utf8 string. Replaces any unknown glyph // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest // of the data may not be recovered. @@ -602,34 +649,18 @@ typedef std::basic_string<U16> llutf16string; #define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) #endif -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len); -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str); -ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in)); - -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len); -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str); -ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in)); +ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring); +ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str); +ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str); +ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring); -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len); -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str ); -ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in)); - -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len); -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str); // Same function, better name. JC inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); } -// best name of all -ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in)); -// LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str); -ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in)); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str); -ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in)); +ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str); +ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str); // an older alias for utf16str_to_utf8str(llutf16string) inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} @@ -706,42 +737,36 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); //@{ /** - * @brief Convert a wide string to std::string + * @brief Convert a wide string to/from std::string + * Convert a Windows wide string to/from our LLWString * * This replaces the unsafe W2A macro from ATL. */ -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page); -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8 -inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page) -{ - return ll_convert_wide_to_string(in.c_str(), code_page); -} -inline std::string ll_convert_wide_to_string(const std::wstring& in) -{ - return ll_convert_wide_to_string(in.c_str()); -} -ll_convert_wstr_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); - -/** - * Converts a string to wide string. - */ -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in, - unsigned int code_page); -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in); - // default CP_UTF8 -ll_convert_wstr_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); - -/** - * Convert a Windows wide string to our LLWString - */ -LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in); -ll_convert_wstr_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); - -/** - * Convert LLWString to Windows wide string - */ -LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in); -ll_convert_wstr_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); +// Avoid requiring this header to #include the Windows header file declaring +// our actual default code_page by delegating this function to our .cpp file. +LL_COMMON_API unsigned int ll_wstring_default_code_page(); + +// This is like ll_convert_forms(), with the added complexity of a code page +// parameter that may or may not be passed. +#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \ +LL_COMMON_API OUTSTR longname( \ + const INSTR::value_type* in, \ + size_t len=ll_convert_length(in), \ + unsigned int code_page=ll_wstring_default_code_page()); \ +inline auto longname( \ + const INSTR& in, \ + size_t len=in.length(), \ + unsigned int code_page=ll_wstring_default_code_page()) \ +{ \ + return longname(in.c_str(), len, code_page); \ +} \ +aliasmacro(OUTSTR, INSTR, longname(in)); \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + +ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string); +ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide); + ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring); + ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide); /** * Converts incoming string into utf8 string |