diff options
Diffstat (limited to 'indra/llcommon/llstring.h')
-rw-r--r-- | indra/llcommon/llstring.h | 225 |
1 files changed, 142 insertions, 83 deletions
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h index 4263122f36..d94f549480 100644 --- a/indra/llcommon/llstring.h +++ b/indra/llcommon/llstring.h @@ -27,9 +27,11 @@ #ifndef LL_LLSTRING_H #define LL_LLSTRING_H +#include <boost/call_traits.hpp> #include <boost/optional/optional.hpp> #include <string> #include <cstdio> +#include <cwchar> // std::wcslen() //#include <locale> #include <iomanip> #include <algorithm> @@ -527,14 +529,71 @@ struct ll_convert_impl<T, T> T operator()(const T& in) const { return in; } }; +// simple construction from char* +template<typename T> +struct ll_convert_impl<T, const typename T::value_type*> +{ + T operator()(const typename T::value_type* in) const { return { in }; } +}; + // specialize ll_convert_impl<TO, FROM> to return EXPR #define ll_convert_alias(TO, FROM, EXPR) \ template<> \ struct ll_convert_impl<TO, FROM> \ { \ - TO operator()(const FROM& in) const { return EXPR; } \ + /* param_type optimally passes both char* and string */ \ + TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \ +} + +// If all we're doing is copying characters, pass this to ll_convert_alias as +// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl +// specialization above, it implies TO{ in.begin(), in.end() }. +#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } + +// Generic name for strlen() / wcslen() - the default implementation should +// (!) work with U16 and llwchar, but we don't intend to engage it. +template <typename CHARTYPE> +size_t ll_convert_length(const CHARTYPE* zstr) +{ + const CHARTYPE* zp; + // classic C string scan + for (zp = zstr; *zp; ++zp) + ; + return (zp - zstr); } +// specialize where we have a library function; may use intrinsic operations +template <> +inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); } +template <> +inline size_t ll_convert_length<char> (const char* zstr) { return std::strlen(zstr); } + +// ll_convert_forms() is short for a bunch of boilerplate. It defines +// longname(const char*, len), longname(const char*), longname(const string&) +// and longname(const string&, len) so calls written pre-ll_convert() will +// work. Most of these overloads will be unified once we turn on C++17 and can +// use std::string_view. +// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*) +// and ll_convert<OUTSTR>(const string&) will work. +#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \ +LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \ +inline auto longname(const INSTR& in, size_t len) \ +{ \ + return longname(in.c_str(), len); \ +} \ +inline auto longname(const INSTR::value_type* in) \ +{ \ + return longname(in, ll_convert_length(in)); \ +} \ +inline auto longname(const INSTR& in) \ +{ \ + return longname(in.c_str(), in.length()); \ +} \ +/* string param */ \ +aliasmacro(OUTSTR, INSTR, longname(in)); \ +/* char* param */ \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + // Make the incoming string a utf8 string. Replaces any unknown glyph // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest // of the data may not be recovered. @@ -571,63 +630,47 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw); // LL_WCHAR_T_NATIVE. typedef std::basic_string<U16> llutf16string; -#if ! defined(LL_WCHAR_T_NATIVE) -// wchar_t is identical to U16, and std::wstring is identical to llutf16string. -// Defining an ll_convert alias involving llutf16string would collide with the -// comparable preferred alias involving std::wstring. (In this scenario, if -// you pass llutf16string, it will engage the std::wstring specialization.) -#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing -#else // defined(LL_WCHAR_T_NATIVE) -// wchar_t is a distinct native type, so llutf16string is also a distinct -// type, and there IS a point to converting separately to/from llutf16string. -// (But why? Windows APIs are still defined in terms of wchar_t, and -// in this scenario llutf16string won't work for them!) -#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +// Considering wchar_t, llwchar and U16, there are three relevant cases: +#if LLWCHAR_IS_WCHAR_T // every which way but Windows +// llwchar is identical to wchar_t, LLWString is identical to std::wstring. +// U16 is distinct, llutf16string is distinct (though pretty useless). +// Given conversions to/from LLWString and to/from llutf16string, conversions +// involving std::wstring would collide. +#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving llutf16string without collisions +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#elif defined(LL_WCHAR_T_NATIVE) // Windows, either clang or MS /Zc:wchar_t +// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types. +// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string +// can all be defined. +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) + +#else // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t- +// wchar_t is identical to U16, std::wstring is identical to llutf16string. +// Given conversions to/from LLWString and to/from std::wstring, conversions +// involving llutf16string would collide. +#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing +// but we can define conversions involving std::wstring without collisions +#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) +#endif + +ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring); +ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str); +ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str); +ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring); -#if LL_WINDOWS -// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact, -// wchar_t is native. Everywhere but Windows, we use it for llwchar (see -// stdtypes.h). That makes LLWString identical to std::wstring, so these -// aliases for std::wstring would collide with those for LLWString. Only -// define on Windows, where converting between std::wstring and llutf16string -// means copying chars. -ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end())); -ll_convert_alias(std::wstring, llutf16string, std::wstring(in.begin(), in.end())); -#endif // LL_WINDOWS -#endif // defined(LL_WCHAR_T_NATIVE) - -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len); -LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str); -ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in)); - -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len); -LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str); -ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in)); - -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len); -LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str ); -ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in)); - -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len); -LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str); // Same function, better name. JC inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); } -// best name of all -ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in)); -// LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len); -LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str); -ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in)); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len); -LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str); -ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in)); +ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str); +ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str); -#if LL_WINDOWS +// an older alias for utf16str_to_utf8str(llutf16string) inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} -#endif // Length of this UTF32 string in bytes when transformed to UTF8 LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); @@ -701,42 +744,48 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); //@{ /** - * @brief Convert a wide string to std::string + * @brief Convert a wide string to/from std::string + * Convert a Windows wide string to/from our LLWString * * This replaces the unsafe W2A macro from ATL. */ -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page); -LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8 -inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page) -{ - return ll_convert_wide_to_string(in.c_str(), code_page); -} -inline std::string ll_convert_wide_to_string(const std::wstring& in) -{ - return ll_convert_wide_to_string(in.c_str()); -} -ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in)); - -/** - * Converts a string to wide string. - */ -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in, - unsigned int code_page); -LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in); - // default CP_UTF8 -ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in)); - -/** - * Convert a Windows wide string to our LLWString - */ -LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in); -ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in)); - -/** - * Convert LLWString to Windows wide string - */ -LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in); -ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in)); +// Avoid requiring this header to #include the Windows header file declaring +// our actual default code_page by delegating this function to our .cpp file. +LL_COMMON_API unsigned int ll_wstring_default_code_page(); + +// This is like ll_convert_forms(), with the added complexity of a code page +// parameter that may or may not be passed. +#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \ +/* declare the only nontrivial implementation (in .cpp file) */ \ +LL_COMMON_API OUTSTR longname( \ + const INSTR::value_type* in, \ + size_t len, \ + unsigned int code_page=ll_wstring_default_code_page()); \ +/* if passed only a char pointer, scan for nul terminator */ \ +inline auto longname(const INSTR::value_type* in) \ +{ \ + return longname(in, ll_convert_length(in)); \ +} \ +/* if passed string and length, extract its char pointer */ \ +inline auto longname( \ + const INSTR& in, \ + size_t len, \ + unsigned int code_page=ll_wstring_default_code_page()) \ +{ \ + return longname(in.c_str(), len, code_page); \ +} \ +/* if passed only a string object, no scan, pass known length */ \ +inline auto longname(const INSTR& in) \ +{ \ + return longname(in.c_str(), in.length()); \ +} \ +aliasmacro(OUTSTR, INSTR, longname(in)); \ +aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) + +ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string); +ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide); + ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring); + ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide); /** * Converts incoming string into utf8 string @@ -1937,4 +1986,14 @@ void LLStringUtilBase<T>::truncate(string_type& string, size_type count) string.resize(count < cur_size ? count : cur_size); } +// The good thing about *declaration* macros, vs. usage macros, is that now +// we're done with them: we don't need them to bleed into the consuming source +// file. +#undef ll_convert_alias +#undef ll_convert_u16_alias +#undef ll_convert_wstr_alias +#undef LL_CONVERT_COPY_CHARS +#undef ll_convert_forms +#undef ll_convert_cp_forms + #endif // LL_STRING_H |