summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--indra/llcommon/llstring.cpp95
-rw-r--r--indra/llcommon/llstring.h143
2 files changed, 113 insertions, 125 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index 0290eea143..5f426e5dca 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -215,7 +215,7 @@ S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
return inchars - base;
}
-llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
+llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
{
llutf16string out;
@@ -237,27 +237,19 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
return out;
}
-llutf16string wstring_to_utf16str(const LLWString &utf32str)
+llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
{
- const S32 len = (S32)utf32str.length();
- return wstring_to_utf16str(utf32str, len);
-}
-
-llutf16string utf8str_to_utf16str ( const std::string& utf8str )
-{
- LLWString wstr = utf8str_to_wstring ( utf8str );
+ LLWString wstr = utf8str_to_wstring ( utf8str, len );
return wstring_to_utf16str ( wstr );
}
-
-LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
+LLWString utf16str_to_wstring(const U16* utf16str, size_t len)
{
LLWString wout;
- if((len <= 0) || utf16str.empty()) return wout;
+ if (len == 0) return wout;
S32 i = 0;
- // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
- const U16* chars16 = &(*(utf16str.begin()));
+ const U16* chars16 = utf16str;
while (i < len)
{
llwchar cur_char;
@@ -267,12 +259,6 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
return wout;
}
-LLWString utf16str_to_wstring(const llutf16string &utf16str)
-{
- const S32 len = (S32)utf16str.length();
- return utf16str_to_wstring(utf16str, len);
-}
-
// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
{
@@ -392,8 +378,7 @@ S32 wstring_utf8_length(const LLWString& wstr)
return len;
}
-
-LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
+LLWString utf8str_to_wstring(const char* utf8str, size_t len)
{
LLWString wout;
@@ -481,13 +466,7 @@ LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
return wout;
}
-LLWString utf8str_to_wstring(const std::string& utf8str)
-{
- const S32 len = (S32)utf8str.length();
- return utf8str_to_wstring(utf8str, len);
-}
-
-std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
+std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)
{
std::string out;
@@ -503,20 +482,9 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
return out;
}
-std::string wstring_to_utf8str(const LLWString& utf32str)
-{
- const S32 len = (S32)utf32str.length();
- return wstring_to_utf8str(utf32str, len);
-}
-
-std::string utf16str_to_utf8str(const llutf16string& utf16str)
-{
- return wstring_to_utf8str(utf16str_to_wstring(utf16str));
-}
-
-std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
+std::string utf16str_to_utf8str(const U16* utf16str, size_t len)
{
- return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
+ return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));
}
std::string utf8str_trim(const std::string& utf8str)
@@ -657,17 +625,16 @@ std::string utf8str_removeCRLF(const std::string& utf8str)
}
#if LL_WINDOWS
-std::string ll_convert_wide_to_string(const wchar_t* in)
+unsigned int ll_wstring_default_code_page()
{
- return ll_convert_wide_to_string(in, CP_UTF8);
+ return CP_UTF8;
}
-std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
+std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page)
{
std::string out;
if(in)
{
- int len_in = wcslen(in);
int len_out = WideCharToMultiByte(
code_page,
0,
@@ -699,12 +666,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
return out;
}
-std::wstring ll_convert_string_to_wide(const std::string& in)
-{
- return ll_convert_string_to_wide(in, CP_UTF8);
-}
-
-std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
+std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page)
{
// From review:
// We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
@@ -716,10 +678,10 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_
// reserve an output buffer that will be destroyed on exit, with a place
// to put NULL terminator
- std::vector<wchar_t> w_out(in.length() + 1);
+ std::vector<wchar_t> w_out(len + 1);
memset(&w_out[0], 0, w_out.size());
- int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(),
+ int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len,
&w_out[0], w_out.size() - 1);
//looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
@@ -729,22 +691,23 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_
return {&w_out[0]};
}
-LLWString ll_convert_wide_to_wstring(const std::wstring& in)
+LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len)
{
- // This function, like its converse, is a placeholder, encapsulating a
- // guilty little hack: the only "official" way nat has found to convert
- // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is
- // by using iconv, which we've avoided so far. It kinda sorta works to
- // just copy individual characters...
- // The point is that if/when we DO introduce some more official way to
- // perform such conversions, we should only have to call it here.
- return { in.begin(), in.end() };
+ // Whether or not std::wstring and llutf16string are distinct types, they
+ // both hold UTF-16LE characters. (See header file comments.) Pretend this
+ // wchar_t* sequence is really a U16* sequence and use the conversion we
+ // define above.
+ return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len);
}
-std::wstring ll_convert_wstring_to_wide(const LLWString& in)
+std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len)
{
- // See comments in ll_convert_wide_to_wstring()
- return { in.begin(), in.end() };
+ // first, convert to llutf16string, for which we have a real implementation
+ auto utf16str{ wstring_to_utf16str(in, len) };
+ // then, because each U16 char must be UTF-16LE encoded, pretend the U16*
+ // string pointer is a wchar_t* and instantiate a std::wstring of the same
+ // length.
+ return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() };
}
std::string ll_convert_string_to_utf8_string(const std::string& in)
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 89e95ef40a..a0598e8a11 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -27,9 +27,11 @@
#ifndef LL_LLSTRING_H
#define LL_LLSTRING_H
+#include <boost/call_traits.hpp>
#include <boost/optional/optional.hpp>
#include <string>
#include <cstdio>
+#include <cwchar> // std::wcslen()
//#include <locale>
#include <iomanip>
#include <algorithm>
@@ -532,14 +534,59 @@ struct ll_convert_impl<T, T>
template<> \
struct ll_convert_impl<TO, FROM> \
{ \
- TO operator()(const FROM& in) const { return EXPR; } \
+ /* param_type optimally passes both char* and string */ \
+ TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \
}
-// If all we're doing is copying characters, pass this as EXPR. Since it
-// expands into the 'return EXPR' slot in the ll_convert_impl specialization
-// above, it implies TO{ in.begin(), in.end() }.
+// If all we're doing is copying characters, pass this to ll_convert_alias as
+// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl
+// specialization above, it implies TO{ in.begin(), in.end() }.
#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() }
+// Generic name for strlen() / wcslen() - the default implementation should
+// (!) work with U16 and llwchar, but we don't intend to engage it.
+template <typename CHARTYPE>
+size_t ll_convert_length(const CHARTYPE* zstr)
+{
+ const CHARTYPE* zp;
+ // classic C string scan
+ for (zp = zstr; *zp; ++zp)
+ ;
+ return (zp - zstr);
+}
+
+// specialize where we have a library function; may use intrinsic operations
+template <>
+inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); }
+template <>
+inline size_t ll_convert_length<char> (const char* zstr) { return std::strlen(zstr); }
+
+// ll_convert_forms() is short for a bunch of boilerplate. It defines
+// longname(const char*, len), longname(const char*), longname(const string&)
+// and longname(const string&, len) so calls written pre-ll_convert() will
+// work. Most of these overloads will be unified once we turn on C++17 and can
+// use std::string_view.
+// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*)
+// and ll_convert<OUTSTR>(const string&) will work.
+#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \
+LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \
+inline auto longname(const INSTR& in, size_t len) \
+{ \
+ return longname(in.c_str(), len); \
+} \
+inline auto longname(const INSTR::value_type* in) \
+{ \
+ return longname(in, ll_convert_length(in)); \
+} \
+inline auto longname(const INSTR& in) \
+{ \
+ return longname(in.c_str(), in.length()); \
+} \
+/* string param */ \
+aliasmacro(OUTSTR, INSTR, longname(in)); \
+/* char* param */ \
+aliasmacro(OUTSTR, const INSTR::value_type*, longname(in))
+
// Make the incoming string a utf8 string. Replaces any unknown glyph
// with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
// of the data may not be recovered.
@@ -602,34 +649,18 @@ typedef std::basic_string<U16> llutf16string;
#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
#endif
-LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
-LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
-ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in));
-
-LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
-LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
-ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in));
+ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring);
+ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str);
+ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str);
+ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring);
-LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
-LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
-ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in));
-
-LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
-LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);
// Same function, better name. JC
inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
-// best name of all
-ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in));
-//
LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);
-LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
-LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
-ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in));
-LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
-LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
-ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in));
+ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str);
+ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str);
// an older alias for utf16str_to_utf8str(llutf16string)
inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
@@ -706,42 +737,36 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
//@{
/**
- * @brief Convert a wide string to std::string
+ * @brief Convert a wide string to/from std::string
+ * Convert a Windows wide string to/from our LLWString
*
* This replaces the unsafe W2A macro from ATL.
*/
-LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
-LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8
-inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page)
-{
- return ll_convert_wide_to_string(in.c_str(), code_page);
-}
-inline std::string ll_convert_wide_to_string(const std::wstring& in)
-{
- return ll_convert_wide_to_string(in.c_str());
-}
-ll_convert_wstr_alias(std::string, std::wstring, ll_convert_wide_to_string(in));
-
-/**
- * Converts a string to wide string.
- */
-LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in,
- unsigned int code_page);
-LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in);
- // default CP_UTF8
-ll_convert_wstr_alias(std::wstring, std::string, ll_convert_string_to_wide(in));
-
-/**
- * Convert a Windows wide string to our LLWString
- */
-LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in);
-ll_convert_wstr_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
-
-/**
- * Convert LLWString to Windows wide string
- */
-LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in);
-ll_convert_wstr_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));
+// Avoid requiring this header to #include the Windows header file declaring
+// our actual default code_page by delegating this function to our .cpp file.
+LL_COMMON_API unsigned int ll_wstring_default_code_page();
+
+// This is like ll_convert_forms(), with the added complexity of a code page
+// parameter that may or may not be passed.
+#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \
+LL_COMMON_API OUTSTR longname( \
+ const INSTR::value_type* in, \
+ size_t len=ll_convert_length(in), \
+ unsigned int code_page=ll_wstring_default_code_page()); \
+inline auto longname( \
+ const INSTR& in, \
+ size_t len=in.length(), \
+ unsigned int code_page=ll_wstring_default_code_page()) \
+{ \
+ return longname(in.c_str(), len, code_page); \
+} \
+aliasmacro(OUTSTR, INSTR, longname(in)); \
+aliasmacro(OUTSTR, const INSTR::value_type*, longname(in))
+
+ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string);
+ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide);
+ ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring);
+ ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide);
/**
* Converts incoming string into utf8 string