diff options
Diffstat (limited to 'indra/llcommon/llstring.cpp')
-rw-r--r-- | indra/llcommon/llstring.cpp | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp index 6dab598341..9895a684b2 100644 --- a/indra/llcommon/llstring.cpp +++ b/indra/llcommon/llstring.cpp @@ -239,6 +239,84 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str) return utf16str_to_wstring(utf16str, len); } +// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. +S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len) +{ + S32 surrogate_pairs = 0; + // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux): + const U16 *const utf16_chars = &(*(utf16str.begin())); + S32 i = 0; + while (i < utf16_len) + { + const U16 c = utf16_chars[i++]; + if (c >= 0xD800 && c <= 0xDBFF) // See http://en.wikipedia.org/wiki/UTF-16 + { // Have first byte of a surrogate pair + if (i >= utf16_len) + { + break; + } + const U16 d = utf16_chars[i]; + if (d >= 0xDC00 && d <= 0xDFFF) + { // Have valid second byte of a surrogate pair + surrogate_pairs++; + i++; + } + } + } + return utf16_len - surrogate_pairs; +} + +// Length in utf16string (UTF-16) of wlen wchars beginning at woffset. +S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen) +{ + const S32 end = llmin((S32)wstr.length(), woffset + wlen); + if (end < woffset) + { + return 0; + } + else + { + S32 length = end - woffset; + for (S32 i = woffset; i < end; i++) + { + if (wstr[i] >= 0x10000) + { + length++; + } + } + return length; + } +} + +// Given a wstring and an offset in it, returns the length as wstring (i.e., +// number of llwchars) of the longest substring that starts at the offset +// and whose equivalent utf-16 string does not exceeds the given utf16_length. +S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned) +{ + const S32 end = wstr.length(); + BOOL u = FALSE; + S32 n = woffset + utf16_length; + S32 i = woffset; + while (i < end) + { + if (wstr[i] >= 0x10000) + { + --n; + } + if (i >= n) + { + u = (i > n); + break; + } + i++; + } + if (unaligned) + { + *unaligned = u; + } + return i - woffset; +} + S32 wchar_utf8_length(const llwchar wc) { if (wc < 0x80) |