summaryrefslogtreecommitdiff
path: root/indra/llcommon/llstring.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llstring.cpp')
-rw-r--r--indra/llcommon/llstring.cpp78
1 files changed, 78 insertions, 0 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index 6dab598341..9895a684b2 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -239,6 +239,84 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str)
return utf16str_to_wstring(utf16str, len);
}
+// Returns the number of llwchars (UTF-32 code points) encoded by the first
+// utf16_len 16-bit units of utf16str.
+//
+// A high surrogate (0xD800-0xDBFF) immediately followed by a low surrogate
+// (0xDC00-0xDFFF) inside the scanned range counts as a single llwchar. Every
+// other unit, including an unpaired surrogate or a high surrogate that is the
+// last unit of the range, counts as one llwchar on its own.
+// See http://en.wikipedia.org/wiki/UTF-16
+//
+// utf16_len is clamped to the number of units actually stored in utf16str, so
+// the scan never reads past the end of the string (and never touches the
+// storage of an empty string). A non-positive utf16_len is returned unchanged.
+S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
+{
+    // Never scan more units than the string holds.
+    const S32 available = (S32)utf16str.length();
+    const S32 limit = (utf16_len < available) ? utf16_len : available;
+    S32 surrogate_pairs = 0;
+    S32 i = 0;
+    while (i < limit)
+    {
+        const U16 c = utf16str[i++];
+        if (c >= 0xD800 && c <= 0xDBFF && i < limit)
+        {   // High surrogate with at least one more unit left in the range
+            const U16 d = utf16str[i];
+            if (d >= 0xDC00 && d <= 0xDFFF)
+            {   // Valid low surrogate: the two units encode one llwchar
+                surrogate_pairs++;
+                i++;
+            }
+        }
+    }
+    // Each pair used two units for one llwchar; everything else is one-to-one.
+    return limit - surrogate_pairs;
+}
+
+// Returns the number of 16-bit units needed to encode, as UTF-16, the wlen
+// llwchars of wstr that begin at index woffset.
+//
+// The requested range is clipped to the end of wstr. Characters at or above
+// 0x10000 need a surrogate pair and therefore contribute two units; all
+// others contribute one.
+//
+// Returns 0 when the range is empty: wlen <= 0, woffset at or past the end
+// of wstr, or woffset negative.
+S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
+{
+    const S32 size = (S32)wstr.length();
+    if (woffset < 0 || woffset >= size || wlen <= 0)
+    {
+        return 0;
+    }
+    // Clip against the characters that remain instead of forming
+    // woffset + wlen, which can overflow S32 when wlen is very large.
+    const S32 remaining = size - woffset;
+    const S32 count = (wlen < remaining) ? wlen : remaining;
+    const S32 end = woffset + count;
+    // Start from one unit per character, then add one more for each
+    // character that needs a surrogate pair.
+    S32 length = count;
+    for (S32 i = woffset; i < end; i++)
+    {
+        if (wstr[i] >= 0x10000)
+        {
+            length++;
+        }
+    }
+    return length;
+}
+
+// Given a wstring and an offset in it, returns the length as wstring (i.e.,
+// number of llwchars) of the longest substring that starts at the offset
+// and whose equivalent UTF-16 string does not exceed the given utf16_length.
+//
+// Characters at or above 0x10000 occupy two UTF-16 units (a surrogate pair);
+// all others occupy one.
+//
+// If unaligned is non-null, *unaligned is set to TRUE exactly when the scan
+// stopped because one unit of budget was left and the next character needs
+// two, i.e. utf16_length falls between the two halves of a surrogate pair.
+// It is set to FALSE in every other case, including when the budget is used
+// up exactly on a character boundary or the end of wstr is reached first.
+//
+// Returns 0 (with *unaligned FALSE) when utf16_length is not positive or
+// woffset lies outside wstr.
+S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned)
+{
+    const S32 end = (S32)wstr.length();
+    BOOL split_pair = FALSE;
+    S32 i = woffset;
+    if (woffset >= 0)
+    {
+        // Count down the UTF-16 units still available rather than comparing
+        // against woffset + utf16_length, which could overflow S32.
+        S32 budget = utf16_length;
+        while (i < end && budget > 0)
+        {
+            const S32 units = (wstr[i] >= 0x10000) ? 2 : 1;
+            if (units > budget)
+            {
+                // One unit left but the character needs a surrogate pair:
+                // the limit lands in the middle of that pair.
+                split_pair = TRUE;
+                break;
+            }
+            budget -= units;
+            i++;
+        }
+    }
+    if (unaligned)
+    {
+        *unaligned = split_pair;
+    }
+    return i - woffset;
+}
+
S32 wchar_utf8_length(const llwchar wc)
{
if (wc < 0x80)