/** * @file llstring.h * @brief String utility functions and std::string class. * * $LicenseInfo:firstyear=2001&license=viewerlgpl$ * Second Life Viewer Source Code * Copyright (C) 2010, Linden Research, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License only. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ #ifndef LL_LLSTRING_H #define LL_LLSTRING_H #include #include #include #include #include #include // std::wcslen() //#include #include #include #include #include #include #include "llformat.h" #include "stdtypes.h" #if LL_LINUX #include #include #endif #include const char LL_UNKNOWN_CHAR = '?'; class LLSD; #if LL_DARWIN || LL_LINUX // Template specialization of char_traits for U16s. 
Only necessary on Mac and Linux (exists on Windows already) #include namespace std { template<> struct char_traits { typedef U16 char_type; typedef int int_type; typedef streampos pos_type; typedef streamoff off_type; typedef mbstate_t state_type; static void assign(char_type& __c1, const char_type& __c2) { __c1 = __c2; } static bool eq(const char_type& __c1, const char_type& __c2) { return __c1 == __c2; } static bool lt(const char_type& __c1, const char_type& __c2) { return __c1 < __c2; } static int compare(const char_type* __s1, const char_type* __s2, size_t __n) { return memcmp(__s1, __s2, __n * sizeof(char_type)); } static size_t length(const char_type* __s) { const char_type *cur_char = __s; while (*cur_char != 0) { ++cur_char; } return cur_char - __s; } static const char_type* find(const char_type* __s, size_t __n, const char_type& __a) { return static_cast(memchr(__s, __a, __n * sizeof(char_type))); } static char_type* move(char_type* __s1, const char_type* __s2, size_t __n) { return static_cast(memmove(__s1, __s2, __n * sizeof(char_type))); } static char_type* copy(char_type* __s1, const char_type* __s2, size_t __n) { return static_cast(memcpy(__s1, __s2, __n * sizeof(char_type))); } /* Flawfinder: ignore */ static char_type* assign(char_type* __s, size_t __n, char_type __a) { // This isn't right. //return static_cast(memset(__s, __a, __n * sizeof(char_type))); // I don't think there's a standard 'memset' for 16-bit values. // Do this the old-fashioned way. size_t __i; for(__i = 0; __i < __n; __i++) { __s[__i] = __a; } return __s; } static char_type to_char_type(const int_type& __c) { return static_cast(__c); } static int_type to_int_type(const char_type& __c) { return static_cast(__c); } static bool eq_int_type(const int_type& __c1, const int_type& __c2) { return __c1 == __c2; } static int_type eof() { return static_cast(EOF); } static int_type not_eof(const int_type& __c) { return (__c == eof()) ? 
0 : __c; } }; }; #endif class LL_COMMON_API LLStringOps { private: static long sPacificTimeOffset; static long sLocalTimeOffset; static bool sPacificDaylightTime; static std::map datetimeToCodes; public: static std::vector sWeekDayList; static std::vector sWeekDayShortList; static std::vector sMonthList; static std::vector sMonthShortList; static std::string sDayFormat; static std::string sAM; static std::string sPM; static char toUpper(char elem) { return toupper((unsigned char)elem); } static llwchar toUpper(llwchar elem) { return towupper(elem); } static char toLower(char elem) { return tolower((unsigned char)elem); } static llwchar toLower(llwchar elem) { return towlower(elem); } static bool isSpace(char elem) { return isspace((unsigned char)elem) != 0; } static bool isSpace(llwchar elem) { return iswspace(elem) != 0; } static bool isUpper(char elem) { return isupper((unsigned char)elem) != 0; } static bool isUpper(llwchar elem) { return iswupper(elem) != 0; } static bool isLower(char elem) { return islower((unsigned char)elem) != 0; } static bool isLower(llwchar elem) { return iswlower(elem) != 0; } static bool isDigit(char a) { return isdigit((unsigned char)a) != 0; } static bool isDigit(llwchar a) { return iswdigit(a) != 0; } static bool isPunct(char a) { return ispunct((unsigned char)a) != 0; } static bool isPunct(llwchar a) { return iswpunct(a) != 0; } static bool isAlpha(char a) { return isalpha((unsigned char)a) != 0; } static bool isAlpha(llwchar a) { return iswalpha(a) != 0; } static bool isAlnum(char a) { return isalnum((unsigned char)a) != 0; } static bool isAlnum(llwchar a) { return iswalnum(a) != 0; } // Returns true when 'a' corresponds to a "genuine" emoji. 
HB static bool isEmoji(llwchar a); static S32 collate(const char* a, const char* b) { return strcoll(a, b); } static S32 collate(const llwchar* a, const llwchar* b); static void setupDatetimeInfo(bool pacific_daylight_time); static void setupWeekDaysNames(const std::string& data); static void setupWeekDaysShortNames(const std::string& data); static void setupMonthNames(const std::string& data); static void setupMonthShortNames(const std::string& data); static void setupDayFormat(const std::string& data); static long getPacificTimeOffset(void) { return sPacificTimeOffset;} static long getLocalTimeOffset(void) { return sLocalTimeOffset;} // Is the Pacific time zone (aka server time zone) // currently in daylight savings time? static bool getPacificDaylightTime(void) { return sPacificDaylightTime;} static std::string getDatetimeCode (std::string key); // Express a value like 1234567 as "1.23M" static std::string getReadableNumber(F64 num); }; /** * @brief Return a string constructed from in without crashing if the * pointer is NULL. */ LL_COMMON_API std::string ll_safe_string(const char* in); LL_COMMON_API std::string ll_safe_string(const char* in, S32 maxlen); // Allowing assignments from non-strings into format_map_t is apparently // *really* error-prone, so subclass std::string with just basic c'tors. 
class LLFormatMapString { public: LLFormatMapString() {}; LLFormatMapString(const char* s) : mString(ll_safe_string(s)) {}; LLFormatMapString(const std::string& s) : mString(s) {}; operator std::string() const { return mString; } bool operator<(const LLFormatMapString& rhs) const { return mString < rhs.mString; } std::size_t length() const { return mString.length(); } private: std::string mString; }; template class LLStringUtilBase { private: static std::string sLocale; public: typedef std::basic_string string_type; typedef typename string_type::size_type size_type; public: ///////////////////////////////////////////////////////////////////////////////////////// // Static Utility functions that operate on std::strings static const string_type null; typedef std::map format_map_t; /// considers any sequence of delims as a single field separator LL_COMMON_API static void getTokens(const string_type& instr, std::vector& tokens, const string_type& delims); /// like simple scan overload, but returns scanned vector static std::vector getTokens(const string_type& instr, const string_type& delims); /// add support for keep_delims and quotes (either could be empty string) static void getTokens(const string_type& instr, std::vector& tokens, const string_type& drop_delims, const string_type& keep_delims, const string_type& quotes=string_type()); /// like keep_delims-and-quotes overload, but returns scanned vector static std::vector getTokens(const string_type& instr, const string_type& drop_delims, const string_type& keep_delims, const string_type& quotes=string_type()); /// add support for escapes (could be empty string) static void getTokens(const string_type& instr, std::vector& tokens, const string_type& drop_delims, const string_type& keep_delims, const string_type& quotes, const string_type& escapes); /// like escapes overload, but returns scanned vector static std::vector getTokens(const string_type& instr, const string_type& drop_delims, const string_type& keep_delims, 
const string_type& quotes, const string_type& escapes); LL_COMMON_API static void formatNumber(string_type& numStr, string_type decimals); LL_COMMON_API static bool formatDatetime(string_type& replacement, string_type token, string_type param, S32 secFromEpoch); LL_COMMON_API static S32 format(string_type& s, const format_map_t& substitutions); LL_COMMON_API static S32 format(string_type& s, const LLSD& substitutions); LL_COMMON_API static bool simpleReplacement(string_type& replacement, string_type token, const format_map_t& substitutions); LL_COMMON_API static bool simpleReplacement(string_type& replacement, string_type token, const LLSD& substitutions); LL_COMMON_API static void setLocale (std::string inLocale); LL_COMMON_API static std::string getLocale (void); static bool isValidIndex(const string_type& string, size_type i) { return !string.empty() && (0 <= i) && (i <= string.size()); } static bool contains(const string_type& string, T c, size_type i=0) { return string.find(c, i) != string_type::npos; } static void trimHead(string_type& string); static void trimTail(string_type& string); static void trim(string_type& string) { trimHead(string); trimTail(string); } static void truncate(string_type& string, size_type count); // if string startsWith prefix, remove it and return true static bool removePrefix(string_type& string, const string_type& prefix); // if string startsWith prefix, return (string without prefix, true), else (string, false) static std::pair withoutPrefix(const string_type& string, const string_type& prefix); // like removePrefix() static bool removeSuffix(string_type& string, const string_type& suffix); static std::pair withoutSuffix(const string_type& string, const string_type& suffix); static void toUpper(string_type& string); static void toLower(string_type& string); // True if this is the head of s. 
static bool isHead( const string_type& string, const T* s ); /** * @brief Returns true if string starts with substr * * If etither string or substr are empty, this method returns false. */ static bool startsWith( const string_type& string, const string_type& substr); /** * @brief Returns true if string ends in substr * * If etither string or substr are empty, this method returns false. */ static bool endsWith( const string_type& string, const string_type& substr); /** * get environment string value with proper Unicode handling * (key is always UTF-8) * detect absence by return value == dflt */ static string_type getenv(const std::string& key, const string_type& dflt=""); /** * get optional environment string value with proper Unicode handling * (key is always UTF-8) * detect absence by (! return value) */ static std::optional getoptenv(const std::string& key); static void addCRLF(string_type& string); static void removeCRLF(string_type& string); static void removeWindowsCR(string_type& string); static void replaceTabsWithSpaces( string_type& string, size_type spaces_per_tab ); static void replaceNonstandardASCII( string_type& string, T replacement ); static void replaceChar( string_type& string, T target, T replacement ); static void replaceString( string_type& string, string_type target, string_type replacement ); static string_type capitalize(const string_type& str); static void capitalize(string_type& str); static bool containsNonprintable(const string_type& string); static void stripNonprintable(string_type& string); /** * Double-quote an argument string if needed, unless it's already * double-quoted. Decide whether it's needed based on the presence of any * character in @a triggers (default space or double-quote). If we quote * it, escape any embedded double-quote with the @a escape string (default * backslash). * * Passing triggers="" means always quote, unless it's already double-quoted. 
*/ static string_type quote(const string_type& str, const string_type& triggers=" \"", const string_type& escape="\\"); /** * @brief Unsafe way to make ascii characters. You should probably * only call this when interacting with the host operating system. * The 1 byte std::string does not work correctly. * The 2 and 4 byte std::string probably work, so LLWStringUtil::_makeASCII * should work. */ static void _makeASCII(string_type& string); // Conversion to other data types static bool convertToBOOL(const string_type& string, bool& value); static bool convertToU8(const string_type& string, U8& value); static bool convertToS8(const string_type& string, S8& value); static bool convertToS16(const string_type& string, S16& value); static bool convertToU16(const string_type& string, U16& value); static bool convertToU32(const string_type& string, U32& value); static bool convertToS32(const string_type& string, S32& value); static bool convertToF32(const string_type& string, F32& value); static bool convertToF64(const string_type& string, F64& value); ///////////////////////////////////////////////////////////////////////////////////////// // Utility functions for working with char*'s and strings // Like strcmp but also handles empty strings. Uses // current locale. static S32 compareStrings(const T* lhs, const T* rhs); static S32 compareStrings(const string_type& lhs, const string_type& rhs); // case insensitive version of above. Uses current locale on // Win32, and falls back to a non-locale aware comparison on // Linux. static S32 compareInsensitive(const T* lhs, const T* rhs); static S32 compareInsensitive(const string_type& lhs, const string_type& rhs); // Case sensitive comparison with good handling of numbers. Does not use current locale. // a.k.a. strdictcmp() static S32 compareDict(const string_type& a, const string_type& b); // Case *in*sensitive comparison with good handling of numbers. Does not use current locale. // a.k.a. 
strdictcmp() static S32 compareDictInsensitive(const string_type& a, const string_type& b); // Puts compareDict() in a form appropriate for LL container classes to use for sorting. static bool precedesDict( const string_type& a, const string_type& b ); // A replacement for strncpy. // If the dst buffer is dst_size bytes long or more, ensures that dst is null terminated and holds // up to dst_size-1 characters of src. static void copy(T* dst, const T* src, size_type dst_size); // Copies src into dst at a given offset. static void copyInto(string_type& dst, const string_type& src, size_type offset); static bool isPartOfWord(T c) { return (c == (T)'_') || LLStringOps::isAlnum(c); } #ifdef _DEBUG LL_COMMON_API static void testHarness(); #endif private: LL_COMMON_API static size_type getSubstitution(const string_type& instr, size_type& start, std::vector& tokens); }; template const std::basic_string LLStringUtilBase::null; template std::string LLStringUtilBase::sLocale; typedef LLStringUtilBase LLStringUtil; typedef LLStringUtilBase LLWStringUtil; typedef std::basic_string LLWString; typedef std::basic_string_view LLWStringView; //@ Use this where we want to disallow input in the form of "foo" // This is used to catch places where english text is embedded in the code // instead of in a translatable XUI file. class LLStringExplicit : public std::string { public: explicit LLStringExplicit(const char* s) : std::string(s) {} LLStringExplicit(const std::string& s) : std::string(s) {} LLStringExplicit(const std::string& s, size_type pos, size_type n = std::string::npos) : std::string(s, pos, n) {} }; struct LLDictionaryLess { public: bool operator()(const std::string& a, const std::string& b) const { return (LLStringUtil::precedesDict(a, b)); } }; /** * Simple support functions */ /** * @brief chop off the trailing characters in a string. * * This function works on bytes rather than glyphs, so this will * incorrectly truncate non-single byte strings. 
* Use utf8str_truncate() for utf8 strings * @return a copy of in string minus the trailing count bytes. */ inline std::string chop_tail_copy( const std::string& in, std::string::size_type count) { return std::string(in, 0, in.length() - count); } /** * @brief This translates a nybble stored as a hex value from 0-f back * to a nybble in the low order bits of the return byte. */ LL_COMMON_API bool is_char_hex(char hex); LL_COMMON_API U8 hex_as_nybble(char hex); /** * @brief read the contents of a file into a string. * * Since this function has no concept of character encoding, most * anything you do with this method ill-advised. Please avoid. * @param str [out] The string which will have. * @param filename The full name of the file to read. * @return Returns true on success. If false, str is unmodified. */ LL_COMMON_API bool _read_file_into_string(std::string& str, const std::string& filename); LL_COMMON_API bool iswindividual(llwchar elem); /** * Unicode support */ /// generic conversion aliases template struct ll_convert_impl { // Don't even provide a generic implementation. We specialize for every // combination we do support. TO operator()(const FROM& in) const; }; /** * somefunction(ll_convert(data)) * target = ll_convert(data) * totype otherfunc(const fromtype& data) * { * // ... * return ll_convert(data); * } * all infer both the FROM type and the TO type. */ template class ll_convert { private: const FROM& mRef; public: ll_convert(const FROM& ref): mRef(ref) {} inline operator const FROM&() const { return mRef; } template , std::decay_t>, bool> =true> inline operator TO() const { return ll_convert_impl>()(mRef); } }; // When the TO type must be explicit, use a function template to get // ll_convert_to(from_value) API. 
template const SAME& ll_convert_to(const SAME& in) { return in; } template, std::decay_t>, bool> =true> TO ll_convert_to(const FROM& in) { return ll_convert_impl>()(in); } // degenerate case template struct ll_convert_impl { T operator()(const T& in) const { return in; } }; // simple construction from char* template struct ll_convert_impl { T operator()(const typename T::value_type* in) const { return { in }; } }; // specialize ll_convert_impl to return EXPR #define ll_convert_alias(TO, FROM, EXPR) \ template<> \ struct ll_convert_impl \ { \ /* param_type optimally passes both char* and string */ \ TO operator()(typename boost::call_traits::param_type in) const { return EXPR; } \ } // If all we're doing is copying characters, pass this to ll_convert_alias as // EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl // specialization above, it implies TO{ in.begin(), in.end() }. #define LL_CONVERT_COPY_CHARS { in.begin(), in.end() } // Generic name for strlen() / wcslen() - the default implementation should // (!) work with U16 and llwchar, but we don't intend to engage it. template size_t ll_convert_length(const CHARTYPE* zstr) { const CHARTYPE* zp; // classic C string scan for (zp = zstr; *zp; ++zp) ; return (zp - zstr); } // specialize where we have a library function; may use intrinsic operations template <> inline size_t ll_convert_length(const wchar_t* zstr) { return std::wcslen(zstr); } template <> inline size_t ll_convert_length (const char* zstr) { return std::strlen(zstr); } // ll_convert_forms() is short for a bunch of boilerplate. It defines // longname(const char*, len), longname(const char*), longname(const string&) // and longname(const string&, len) so calls written pre-ll_convert() will // work. Most of these overloads will be unified once we turn on C++17 and can // use std::string_view. // It also uses aliasmacro to ensure that both ll_convert(const char*) // and ll_convert(const string&) will work. 
#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname) \ LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \ inline auto longname(const INSTR& in, size_t len) \ { \ return longname(in.c_str(), len); \ } \ inline auto longname(const INSTR::value_type* in) \ { \ return longname(in, ll_convert_length(in)); \ } \ inline auto longname(const INSTR& in) \ { \ return longname(in.c_str(), in.length()); \ } \ /* string param */ \ aliasmacro(OUTSTR, INSTR, longname(in)); \ /* char* param */ \ aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) // Make the incoming string a utf8 string. Replaces any unknown glyph // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest // of the data may not be recovered. LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw); // // We should never use UTF16 except when communicating with Win32! // https://docs.microsoft.com/en-us/cpp/cpp/char-wchar-t-char16-t-char32-t // nat 2018-12-14: I consider the whole llutf16string thing a mistake, because // the Windows APIs we want to call are all defined in terms of wchar_t* // (or worse, LPCTSTR). // https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types // While there is no point coding for an ASCII-only world (! defined(UNICODE)), // use of U16 and llutf16string for Windows APIs locks in /Zc:wchar_t-. Going // forward, we should code in terms of wchar_t and std::wstring so as to // support either setting of /Zc:wchar_t. // The first link above states that char can be used to hold ASCII or any // multi-byte character set, and distinguishes wchar_t (UTF-16LE), char16_t // (UTF-16) and char32_t (UTF-32). Nonetheless, within this code base: // * char and std::string always hold UTF-8 (of which ASCII is a subset). It // is a BUG if they are used to pass strings in any other multi-byte // encoding. // * wchar_t and std::wstring should be our interface to Windows wide-string // APIs, and therefore hold UTF-16LE. 
// * U16 and llutf16string are the previous but DEPRECATED UTF-16LE type. Do // not introduce new uses of U16 or llutf16string for string data. // * llwchar and LLWString hold UTF-32 strings. // * Do not introduce char16_t or std::u16string. // * Do not introduce char32_t or std::u32string. // // This typedef may or may not be identical to std::wstring, depending on // LL_WCHAR_T_NATIVE. typedef std::basic_string llutf16string; // Considering wchar_t, llwchar and U16, there are three relevant cases: #if LLWCHAR_IS_WCHAR_T // every which way but Windows // llwchar is identical to wchar_t, LLWString is identical to std::wstring. // U16 is distinct, llutf16string is distinct (though pretty useless). // Given conversions to/from LLWString and to/from llutf16string, conversions // involving std::wstring would collide. #define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing // but we can define conversions involving llutf16string without collisions #define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) #elif defined(LL_WCHAR_T_NATIVE) // Windows, either clang or MS /Zc:wchar_t // llwchar (32-bit), wchar_t (16-bit) and U16 are all different types. // Conversions to/from LLWString, to/from std::wstring and to/from llutf16string // can all be defined. #define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) #define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) #else // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t- // wchar_t is identical to U16, std::wstring is identical to llutf16string. // Given conversions to/from LLWString and to/from std::wstring, conversions // involving llutf16string would collide. 
#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing // but we can define conversions involving std::wstring without collisions #define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR) #endif ll_convert_forms(ll_convert_u16_alias, LLWString, llutf16string, utf16str_to_wstring); ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString, wstring_to_utf16str); ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string, utf8str_to_utf16str); ll_convert_forms(ll_convert_alias, LLWString, std::string, utf8str_to_wstring); // Same function, better name. JC inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); } // return a UTF-8 string representation of a single llwchar, which we // occasionally require: // cheaper than ll_convert_to(LLWString(1, inchar)) LL_COMMON_API std::string wchar_to_utf8chars(llwchar inchar); ll_convert_alias(std::string, llwchar, wchar_to_utf8chars(in)); ll_convert_forms(ll_convert_alias, std::string, LLWString, wstring_to_utf8str); ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str); // an older alias for utf16str_to_utf8str(llutf16string) inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);} // Length of this UTF32 string in bytes when transformed to UTF8 LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); // Length in bytes of this wide char in a UTF8 string LL_COMMON_API S32 wchar_utf8_length(const llwchar wc); LL_COMMON_API std::string wchar_utf8_preview(const llwchar wc); LL_COMMON_API std::string utf8str_tolower(const std::string& utf8str); // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string. LL_COMMON_API S32 utf16str_wstring_length(const llutf16string &utf16str, S32 len); // Length in utf16string (UTF-16) of wlen wchars beginning at woffset. 
LL_COMMON_API S32 wstring_utf16_length(const LLWString & wstr, S32 woffset, S32 wlen);

// Length in wstring (i.e., llwchar count) of a part of a wstring specified by utf16 length (i.e., utf16 units.)
LL_COMMON_API S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, S32 woffset, S32 utf16_length, bool *unaligned = nullptr);

/**
 * @brief Properly truncate a utf8 string to a maximum byte count.
 *
 * The returned string may be less than max_len if the truncation
 * happens in the middle of a glyph. If max_len is longer than the
 * string passed in, the return value == utf8str.
 * @param utf8str A valid utf8 string to truncate.
 * @param max_len The maximum number of bytes in the return value.
 * @return Returns a valid utf8 string with byte count <= max_len.
 */
LL_COMMON_API std::string utf8str_truncate(const std::string& utf8str, const S32 max_len);

LL_COMMON_API std::string utf8str_trim(const std::string& utf8str);

LL_COMMON_API S32 utf8str_compare_insensitive(
    const std::string& lhs,
    const std::string& rhs);

/**
 * @brief Properly truncate a utf8 string to a maximum character count.
 *
 * If symbol_len is longer than the string passed in, the return
 * value == utf8str.
 * @param utf8str A valid utf8 string to truncate.
 * @param symbol_len The maximum number of symbols in the return value.
 * @return Returns a valid utf8 string with symbol count <= symbol_len.
 */
LL_COMMON_API std::string utf8str_symbol_truncate(const std::string& utf8str, const S32 symbol_len);

/**
 * @brief Replace all occurrences of target_char with replace_char
 *
 * @param utf8str A utf8 string to process.
 * @param target_char The wchar to be replaced
 * @param replace_char The wchar which is written on replace
 */
LL_COMMON_API std::string utf8str_substChar(
    const std::string& utf8str,
    const llwchar target_char,
    const llwchar replace_char);

LL_COMMON_API std::string utf8str_makeASCII(const std::string& utf8str);

// Hack - used for evil notecards.
LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str); LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str); LL_COMMON_API llwchar utf8str_to_wchar(const std::string& utf8str, size_t offset, size_t length); LL_COMMON_API std::string utf8str_showBytesUTF8(const std::string& utf8str); LL_COMMON_API bool wstring_has_emoji(LLWStringView wstr); LL_COMMON_API bool wstring_remove_emojis(LLWString& wstr); LL_COMMON_API bool utf8str_remove_emojis(std::string& utf8str); #if LL_WINDOWS /* @name Windows string helpers */ //@{ /** * @brief Convert a wide string to/from std::string * Convert a Windows wide string to/from our LLWString * * This replaces the unsafe W2A macro from ATL. */ // Avoid requiring this header to #include the Windows header file declaring // our actual default code_page by delegating this function to our .cpp file. LL_COMMON_API unsigned int ll_wstring_default_code_page(); // This is like ll_convert_forms(), with the added complexity of a code page // parameter that may or may not be passed. 
#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname) \ /* declare the only nontrivial implementation (in .cpp file) */ \ LL_COMMON_API OUTSTR longname( \ const INSTR::value_type* in, \ size_t len, \ unsigned int code_page=ll_wstring_default_code_page()); \ /* if passed only a char pointer, scan for nul terminator */ \ inline auto longname(const INSTR::value_type* in) \ { \ return longname(in, ll_convert_length(in)); \ } \ /* if passed string and length, extract its char pointer */ \ inline auto longname( \ const INSTR& in, \ size_t len, \ unsigned int code_page=ll_wstring_default_code_page()) \ { \ return longname(in.c_str(), len, code_page); \ } \ /* if passed only a string object, no scan, pass known length */ \ inline auto longname(const INSTR& in) \ { \ return longname(in.c_str(), in.length()); \ } \ aliasmacro(OUTSTR, INSTR, longname(in)); \ aliasmacro(OUTSTR, const INSTR::value_type*, longname(in)) ll_convert_cp_forms(ll_convert_wstr_alias, std::string, std::wstring, ll_convert_wide_to_string); ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string, ll_convert_string_to_wide); ll_convert_forms(ll_convert_wstr_alias, LLWString, std::wstring, ll_convert_wide_to_wstring); ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString, ll_convert_wstring_to_wide); /** * Converts incoming string into utf8 string * */ LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in); /// Get Windows message string for passed GetLastError() code // VS 2013 doesn't let us forward-declare this template, which is what we // started with, so the implementation could reference the specialization we // haven't yet declared. Somewhat weirdly, just stating the generic // implementation in terms of the specialization works, even in this order... 
// the general case is just a conversion from the sole implementation
// Microsoft says DWORD is a typedef for unsigned long
// https://docs.microsoft.com/en-us/windows/desktop/winprog/windows-data-types
// so rather than drag windows.h into everybody's include space...
// NOTE(review): no <...> follows 'template', 'll_convert' or 'windows_message'
// in the declarations below, and none follows 'std::optional' or
// 'std::basic_string' further down. The template parameter/argument lists
// appear to have been lost from this text -- confirm against the canonical
// header before relying on these lines.
template
STRING windows_message(unsigned long error)
{
    // Obtain the message from the single real implementation, then convert
    // it to the requested string type.
    return ll_convert(windows_message(error));
}

/// There's only one real implementation
template<>
LL_COMMON_API std::wstring windows_message(unsigned long error);

/// Get Windows message string, implicitly calling GetLastError()
LL_COMMON_API unsigned long windows_get_last_error();

/// Convenience form: message for the code returned by windows_get_last_error().
template
STRING windows_message() { return windows_message(windows_get_last_error()); }

//@}

// Look up the environment variable named by key; the std::optional result
// presumably is empty when the variable is not set -- TODO confirm in the .cpp.
LL_COMMON_API std::optional llstring_getoptenv(const std::string& key);

#else // ! LL_WINDOWS

// Non-Windows counterpart of the declaration above.
LL_COMMON_API std::optional llstring_getoptenv(const std::string& key);

#endif // ! LL_WINDOWS

/**
 * Many of the 'strip' and 'replace' methods of LLStringUtilBase need
 * specialization to work with the signed char type.
 * Sadly, it is not possible (AFAIK) to specialize a single method of
 * a template class.
 * That stuff should go here.
 */
namespace LLStringFn
{
    /**
     * @brief Replace all non-printable characters with replacement in
     * string.
     * NOTE - this will zap non-ascii
     *
     * @param [in,out] string the string to modify. out value is the string
     * with zero non-printable characters.
     * @param replacement The replacement character. use LL_UNKNOWN_CHAR if unsure.
     */
    LL_COMMON_API void replace_nonprintable_in_ascii(
        std::basic_string& string,
        char replacement);


    /**
     * @brief Replace all non-printable characters and pipe characters
     * with replacement in a string.
     * NOTE - this will zap non-ascii
     *
     * @param [in,out] str the string to modify. out value is the string
     * with zero non-printable characters and zero pipe characters.
     * @param replacement The replacement character. use LL_UNKNOWN_CHAR if unsure.
     */
    LL_COMMON_API void replace_nonprintable_and_pipe_in_ascii(std::basic_string& str,
                                                              char replacement);


    /**
     * @brief Remove all characters that are not allowed in XML 1.0.
     * Returns a copy of the string with those characters removed.
     * Works with US ASCII and UTF-8 encoded strings.  JC
     */
    LL_COMMON_API std::string strip_invalid_xml(const std::string& input);

    /**
     * @brief Replace all characters that are not allowed in XML 1.0
     * with corresponding literals: [ < > & ] => [ &lt; &gt; &amp; ]
     *
     * @param input the text to encode.
     * @param for_attribute NOTE(review): effect not visible from this header;
     * presumably selects additional escaping needed inside attribute values
     * -- confirm in the implementation.
     */
    LL_COMMON_API std::string xml_encode(const std::string& input, bool for_attribute = false);

    /**
     * @brief Replace some of XML literals that are defined in XML 1.0
     * with corresponding characters: [ &lt; &gt; &amp; ] => [ < > & ]
     *
     * @param input the text to decode.
     * @param for_attribute NOTE(review): effect not visible from this header;
     * presumably the inverse of the same flag on xml_encode() -- confirm in
     * the implementation.
     */
    LL_COMMON_API std::string xml_decode(const std::string& input, bool for_attribute = false);

    /**
     * @brief Replace all control characters (0 <= c < 0x20) with replacement in
     * string.   This is safe for utf-8
     *
     * @param [in,out] string the string to modify. out value is the string
     * with zero non-printable characters.
     * @param replacement The replacement character. use LL_UNKNOWN_CHAR if unsure.
     */
    LL_COMMON_API void replace_ascii_controlchars(
        std::basic_string& string,
        char replacement);
}

////////////////////////////////////////////////////////////
// NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp.
// There is no LLWStringUtil::format implementation currently.
// Calling these for anything other than LLStringUtil will produce link errors.
//////////////////////////////////////////////////////////// // static template std::vector::string_type> LLStringUtilBase::getTokens(const string_type& instr, const string_type& delims) { std::vector tokens; getTokens(instr, tokens, delims); return tokens; } // static template std::vector::string_type> LLStringUtilBase::getTokens(const string_type& instr, const string_type& drop_delims, const string_type& keep_delims, const string_type& quotes) { std::vector tokens; getTokens(instr, tokens, drop_delims, keep_delims, quotes); return tokens; } // static template std::vector::string_type> LLStringUtilBase::getTokens(const string_type& instr, const string_type& drop_delims, const string_type& keep_delims, const string_type& quotes, const string_type& escapes) { std::vector tokens; getTokens(instr, tokens, drop_delims, keep_delims, quotes, escapes); return tokens; } namespace LLStringUtilBaseImpl { /** * Input string scanner helper for getTokens(), or really any other * character-parsing routine that may have to deal with escape characters. * This implementation defines the concept (also an interface, should you * choose to implement the concept by subclassing) and provides trivial * implementations for a string @em without escape processing. */ template struct InString { typedef std::basic_string string_type; typedef typename string_type::const_iterator const_iterator; InString(const_iterator b, const_iterator e): mIter(b), mEnd(e) {} virtual ~InString() {} bool done() const { return mIter == mEnd; } /// Is the current character (*mIter) escaped? This implementation can /// answer trivially because it doesn't support escapes. virtual bool escaped() const { return false; } /// Obtain the current character and advance @c mIter. virtual T next() { return *mIter++; } /// Does the current character match specified character? virtual bool is(T ch) const { return (! done()) && *mIter == ch; } /// Is the current character any one of the specified characters? 
virtual bool oneof(const string_type& delims) const { return (! done()) && LLStringUtilBase::contains(delims, *mIter); } /** * Scan forward from @from until either @a delim or end. This is primarily * useful for processing quoted substrings. * * If we do see @a delim, append everything from @from until (excluding) * @a delim to @a into, advance @c mIter to skip @a delim, and return @c * true. * * If we do not see @a delim, do not alter @a into or @c mIter and return * @c false. Do not pass GO, do not collect $200. * * @note The @c false case described above implements normal getTokens() * treatment of an unmatched open quote: treat the quote character as if * escaped, that is, simply collect it as part of the current token. Other * plausible behaviors directly affect the way getTokens() deals with an * unmatched quote: e.g. throwing an exception to treat it as an error, or * assuming a close quote beyond end of string (in which case return @c * true). */ virtual bool collect_until(string_type& into, const_iterator from, T delim) { const_iterator found = std::find(from, mEnd, delim); // If we didn't find delim, change nothing, just tell caller. if (found == mEnd) return false; // Found delim! Append everything between from and found. into.append(from, found); // advance past delim in input mIter = found + 1; return true; } const_iterator mIter, mEnd; }; /// InString subclass that handles escape characters template class InEscString: public InString { public: typedef InString super; typedef typename super::string_type string_type; typedef typename super::const_iterator const_iterator; using super::done; using super::mIter; using super::mEnd; InEscString(const_iterator b, const_iterator e, const string_type& escapes): super(b, e), mEscapes(escapes) { // Even though we've already initialized 'mIter' via our base-class // constructor, set it again to check for initial escape char. setiter(b); } /// This implementation uses the answer cached by setiter(). 
virtual bool escaped() const { return mIsEsc; } virtual T next() { // If we're looking at the escape character of an escape sequence, // skip that character. This is the one time we can modify 'mIter' // without using setiter: for this one case we DO NOT CARE if the // escaped character is itself an escape. if (mIsEsc) ++mIter; // If we were looking at an escape character, this is the escaped // character; otherwise it's just the next character. T result(*mIter); // Advance mIter, checking for escape sequence. setiter(mIter + 1); return result; } virtual bool is(T ch) const { // Like base-class is(), except that an escaped character matches // nothing. return (! done()) && (! mIsEsc) && *mIter == ch; } virtual bool oneof(const string_type& delims) const { // Like base-class oneof(), except that an escaped character matches // nothing. return (! done()) && (! mIsEsc) && LLStringUtilBase::contains(delims, *mIter); } virtual bool collect_until(string_type& into, const_iterator from, T delim) { // Deal with escapes in the characters we collect; that is, an escaped // character must become just that character without the preceding // escape. Collect characters in a separate string rather than // directly appending to 'into' in case we do not find delim, in which // case we're supposed to leave 'into' unmodified. string_type collected; // For scanning purposes, we're going to work directly with 'mIter'. // Save its current value in case we fail to see delim. const_iterator save_iter(mIter); // Okay, set 'mIter', checking for escape. setiter(from); while (! done()) { // If we see an unescaped delim, stop and report success. if ((! mIsEsc) && *mIter == delim) { // Append collected chars to 'into'. into.append(collected); // Don't forget to advance 'mIter' past delim. setiter(mIter + 1); return true; } // We're not at end, and either we're not looking at delim or it's // escaped. Collect this character and keep going. 
collected.push_back(next()); } // Here we hit 'mEnd' without ever seeing delim. Restore mIter and tell // caller. setiter(save_iter); return false; } private: void setiter(const_iterator i) { mIter = i; // Every time we change 'mIter', set 'mIsEsc' to be able to repetitively // answer escaped() without having to rescan 'mEscapes'. mIsEsc caches // contains(mEscapes, *mIter). // We're looking at an escaped char if we're not already at end (that // is, *mIter is even meaningful); if *mIter is in fact one of the // specified escape characters; and if there's one more character // following it. That is, if an escape character is the very last // character of the input string, it loses its special meaning. mIsEsc = (! done()) && LLStringUtilBase::contains(mEscapes, *mIter) && (mIter+1) != mEnd; } const string_type mEscapes; bool mIsEsc; }; /// getTokens() implementation based on InString concept template void getTokens(INSTRING& instr, std::vector& tokens, const string_type& drop_delims, const string_type& keep_delims, const string_type& quotes) { // There are times when we want to match either drop_delims or // keep_delims. Concatenate them up front to speed things up. string_type all_delims(drop_delims + keep_delims); // no tokens yet tokens.clear(); // try for another token while (! instr.done()) { // scan past any drop_delims while (instr.oneof(drop_delims)) { // skip this drop_delim instr.next(); // but if that was the end of the string, done if (instr.done()) return; } // found the start of another token: make a slot for it tokens.push_back(string_type()); if (instr.oneof(keep_delims)) { // *iter is a keep_delim, a token of exactly 1 character. Append // that character to the new token and proceed. tokens.back().push_back(instr.next()); continue; } // Here we have a non-delimiter token, which might consist of a mix of // quoted and unquoted parts. Use bash rules for quoting: you can // embed a quoted substring in the midst of an unquoted token (e.g. 
// ~/"sub dir"/myfile.txt); you can ram two quoted substrings together // to make a single token (e.g. 'He said, "'"Don't."'"'). We diverge // from bash in that bash considers an unmatched quote an error. Our // param signature doesn't allow for errors, so just pretend it's not // a quote and embed it. // At this level, keep scanning until we hit the next delimiter of // either type (drop_delims or keep_delims). while (! instr.oneof(all_delims)) { // If we're looking at an open quote, search forward for // a close quote, collecting characters along the way. if (instr.oneof(quotes) && instr.collect_until(tokens.back(), instr.mIter+1, *instr.mIter)) { // collect_until is cleverly designed to do exactly what we // need here. No further action needed if it returns true. } else { // Either *iter isn't a quote, or there's no matching close // quote: in other words, just an ordinary char. Append it to // current token. tokens.back().push_back(instr.next()); } // having scanned that segment of this token, if we've reached the // end of the string, we're done if (instr.done()) return; } } } } // namespace LLStringUtilBaseImpl // static template void LLStringUtilBase::getTokens(const string_type& string, std::vector& tokens, const string_type& drop_delims, const string_type& keep_delims, const string_type& quotes) { // Because this overload doesn't support escapes, use simple InString to // manage input range. LLStringUtilBaseImpl::InString instring(string.begin(), string.end()); LLStringUtilBaseImpl::getTokens(instring, tokens, drop_delims, keep_delims, quotes); } // static template void LLStringUtilBase::getTokens(const string_type& string, std::vector& tokens, const string_type& drop_delims, const string_type& keep_delims, const string_type& quotes, const string_type& escapes) { // This overload must deal with escapes. Delegate that to InEscString // (unless there ARE no escapes). 
std::unique_ptr< LLStringUtilBaseImpl::InString > instrp; if (escapes.empty()) instrp.reset(new LLStringUtilBaseImpl::InString(string.begin(), string.end())); else instrp.reset(new LLStringUtilBaseImpl::InEscString(string.begin(), string.end(), escapes)); LLStringUtilBaseImpl::getTokens(*instrp, tokens, drop_delims, keep_delims, quotes); } // static template S32 LLStringUtilBase::compareStrings(const T* lhs, const T* rhs) { S32 result; if( lhs == rhs ) { result = 0; } else if ( !lhs || !lhs[0] ) { result = ((!rhs || !rhs[0]) ? 0 : 1); } else if ( !rhs || !rhs[0]) { result = -1; } else { result = LLStringOps::collate(lhs, rhs); } return result; } //static template S32 LLStringUtilBase::compareStrings(const string_type& lhs, const string_type& rhs) { return LLStringOps::collate(lhs.c_str(), rhs.c_str()); } // static template S32 LLStringUtilBase::compareInsensitive(const T* lhs, const T* rhs ) { S32 result; if( lhs == rhs ) { result = 0; } else if ( !lhs || !lhs[0] ) { result = ((!rhs || !rhs[0]) ? 0 : 1); } else if ( !rhs || !rhs[0] ) { result = -1; } else { string_type lhs_string(lhs); string_type rhs_string(rhs); LLStringUtilBase::toUpper(lhs_string); LLStringUtilBase::toUpper(rhs_string); result = LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str()); } return result; } //static template S32 LLStringUtilBase::compareInsensitive(const string_type& lhs, const string_type& rhs) { string_type lhs_string(lhs); string_type rhs_string(rhs); LLStringUtilBase::toUpper(lhs_string); LLStringUtilBase::toUpper(rhs_string); return LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str()); } // Case sensitive comparison with good handling of numbers. Does not use current locale. // a.k.a. 
strdictcmp() //static template S32 LLStringUtilBase::compareDict(const string_type& astr, const string_type& bstr) { const T* a = astr.c_str(); const T* b = bstr.c_str(); T ca, cb; S32 ai, bi, cnt = 0; S32 bias = 0; ca = *(a++); cb = *(b++); while( ca && cb ){ if( bias==0 ){ if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); bias--; } if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); bias++; } }else{ if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); } if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); } } if( LLStringOps::isDigit(ca) ){ if( cnt-->0 ){ if( cb!=ca ) break; }else{ if( !LLStringOps::isDigit(cb) ) break; for(ai=0; LLStringOps::isDigit(a[ai]); ai++); for(bi=0; LLStringOps::isDigit(b[bi]); bi++); if( ai S32 LLStringUtilBase::compareDictInsensitive(const string_type& astr, const string_type& bstr) { const T* a = astr.c_str(); const T* b = bstr.c_str(); T ca, cb; S32 ai, bi, cnt = 0; ca = *(a++); cb = *(b++); while( ca && cb ){ if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); } if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); } if( LLStringOps::isDigit(ca) ){ if( cnt-->0 ){ if( cb!=ca ) break; }else{ if( !LLStringOps::isDigit(cb) ) break; for(ai=0; LLStringOps::isDigit(a[ai]); ai++); for(bi=0; LLStringOps::isDigit(b[bi]); bi++); if( ai bool LLStringUtilBase::precedesDict( const string_type& a, const string_type& b ) { if( a.size() && b.size() ) { return (LLStringUtilBase::compareDict(a.c_str(), b.c_str()) < 0); } else { return (!b.empty()); } } //static template void LLStringUtilBase::toUpper(string_type& string) { if( !string.empty() ) { std::transform( string.begin(), string.end(), string.begin(), (T(*)(T)) &LLStringOps::toUpper); } } //static template void LLStringUtilBase::toLower(string_type& string) { if( !string.empty() ) { std::transform( string.begin(), string.end(), string.begin(), (T(*)(T)) &LLStringOps::toLower); } } //static template void 
LLStringUtilBase::trimHead(string_type& string) { if( !string.empty() ) { size_type i = 0; while( i < string.length() && LLStringOps::isSpace( string[i] ) ) { i++; } string.erase(0, i); } } //static template void LLStringUtilBase::trimTail(string_type& string) { if( string.size() ) { size_type len = string.length(); size_type i = len; while( i > 0 && LLStringOps::isSpace( string[i-1] ) ) { i--; } string.erase( i, len - i ); } } // if string startsWith prefix, remove it and return true template bool LLStringUtilBase::removePrefix(string_type& string, const string_type& prefix) { bool found{ startsWith(string, prefix) }; if (found) { string.erase(0, prefix.length()); } return found; } // if string startsWith prefix, return (string without prefix, true), else (string, false) template std::pair::string_type, bool> LLStringUtilBase::withoutPrefix(const string_type& string, const string_type& prefix) { bool found{ startsWith(string, prefix) }; if (! found) { return { string, false }; } else { return { string.substr(prefix.length()), true }; } } // like removePrefix() template bool LLStringUtilBase::removeSuffix(string_type& string, const string_type& suffix) { bool found{ endsWith(string, suffix) }; if (found) { string.erase(string.length() - suffix.length()); } return found; } template std::pair::string_type, bool> LLStringUtilBase::withoutSuffix(const string_type& string, const string_type& suffix) { bool found{ endsWith(string, suffix) }; if (! found) { return { string, false }; } else { return { string.substr(0, string.length() - suffix.length()), true }; } } // Replace line feeds with carriage return-line feed pairs. 
//static template void LLStringUtilBase::addCRLF(string_type& string) { const T LF = 10; const T CR = 13; // Count the number of line feeds size_type count = 0; size_type len = string.size(); size_type i; for( i = 0; i < len; i++ ) { if( string[i] == LF ) { count++; } } // Insert a carriage return before each line feed if( count ) { size_type size = len + count; T *t = new T[size]; size_type j = 0; for( i = 0; i < len; ++i ) { if( string[i] == LF ) { t[j] = CR; ++j; } t[j] = string[i]; ++j; } string.assign(t, size); delete[] t; } } // Remove all carriage returns //static template void LLStringUtilBase::removeCRLF(string_type& string) { const T CR = 13; size_type cr_count = 0; size_type len = string.size(); size_type i; for( i = 0; i < len - cr_count; i++ ) { if( string[i+cr_count] == CR ) { cr_count++; } string[i] = string[i+cr_count]; } string.erase(i, cr_count); } //static template void LLStringUtilBase::removeWindowsCR(string_type& string) { if (string.empty()) { return; } const T LF = 10; const T CR = 13; size_type cr_count = 0; size_type len = string.size(); size_type i; for( i = 0; i < len - cr_count - 1; i++ ) { if( string[i+cr_count] == CR && string[i+cr_count+1] == LF) { cr_count++; } string[i] = string[i+cr_count]; } string.erase(i, cr_count); } //static template void LLStringUtilBase::replaceChar( string_type& string, T target, T replacement ) { size_type found_pos = 0; while( (found_pos = string.find(target, found_pos)) != string_type::npos ) { string[found_pos] = replacement; found_pos++; // avoid infinite defeat if target == replacement } } //static template void LLStringUtilBase::replaceString( string_type& string, string_type target, string_type replacement ) { size_type found_pos = 0; while( (found_pos = string.find(target, found_pos)) != string_type::npos ) { string.replace( found_pos, target.length(), replacement ); found_pos += replacement.length(); // avoid infinite defeat if replacement contains target } } //static template void 
LLStringUtilBase::replaceNonstandardASCII( string_type& string, T replacement ) { const char LF = 10; const S8 MIN = 32; // const S8 MAX = 127; size_type len = string.size(); for( size_type i = 0; i < len; i++ ) { // No need to test MAX < mText[i] because we treat mText[i] as a signed char, // which has a max value of 127. if( ( S8(string[i]) < MIN ) && (string[i] != LF) ) { string[i] = replacement; } } } //static template void LLStringUtilBase::replaceTabsWithSpaces( string_type& str, size_type spaces_per_tab ) { const T TAB = '\t'; const T SPACE = ' '; string_type out_str; // Replace tabs with spaces for (size_type i = 0; i < str.length(); i++) { if (str[i] == TAB) { for (size_type j = 0; j < spaces_per_tab; j++) out_str += SPACE; } else { out_str += str[i]; } } str = out_str; } //static template std::basic_string LLStringUtilBase::capitalize(const string_type& str) { string_type result(str); capitalize(result); return result; } //static template void LLStringUtilBase::capitalize(string_type& str) { if (str.size()) { auto last = str[0] = toupper(str[0]); for (U32 i = 1; i < str.size(); ++i) { last = (last == ' ' || last == '-' || last == '_') ? 
str[i] = toupper(str[i]) : str[i]; } } } //static template bool LLStringUtilBase::containsNonprintable(const string_type& string) { const char MIN = 32; bool rv = false; for (size_type i = 0; i < string.size(); i++) { if(string[i] < MIN) { rv = true; break; } } return rv; } // *TODO: reimplement in terms of algorithm //static template void LLStringUtilBase::stripNonprintable(string_type& string) { const char MIN = 32; size_type j = 0; if (string.empty()) { return; } size_t src_size = string.size(); char* c_string = new char[src_size + 1]; if(c_string == NULL) { return; } copy(c_string, string.c_str(), src_size+1); char* write_head = &c_string[0]; for (size_type i = 0; i < src_size; i++) { char* read_head = &string[i]; write_head = &c_string[j]; if(!(*read_head < MIN)) { *write_head = *read_head; ++j; } } c_string[j]= '\0'; string = c_string; delete []c_string; } // *TODO: reimplement in terms of algorithm template std::basic_string LLStringUtilBase::quote(const string_type& str, const string_type& triggers, const string_type& escape) { size_type len(str.length()); // If the string is already quoted, assume user knows what s/he's doing. if (len >= 2 && str[0] == '"' && str[len-1] == '"') { return str; } // Not already quoted: do we need to? triggers.empty() is a special case // meaning "always quote." if ((! triggers.empty()) && str.find_first_of(triggers) == string_type::npos) { // no trigger characters, don't bother quoting return str; } // For whatever reason, we must quote this string. 
string_type result; result.push_back('"'); for (typename string_type::const_iterator ci(str.begin()), cend(str.end()); ci != cend; ++ci) { if (*ci == '"') { result.append(escape); } result.push_back(*ci); } result.push_back('"'); return result; } template void LLStringUtilBase::_makeASCII(string_type& string) { // Replace non-ASCII chars with LL_UNKNOWN_CHAR for (size_type i = 0; i < string.length(); i++) { if (string[i] > 0x7f) { string[i] = LL_UNKNOWN_CHAR; } } } // static template void LLStringUtilBase::copy( T* dst, const T* src, size_type dst_size ) { if( dst_size > 0 ) { size_type min_len = 0; if( src ) { min_len = llmin( dst_size - 1, strlen( src ) ); /* Flawfinder: ignore */ memcpy(dst, src, min_len * sizeof(T)); /* Flawfinder: ignore */ } dst[min_len] = '\0'; } } // static template void LLStringUtilBase::copyInto(string_type& dst, const string_type& src, size_type offset) { if ( offset == dst.length() ) { // special case - append to end of string and avoid expensive // (when strings are large) string manipulations dst += src; } else { string_type tail = dst.substr(offset); dst = dst.substr(0, offset); dst += src; dst += tail; }; } // True if this is the head of s. 
//static template bool LLStringUtilBase::isHead( const string_type& string, const T* s ) { if( string.empty() ) { // Early exit return false; } else { return (strncmp( s, string.c_str(), string.size() ) == 0); } } // static template bool LLStringUtilBase::startsWith( const string_type& string, const string_type& substr) { if(string.empty() || (substr.empty())) return false; if (substr.length() > string.length()) return false; if (0 == string.compare(0, substr.length(), substr)) return true; return false; } // static template bool LLStringUtilBase::endsWith( const string_type& string, const string_type& substr) { if(string.empty() || (substr.empty())) return false; size_t sub_len = substr.length(); size_t str_len = string.length(); if (sub_len > str_len) return false; if (0 == string.compare(str_len - sub_len, sub_len, substr)) return true; return false; } // static template auto LLStringUtilBase::getoptenv(const std::string& key) -> std::optional { auto found(llstring_getoptenv(key)); if (found) { // return populated std::optional return { ll_convert_to(*found) }; } else { // empty std::optional return {}; } } // static template auto LLStringUtilBase::getenv(const std::string& key, const string_type& dflt) -> string_type { auto found(getoptenv(key)); if (found) { return *found; } else { return dflt; } } template bool LLStringUtilBase::convertToBOOL(const string_type& string, bool& value) { if( string.empty() ) { return false; } string_type temp( string ); trim(temp); if( (temp == "1") || (temp == "T") || (temp == "t") || (temp == "TRUE") || (temp == "true") || (temp == "True") ) { value = true; return true; } else if( (temp == "0") || (temp == "F") || (temp == "f") || (temp == "FALSE") || (temp == "false") || (temp == "False") ) { value = false; return true; } return false; } template bool LLStringUtilBase::convertToU8(const string_type& string, U8& value) { S32 value32 = 0; bool success = convertToS32(string, value32); if( success && (U8_MIN <= value32) && 
(value32 <= U8_MAX) ) { value = (U8) value32; return true; } return false; } template bool LLStringUtilBase::convertToS8(const string_type& string, S8& value) { S32 value32 = 0; bool success = convertToS32(string, value32); if( success && (S8_MIN <= value32) && (value32 <= S8_MAX) ) { value = (S8) value32; return true; } return false; } template bool LLStringUtilBase::convertToS16(const string_type& string, S16& value) { S32 value32 = 0; bool success = convertToS32(string, value32); if( success && (S16_MIN <= value32) && (value32 <= S16_MAX) ) { value = (S16) value32; return true; } return false; } template bool LLStringUtilBase::convertToU16(const string_type& string, U16& value) { S32 value32 = 0; bool success = convertToS32(string, value32); if( success && (U16_MIN <= value32) && (value32 <= U16_MAX) ) { value = (U16) value32; return true; } return false; } template bool LLStringUtilBase::convertToU32(const string_type& string, U32& value) { if( string.empty() ) { return false; } string_type temp( string ); trim(temp); U32 v; std::basic_istringstream i_stream((string_type)temp); if(i_stream >> v) { value = v; return true; } return false; } template bool LLStringUtilBase::convertToS32(const string_type& string, S32& value) { if( string.empty() ) { return false; } string_type temp( string ); trim(temp); S32 v; std::basic_istringstream i_stream((string_type)temp); if(i_stream >> v) { //TODO: figure out overflow and underflow reporting here //if((LONG_MAX == v) || (LONG_MIN == v)) //{ // // Underflow or overflow // return false; //} value = v; return true; } return false; } template bool LLStringUtilBase::convertToF32(const string_type& string, F32& value) { F64 value64 = 0.0; bool success = convertToF64(string, value64); if( success && (-F32_MAX <= value64) && (value64 <= F32_MAX) ) { value = (F32) value64; return true; } return false; } template bool LLStringUtilBase::convertToF64(const string_type& string, F64& value) { if( string.empty() ) { return false; } 
string_type temp( string ); trim(temp); F64 v; std::basic_istringstream i_stream((string_type)temp); if(i_stream >> v) { //TODO: figure out overflow and underflow reporting here //if( ((-HUGE_VAL == v) || (HUGE_VAL == v))) ) //{ // // Underflow or overflow // return false; //} value = v; return true; } return false; } template void LLStringUtilBase::truncate(string_type& string, size_type count) { size_type cur_size = string.size(); string.resize(count < cur_size ? count : cur_size); } // The good thing about *declaration* macros, vs. usage macros, is that now // we're done with them: we don't need them to bleed into the consuming source // file. #undef ll_convert_alias #undef ll_convert_u16_alias #undef ll_convert_wstr_alias #undef LL_CONVERT_COPY_CHARS #undef ll_convert_forms #undef ll_convert_cp_forms #endif // LL_STRING_H