summaryrefslogtreecommitdiff
path: root/indra/llcommon/llstring.h
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llstring.h')
-rw-r--r--indra/llcommon/llstring.h355
1 files changed, 353 insertions, 2 deletions
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 7b24b5e279..e4ae54cec5 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -40,6 +40,7 @@
#endif
#include <string.h>
+#include <boost/scoped_ptr.hpp>
#if LL_SOLARIS
// stricmp and strnicmp do not exist on Solaris:
@@ -247,7 +248,38 @@ public:
static const string_type null;
typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t;
- LL_COMMON_API static void getTokens(const string_type& instr, std::vector<string_type >& tokens, const string_type& delims);
+ /// considers any sequence of delims as a single field separator
+ LL_COMMON_API static void getTokens(const string_type& instr,
+ std::vector<string_type >& tokens,
+ const string_type& delims);
+ /// like simple scan overload, but returns scanned vector
+ LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& delims);
+ /// add support for keep_delims and quotes (either could be empty string)
+ LL_COMMON_API static void getTokens(const string_type& instr,
+ std::vector<string_type>& tokens,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes=string_type());
+ /// like keep_delims-and-quotes overload, but returns scanned vector
+ LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes=string_type());
+ /// add support for escapes (could be empty string)
+ LL_COMMON_API static void getTokens(const string_type& instr,
+ std::vector<string_type>& tokens,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes);
+ /// like escapes overload, but returns scanned vector
+ LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes);
+
LL_COMMON_API static void formatNumber(string_type& numStr, string_type decimals);
LL_COMMON_API static bool formatDatetime(string_type& replacement, string_type token, string_type param, S32 secFromEpoch);
LL_COMMON_API static S32 format(string_type& s, const format_map_t& substitutions);
@@ -262,6 +294,11 @@ public:
return !string.empty() && (0 <= i) && (i <= string.size());
}
+ static bool contains(const string_type& string, T c, size_type i=0)
+ {
+ return string.find(c, i) != string_type::npos;
+ }
+
static void trimHead(string_type& string);
static void trimTail(string_type& string);
static void trim(string_type& string) { trimHead(string); trimTail(string); }
@@ -650,10 +687,324 @@ namespace LLStringFn
////////////////////////////////////////////////////////////
// NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp.
// There is no LLWStringUtil::format implementation currently.
-// Calling thse for anything other than LLStringUtil will produce link errors.
+// Calling these for anything other than LLStringUtil will produce link errors.
////////////////////////////////////////////////////////////
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr, const string_type& delims)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, delims);
+ return tokens;
+}
+
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, drop_delims, keep_delims, quotes);
+ return tokens;
+}
+
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, drop_delims, keep_delims, quotes, escapes);
+ return tokens;
+}
+
+namespace LLStringUtilBaseImpl
+{
+
+/**
+ * Input string scanner helper for getTokens(), or really any other
+ * character-parsing routine that may have to deal with escape characters.
+ * This implementation defines the concept (also an interface, should you
+ * choose to implement the concept by subclassing) and provides trivial
+ * implementations for a string @em without escape processing.
+ */
+template <class T>
+struct InString
+{
+ typedef std::basic_string<T> string_type;
+ typedef typename string_type::const_iterator const_iterator;
+
+ InString(const_iterator b, const_iterator e):
+ iter(b),
+ end(e)
+ {}
+
+ bool done() const { return iter == end; }
+ /// Is the current character (*iter) escaped? This implementation can
+ /// answer trivially because it doesn't support escapes.
+ virtual bool escaped() const { return false; }
+ /// Obtain the current character and advance @c iter.
+ virtual T next() { return *iter++; }
+ /// Does the current character match specified character?
+ virtual bool is(T ch) const { return (! done()) && *iter == ch; }
+ /// Is the current character any one of the specified characters?
+ virtual bool oneof(const string_type& delims) const
+ {
+ return (! done()) && LLStringUtilBase<T>::contains(delims, *iter);
+ }
+
+ /**
+ * Scan forward from @from until either @a delim or end. This is primarily
+ * useful for processing quoted substrings.
+ *
+ * If we do see @a delim, append everything from @from until (excluding)
+ * @a delim to @a into, advance @c iter to skip @a delim, and return @c
+ * true.
+ *
+ * If we do not see @a delim, do not alter @a into or @c iter and return
+ * @c false. Do not pass GO, do not collect $200.
+ *
+ * @note The @c false case described above implements normal getTokens()
+ * treatment of an unmatched open quote: treat the quote character as if
+ * escaped, that is, simply collect it as part of the current token. Other
+ * plausible behaviors directly affect the way getTokens() deals with an
+ * unmatched quote: e.g. throwing an exception to treat it as an error, or
+ * assuming a close quote beyond end of string (in which case return @c
+ * true).
+ */
+ virtual bool collect_until(string_type& into, const_iterator from, T delim)
+ {
+ const_iterator found = std::find(from, end, delim);
+ // If we didn't find delim, change nothing, just tell caller.
+ if (found == end)
+ return false;
+ // Found delim! Append everything between from and found.
+ into.append(from, found);
+ // advance past delim in input
+ iter = found + 1;
+ return true;
+ }
+
+ const_iterator iter, end;
+};
+
+/// InString subclass that handles escape characters
+template <class T>
+class InEscString: public InString<T>
+{
+public:
+ typedef InString<T> super;
+ typedef typename super::string_type string_type;
+ typedef typename super::const_iterator const_iterator;
+ using super::done;
+ using super::iter;
+ using super::end;
+
+ InEscString(const_iterator b, const_iterator e, const string_type& escapes_):
+ super(b, e),
+ escapes(escapes_)
+ {
+ // Even though we've already initialized 'iter' via our base-class
+ // constructor, set it again to check for initial escape char.
+ setiter(b);
+ }
+
+ /// This implementation uses the answer cached by setiter().
+ virtual bool escaped() const { return isesc; }
+ virtual T next()
+ {
+ // If we're looking at the escape character of an escape sequence,
+ // skip that character. This is the one time we can modify 'iter'
+ // without using setiter: for this one case we DO NOT CARE if the
+ // escaped character is itself an escape.
+ if (isesc)
+ ++iter;
+ // If we were looking at an escape character, this is the escaped
+ // character; otherwise it's just the next character.
+ T result(*iter);
+ // Advance iter, checking for escape sequence.
+ setiter(iter + 1);
+ return result;
+ }
+
+ virtual bool is(T ch) const
+ {
+ // Like base-class is(), except that an escaped character matches
+ // nothing.
+ return (! done()) && (! isesc) && *iter == ch;
+ }
+
+ virtual bool oneof(const string_type& delims) const
+ {
+ // Like base-class oneof(), except that an escaped character matches
+ // nothing.
+ return (! done()) && (! isesc) && LLStringUtilBase<T>::contains(delims, *iter);
+ }
+
+ virtual bool collect_until(string_type& into, const_iterator from, T delim)
+ {
+ // Deal with escapes in the characters we collect; that is, an escaped
+ // character must become just that character without the preceding
+ // escape. Collect characters in a separate string rather than
+ // directly appending to 'into' in case we do not find delim, in which
+ // case we're supposed to leave 'into' unmodified.
+ string_type collected;
+ // For scanning purposes, we're going to work directly with 'iter'.
+ // Save its current value in case we fail to see delim.
+ const_iterator save_iter(iter);
+ // Okay, set 'iter', checking for escape.
+ setiter(from);
+ while (! done())
+ {
+ // If we see an unescaped delim, stop and report success.
+ if ((! isesc) && *iter == delim)
+ {
+ // Append collected chars to 'into'.
+ into.append(collected);
+ // Don't forget to advance 'iter' past delim.
+ setiter(iter + 1);
+ return true;
+ }
+ // We're not at end, and either we're not looking at delim or it's
+ // escaped. Collect this character and keep going.
+ collected.push_back(next());
+ }
+ // Here we hit 'end' without ever seeing delim. Restore iter and tell
+ // caller.
+ setiter(save_iter);
+ return false;
+ }
+
+private:
+ void setiter(const_iterator i)
+ {
+ iter = i;
+
+ // Every time we change 'iter', set 'isesc' to be able to repetitively
+ // answer escaped() without having to rescan 'escapes'. isesc caches
+ // contains(escapes, *iter).
+
+ // We're looking at an escaped char if we're not already at end (that
+ // is, *iter is even meaningful); if *iter is in fact one of the
+ // specified escape characters; and if there's one more character
+ // following it. That is, if an escape character is the very last
+ // character of the input string, it loses its special meaning.
+ isesc = (! done()) &&
+ LLStringUtilBase<T>::contains(escapes, *iter) &&
+ (iter+1) != end;
+ }
+
+ const string_type escapes;
+ bool isesc;
+};
+
+/// getTokens() implementation based on InString concept
+template <typename INSTRING, typename string_type>
+void getTokens(INSTRING& instr, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes)
+{
+ // There are times when we want to match either drop_delims or
+ // keep_delims. Concatenate them up front to speed things up.
+ string_type all_delims(drop_delims + keep_delims);
+ // no tokens yet
+ tokens.clear();
+
+ // try for another token
+ while (! instr.done())
+ {
+ // scan past any drop_delims
+ while (instr.oneof(drop_delims))
+ {
+ // skip this drop_delim
+ instr.next();
+ // but if that was the end of the string, done
+ if (instr.done())
+ return;
+ }
+ // found the start of another token: make a slot for it
+ tokens.push_back(string_type());
+ if (instr.oneof(keep_delims))
+ {
+ // *iter is a keep_delim, a token of exactly 1 character. Append
+ // that character to the new token and proceed.
+ tokens.back().push_back(instr.next());
+ continue;
+ }
+ // Here we have a non-delimiter token, which might consist of a mix of
+ // quoted and unquoted parts. Use bash rules for quoting: you can
+ // embed a quoted substring in the midst of an unquoted token (e.g.
+ // ~/"sub dir"/myfile.txt); you can ram two quoted substrings together
+ // to make a single token (e.g. 'He said, "'"Don't."'"'). We diverge
+ // from bash in that bash considers an unmatched quote an error. Our
+ // param signature doesn't allow for errors, so just pretend it's not
+ // a quote and embed it.
+ // At this level, keep scanning until we hit the next delimiter of
+ // either type (drop_delims or keep_delims).
+ while (! instr.oneof(all_delims))
+ {
+ // If we're looking at an open quote, search forward for
+ // a close quote, collecting characters along the way.
+ if (instr.oneof(quotes) &&
+ instr.collect_until(tokens.back(), instr.iter+1, *instr.iter))
+ {
+ // collect_until is cleverly designed to do exactly what we
+ // need here. No further action needed if it returns true.
+ }
+ else
+ {
+ // Either *iter isn't a quote, or there's no matching close
+ // quote: in other words, just an ordinary char. Append it to
+ // current token.
+ tokens.back().push_back(instr.next());
+ }
+ // having scanned that segment of this token, if we've reached the
+ // end of the string, we're done
+ if (instr.done())
+ return;
+ }
+ }
+}
+
+} // namespace LLStringUtilBaseImpl
+
+// static
+template <class T>
+void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes)
+{
+ // Because this overload doesn't support escapes, use simple InString to
+ // manage input range.
+ LLStringUtilBaseImpl::InString<T> instring(string.begin(), string.end());
+ LLStringUtilBaseImpl::getTokens(instring, tokens, drop_delims, keep_delims, quotes);
+}
+
+// static
+template <class T>
+void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes, const string_type& escapes)
+{
+ // This overload must deal with escapes. Delegate that to InEscString
+ // (unless there ARE no escapes).
+ boost::scoped_ptr< LLStringUtilBaseImpl::InString<T> > instrp;
+ if (escapes.empty())
+ instrp.reset(new LLStringUtilBaseImpl::InString<T>(string.begin(), string.end()));
+ else
+ instrp.reset(new LLStringUtilBaseImpl::InEscString<T>(string.begin(), string.end(), escapes));
+ LLStringUtilBaseImpl::getTokens(*instrp, tokens, drop_delims, keep_delims, quotes);
+}
// static
template<class T>