summaryrefslogtreecommitdiff
path: root/indra/llcommon
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon')
-rw-r--r--indra/llcommon/llstring.cpp16
-rw-r--r--indra/llcommon/llstring.h355
-rw-r--r--indra/llcommon/tests/StringVec.h37
-rw-r--r--indra/llcommon/tests/listener.h21
-rw-r--r--indra/llcommon/tests/llstring_test.cpp115
5 files changed, 517 insertions, 27 deletions
diff --git a/indra/llcommon/llstring.cpp b/indra/llcommon/llstring.cpp
index e7fe656808..fa0eb9f72c 100644
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@@ -912,22 +912,24 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);
template<>
void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims)
{
- std::string currToken;
- std::string::size_type begIdx, endIdx;
-
- begIdx = instr.find_first_not_of (delims);
- while (begIdx != std::string::npos)
+ // Starting at offset 0, scan forward for the next non-delimiter. We're
+ // done when the only characters left in 'instr' are delimiters.
+ for (std::string::size_type begIdx, endIdx = 0;
+ (begIdx = instr.find_first_not_of (delims, endIdx)) != std::string::npos; )
{
+ // Found a non-delimiter. After that, find the next delimiter.
endIdx = instr.find_first_of (delims, begIdx);
if (endIdx == std::string::npos)
{
+ // No more delimiters: this token extends to the end of the string.
endIdx = instr.length();
}
- currToken = instr.substr(begIdx, endIdx - begIdx);
+ // extract the token between begIdx and endIdx; substr() needs length
+ std::string currToken(instr.substr(begIdx, endIdx - begIdx));
LLStringUtil::trim (currToken);
tokens.push_back(currToken);
- begIdx = instr.find_first_not_of (delims, endIdx);
+ // next scan past delimiters starts at endIdx
}
}
diff --git a/indra/llcommon/llstring.h b/indra/llcommon/llstring.h
index 7b24b5e279..e4ae54cec5 100644
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@@ -40,6 +40,7 @@
#endif
#include <string.h>
+#include <boost/scoped_ptr.hpp>
#if LL_SOLARIS
// stricmp and strnicmp do not exist on Solaris:
@@ -247,7 +248,38 @@ public:
static const string_type null;
typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t;
- LL_COMMON_API static void getTokens(const string_type& instr, std::vector<string_type >& tokens, const string_type& delims);
+ /// considers any sequence of delims as a single field separator
+ LL_COMMON_API static void getTokens(const string_type& instr,
+ std::vector<string_type >& tokens,
+ const string_type& delims);
+ /// like simple scan overload, but returns scanned vector
+ LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& delims);
+ /// add support for keep_delims and quotes (either could be empty string)
+ LL_COMMON_API static void getTokens(const string_type& instr,
+ std::vector<string_type>& tokens,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes=string_type());
+ /// like keep_delims-and-quotes overload, but returns scanned vector
+ LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes=string_type());
+ /// add support for escapes (could be empty string)
+ LL_COMMON_API static void getTokens(const string_type& instr,
+ std::vector<string_type>& tokens,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes);
+ /// like escapes overload, but returns scanned vector
+ LL_COMMON_API static std::vector<string_type> getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes);
+
LL_COMMON_API static void formatNumber(string_type& numStr, string_type decimals);
LL_COMMON_API static bool formatDatetime(string_type& replacement, string_type token, string_type param, S32 secFromEpoch);
LL_COMMON_API static S32 format(string_type& s, const format_map_t& substitutions);
@@ -262,6 +294,11 @@ public:
return !string.empty() && (0 <= i) && (i <= string.size());
}
+ static bool contains(const string_type& string, T c, size_type i=0)
+ {
+ return string.find(c, i) != string_type::npos;
+ }
+
static void trimHead(string_type& string);
static void trimTail(string_type& string);
static void trim(string_type& string) { trimHead(string); trimTail(string); }
@@ -650,10 +687,324 @@ namespace LLStringFn
////////////////////////////////////////////////////////////
// NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp.
// There is no LLWStringUtil::format implementation currently.
-// Calling thse for anything other than LLStringUtil will produce link errors.
+// Calling these for anything other than LLStringUtil will produce link errors.
////////////////////////////////////////////////////////////
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr, const string_type& delims)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, delims);
+ return tokens;
+}
+
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, drop_delims, keep_delims, quotes);
+ return tokens;
+}
+
+// static
+template <class T>
+std::vector<typename LLStringUtilBase<T>::string_type>
+LLStringUtilBase<T>::getTokens(const string_type& instr,
+ const string_type& drop_delims,
+ const string_type& keep_delims,
+ const string_type& quotes,
+ const string_type& escapes)
+{
+ std::vector<string_type> tokens;
+ getTokens(instr, tokens, drop_delims, keep_delims, quotes, escapes);
+ return tokens;
+}
+
+namespace LLStringUtilBaseImpl
+{
+
+/**
+ * Input string scanner helper for getTokens(), or really any other
+ * character-parsing routine that may have to deal with escape characters.
+ * This implementation defines the concept (also an interface, should you
+ * choose to implement the concept by subclassing) and provides trivial
+ * implementations for a string @em without escape processing.
+ */
+template <class T>
+struct InString
+{
+ typedef std::basic_string<T> string_type;
+ typedef typename string_type::const_iterator const_iterator;
+
+ InString(const_iterator b, const_iterator e):
+ iter(b),
+ end(e)
+ {}
+
+ bool done() const { return iter == end; }
+ /// Is the current character (*iter) escaped? This implementation can
+ /// answer trivially because it doesn't support escapes.
+ virtual bool escaped() const { return false; }
+ /// Obtain the current character and advance @c iter.
+ virtual T next() { return *iter++; }
+ /// Does the current character match specified character?
+ virtual bool is(T ch) const { return (! done()) && *iter == ch; }
+ /// Is the current character any one of the specified characters?
+ virtual bool oneof(const string_type& delims) const
+ {
+ return (! done()) && LLStringUtilBase<T>::contains(delims, *iter);
+ }
+
+ /**
+ * Scan forward from @from until either @a delim or end. This is primarily
+ * useful for processing quoted substrings.
+ *
+ * If we do see @a delim, append everything from @from until (excluding)
+ * @a delim to @a into, advance @c iter to skip @a delim, and return @c
+ * true.
+ *
+ * If we do not see @a delim, do not alter @a into or @c iter and return
+ * @c false. Do not pass GO, do not collect $200.
+ *
+ * @note The @c false case described above implements normal getTokens()
+ * treatment of an unmatched open quote: treat the quote character as if
+ * escaped, that is, simply collect it as part of the current token. Other
+ * plausible behaviors directly affect the way getTokens() deals with an
+ * unmatched quote: e.g. throwing an exception to treat it as an error, or
+ * assuming a close quote beyond end of string (in which case return @c
+ * true).
+ */
+ virtual bool collect_until(string_type& into, const_iterator from, T delim)
+ {
+ const_iterator found = std::find(from, end, delim);
+ // If we didn't find delim, change nothing, just tell caller.
+ if (found == end)
+ return false;
+ // Found delim! Append everything between from and found.
+ into.append(from, found);
+ // advance past delim in input
+ iter = found + 1;
+ return true;
+ }
+
+ const_iterator iter, end;
+};
+
+/// InString subclass that handles escape characters
+template <class T>
+class InEscString: public InString<T>
+{
+public:
+ typedef InString<T> super;
+ typedef typename super::string_type string_type;
+ typedef typename super::const_iterator const_iterator;
+ using super::done;
+ using super::iter;
+ using super::end;
+
+ InEscString(const_iterator b, const_iterator e, const string_type& escapes_):
+ super(b, e),
+ escapes(escapes_)
+ {
+ // Even though we've already initialized 'iter' via our base-class
+ // constructor, set it again to check for initial escape char.
+ setiter(b);
+ }
+
+ /// This implementation uses the answer cached by setiter().
+ virtual bool escaped() const { return isesc; }
+ virtual T next()
+ {
+ // If we're looking at the escape character of an escape sequence,
+ // skip that character. This is the one time we can modify 'iter'
+ // without using setiter: for this one case we DO NOT CARE if the
+ // escaped character is itself an escape.
+ if (isesc)
+ ++iter;
+ // If we were looking at an escape character, this is the escaped
+ // character; otherwise it's just the next character.
+ T result(*iter);
+ // Advance iter, checking for escape sequence.
+ setiter(iter + 1);
+ return result;
+ }
+
+ virtual bool is(T ch) const
+ {
+ // Like base-class is(), except that an escaped character matches
+ // nothing.
+ return (! done()) && (! isesc) && *iter == ch;
+ }
+
+ virtual bool oneof(const string_type& delims) const
+ {
+ // Like base-class oneof(), except that an escaped character matches
+ // nothing.
+ return (! done()) && (! isesc) && LLStringUtilBase<T>::contains(delims, *iter);
+ }
+
+ virtual bool collect_until(string_type& into, const_iterator from, T delim)
+ {
+ // Deal with escapes in the characters we collect; that is, an escaped
+ // character must become just that character without the preceding
+ // escape. Collect characters in a separate string rather than
+ // directly appending to 'into' in case we do not find delim, in which
+ // case we're supposed to leave 'into' unmodified.
+ string_type collected;
+ // For scanning purposes, we're going to work directly with 'iter'.
+ // Save its current value in case we fail to see delim.
+ const_iterator save_iter(iter);
+ // Okay, set 'iter', checking for escape.
+ setiter(from);
+ while (! done())
+ {
+ // If we see an unescaped delim, stop and report success.
+ if ((! isesc) && *iter == delim)
+ {
+ // Append collected chars to 'into'.
+ into.append(collected);
+ // Don't forget to advance 'iter' past delim.
+ setiter(iter + 1);
+ return true;
+ }
+ // We're not at end, and either we're not looking at delim or it's
+ // escaped. Collect this character and keep going.
+ collected.push_back(next());
+ }
+ // Here we hit 'end' without ever seeing delim. Restore iter and tell
+ // caller.
+ setiter(save_iter);
+ return false;
+ }
+
+private:
+ void setiter(const_iterator i)
+ {
+ iter = i;
+
+ // Every time we change 'iter', set 'isesc' to be able to repetitively
+ // answer escaped() without having to rescan 'escapes'. isesc caches
+ // contains(escapes, *iter).
+
+ // We're looking at an escaped char if we're not already at end (that
+ // is, *iter is even meaningful); if *iter is in fact one of the
+ // specified escape characters; and if there's one more character
+ // following it. That is, if an escape character is the very last
+ // character of the input string, it loses its special meaning.
+ isesc = (! done()) &&
+ LLStringUtilBase<T>::contains(escapes, *iter) &&
+ (iter+1) != end;
+ }
+
+ const string_type escapes;
+ bool isesc;
+};
+
+/// getTokens() implementation based on InString concept
+template <typename INSTRING, typename string_type>
+void getTokens(INSTRING& instr, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes)
+{
+ // There are times when we want to match either drop_delims or
+ // keep_delims. Concatenate them up front to speed things up.
+ string_type all_delims(drop_delims + keep_delims);
+ // no tokens yet
+ tokens.clear();
+
+ // try for another token
+ while (! instr.done())
+ {
+ // scan past any drop_delims
+ while (instr.oneof(drop_delims))
+ {
+ // skip this drop_delim
+ instr.next();
+ // but if that was the end of the string, done
+ if (instr.done())
+ return;
+ }
+ // found the start of another token: make a slot for it
+ tokens.push_back(string_type());
+ if (instr.oneof(keep_delims))
+ {
+ // *iter is a keep_delim, a token of exactly 1 character. Append
+ // that character to the new token and proceed.
+ tokens.back().push_back(instr.next());
+ continue;
+ }
+ // Here we have a non-delimiter token, which might consist of a mix of
+ // quoted and unquoted parts. Use bash rules for quoting: you can
+ // embed a quoted substring in the midst of an unquoted token (e.g.
+ // ~/"sub dir"/myfile.txt); you can ram two quoted substrings together
+ // to make a single token (e.g. 'He said, "'"Don't."'"'). We diverge
+ // from bash in that bash considers an unmatched quote an error. Our
+ // param signature doesn't allow for errors, so just pretend it's not
+ // a quote and embed it.
+ // At this level, keep scanning until we hit the next delimiter of
+ // either type (drop_delims or keep_delims).
+ while (! instr.oneof(all_delims))
+ {
+ // If we're looking at an open quote, search forward for
+ // a close quote, collecting characters along the way.
+ if (instr.oneof(quotes) &&
+ instr.collect_until(tokens.back(), instr.iter+1, *instr.iter))
+ {
+ // collect_until is cleverly designed to do exactly what we
+ // need here. No further action needed if it returns true.
+ }
+ else
+ {
+ // Either *iter isn't a quote, or there's no matching close
+ // quote: in other words, just an ordinary char. Append it to
+ // current token.
+ tokens.back().push_back(instr.next());
+ }
+ // having scanned that segment of this token, if we've reached the
+ // end of the string, we're done
+ if (instr.done())
+ return;
+ }
+ }
+}
+
+} // namespace LLStringUtilBaseImpl
+
+// static
+template <class T>
+void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes)
+{
+ // Because this overload doesn't support escapes, use simple InString to
+ // manage input range.
+ LLStringUtilBaseImpl::InString<T> instring(string.begin(), string.end());
+ LLStringUtilBaseImpl::getTokens(instring, tokens, drop_delims, keep_delims, quotes);
+}
+
+// static
+template <class T>
+void LLStringUtilBase<T>::getTokens(const string_type& string, std::vector<string_type>& tokens,
+ const string_type& drop_delims, const string_type& keep_delims,
+ const string_type& quotes, const string_type& escapes)
+{
+ // This overload must deal with escapes. Delegate that to InEscString
+ // (unless there ARE no escapes).
+ boost::scoped_ptr< LLStringUtilBaseImpl::InString<T> > instrp;
+ if (escapes.empty())
+ instrp.reset(new LLStringUtilBaseImpl::InString<T>(string.begin(), string.end()));
+ else
+ instrp.reset(new LLStringUtilBaseImpl::InEscString<T>(string.begin(), string.end(), escapes));
+ LLStringUtilBaseImpl::getTokens(*instrp, tokens, drop_delims, keep_delims, quotes);
+}
// static
template<class T>
diff --git a/indra/llcommon/tests/StringVec.h b/indra/llcommon/tests/StringVec.h
new file mode 100644
index 0000000000..a380b00a05
--- /dev/null
+++ b/indra/llcommon/tests/StringVec.h
@@ -0,0 +1,37 @@
+/**
+ * @file StringVec.h
+ * @author Nat Goodspeed
+ * @date 2012-02-24
+ * @brief Extend TUT ensure_equals() to handle std::vector<std::string>
+ *
+ * $LicenseInfo:firstyear=2012&license=viewerlgpl$
+ * Copyright (c) 2012, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_STRINGVEC_H)
+#define LL_STRINGVEC_H
+
+#include <vector>
+#include <string>
+#include <iostream>
+
+typedef std::vector<std::string> StringVec;
+
+std::ostream& operator<<(std::ostream& out, const StringVec& strings)
+{
+ out << '(';
+ StringVec::const_iterator begin(strings.begin()), end(strings.end());
+ if (begin != end)
+ {
+ out << '"' << *begin << '"';
+ while (++begin != end)
+ {
+ out << ", \"" << *begin << '"';
+ }
+ }
+ out << ')';
+ return out;
+}
+
+#endif /* ! defined(LL_STRINGVEC_H) */
diff --git a/indra/llcommon/tests/listener.h b/indra/llcommon/tests/listener.h
index dcdb2412be..9c5c18a150 100644
--- a/indra/llcommon/tests/listener.h
+++ b/indra/llcommon/tests/listener.h
@@ -30,6 +30,8 @@
#define LL_LISTENER_H
#include "llsd.h"
+#include "llevents.h"
+#include "tests/StringVec.h"
#include <iostream>
/*****************************************************************************
@@ -133,24 +135,7 @@ struct Collect
return false;
}
void clear() { result.clear(); }
- typedef std::vector<std::string> StringList;
- StringList result;
+ StringVec result;
};
-std::ostream& operator<<(std::ostream& out, const Collect::StringList& strings)
-{
- out << '(';
- Collect::StringList::const_iterator begin(strings.begin()), end(strings.end());
- if (begin != end)
- {
- out << '"' << *begin << '"';
- while (++begin != end)
- {
- out << ", \"" << *begin << '"';
- }
- }
- out << ')';
- return out;
-}
-
#endif /* ! defined(LL_LISTENER_H) */
diff --git a/indra/llcommon/tests/llstring_test.cpp b/indra/llcommon/tests/llstring_test.cpp
index 6a1cbf652a..821deeac21 100644
--- a/indra/llcommon/tests/llstring_test.cpp
+++ b/indra/llcommon/tests/llstring_test.cpp
@@ -29,7 +29,11 @@
#include "linden_common.h"
#include "../test/lltut.h"
+#include <boost/assign/list_of.hpp>
#include "../llstring.h"
+#include "StringVec.h"
+
+using boost::assign::list_of;
namespace tut
{
@@ -750,4 +754,115 @@ namespace tut
ensure("empty substr.", !LLStringUtil::endsWith(empty, value));
ensure("empty everything.", !LLStringUtil::endsWith(empty, empty));
}
+
+ template<> template<>
+ void string_index_object_t::test<41>()
+ {
+ set_test_name("getTokens(\"delims\")");
+ ensure_equals("empty string", LLStringUtil::getTokens("", " "), StringVec());
+ ensure_equals("only delims",
+ LLStringUtil::getTokens(" \r\n ", " \r\n"), StringVec());
+ ensure_equals("sequence of delims",
+ LLStringUtil::getTokens(",,, one ,,,", ","), list_of("one"));
+ // nat considers this a dubious implementation side effect, but I'd
+ // hate to change it now...
+ ensure_equals("noncontiguous tokens",
+ LLStringUtil::getTokens(", ,, , one ,,,", ","), list_of("")("")("one"));
+ ensure_equals("space-padded tokens",
+ LLStringUtil::getTokens(", one , two ,", ","), list_of("one")("two"));
+ ensure_equals("no delims", LLStringUtil::getTokens("one", ","), list_of("one"));
+ }
+
+ // Shorthand for verifying that getTokens() behaves the same when you
+ // don't pass a string of escape characters, when you pass an empty string
+ // (different overloads), and when you pass a string of characters that
+ // aren't actually present.
+ void ensure_getTokens(const std::string& desc,
+ const std::string& string,
+ const std::string& drop_delims,
+ const std::string& keep_delims,
+ const std::string& quotes,
+ const std::vector<std::string>& expect)
+ {
+ ensure_equals(desc + " - no esc",
+ LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes),
+ expect);
+ ensure_equals(desc + " - empty esc",
+ LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes, ""),
+ expect);
+ ensure_equals(desc + " - unused esc",
+ LLStringUtil::getTokens(string, drop_delims, keep_delims, quotes, "!"),
+ expect);
+ }
+
+ void ensure_getTokens(const std::string& desc,
+ const std::string& string,
+ const std::string& drop_delims,
+ const std::string& keep_delims,
+ const std::vector<std::string>& expect)
+ {
+ ensure_getTokens(desc, string, drop_delims, keep_delims, "", expect);
+ }
+
+ template<> template<>
+ void string_index_object_t::test<42>()
+ {
+ set_test_name("getTokens(\"delims\", etc.)");
+ // Signatures to test in this method:
+ // getTokens(string, drop_delims, keep_delims [, quotes [, escapes]])
+ // If you omit keep_delims, you get the older function (test above).
+
+ // cases like the getTokens(string, delims) tests above
+ ensure_getTokens("empty string", "", " ", "", StringVec());
+ ensure_getTokens("only delims",
+ " \r\n ", " \r\n", "", StringVec());
+ ensure_getTokens("sequence of delims",
+ ",,, one ,,,", ", ", "", list_of("one"));
+ // Note contrast with the case in the previous method
+ ensure_getTokens("noncontiguous tokens",
+ ", ,, , one ,,,", ", ", "", list_of("one"));
+ ensure_getTokens("space-padded tokens",
+ ", one , two ,", ", ", "",
+ list_of("one")("two"));
+ ensure_getTokens("no delims", "one", ",", "", list_of("one"));
+
+ // drop_delims vs. keep_delims
+ ensure_getTokens("arithmetic",
+ " ab+def / xx* yy ", " ", "+-*/",
+ list_of("ab")("+")("def")("/")("xx")("*")("yy"));
+
+ // quotes
+ ensure_getTokens("no quotes",
+ "She said, \"Don't go.\"", " ", ",", "",
+ list_of("She")("said")(",")("\"Don't")("go.\""));
+ ensure_getTokens("quotes",
+ "She said, \"Don't go.\"", " ", ",", "\"",
+ list_of("She")("said")(",")("Don't go."));
+ ensure_getTokens("quotes and delims",
+ "run c:/'Documents and Settings'/someone", " ", "", "'",
+ list_of("run")("c:/Documents and Settings/someone"));
+ ensure_getTokens("unmatched quote",
+ "baby don't leave", " ", "", "'",
+ list_of("baby")("don't")("leave"));
+ ensure_getTokens("adjacent quoted",
+ "abc'def \"ghi'\"jkl' mno\"pqr", " ", "", "\"'",
+ list_of("abcdef \"ghijkl' mnopqr"));
+
+ // escapes
+ // Don't use backslash as an escape for these tests -- you'll go nuts
+ // between the C++ string scanner and getTokens() escapes. Test with
+ // something else!
+ ensure_equals("escaped delims",
+ LLStringUtil::getTokens("^ a - dog^-gone^ phrase", " ", "-", "", "^"),
+ list_of(" a")("-")("dog-gone phrase"));
+ ensure_equals("escaped quotes",
+ LLStringUtil::getTokens("say: 'this isn^'t w^orking'.", " ", "", "'", "^"),
+ list_of("say:")("this isn't working."));
+ ensure_equals("escaped escape",
+ LLStringUtil::getTokens("want x^^2", " ", "", "", "^"),
+ list_of("want")("x^2"));
+ ensure_equals("escape at end",
+ LLStringUtil::getTokens("it's^ up there^", " ", "", "'", "^"),
+ list_of("it's up")("there^"));
+ }
}