summaryrefslogtreecommitdiff
path: root/indra/llcommon/llsdserialize_xml.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llsdserialize_xml.cpp')
-rw-r--r--indra/llcommon/llsdserialize_xml.cpp370
1 files changed, 277 insertions, 93 deletions
diff --git a/indra/llcommon/llsdserialize_xml.cpp b/indra/llcommon/llsdserialize_xml.cpp
index b3596e8705..c5a7c6fc15 100644
--- a/indra/llcommon/llsdserialize_xml.cpp
+++ b/indra/llcommon/llsdserialize_xml.cpp
@@ -2,30 +2,25 @@
* @file llsdserialize_xml.cpp
* @brief XML parsers and formatters for LLSD
*
- * $LicenseInfo:firstyear=2006&license=viewergpl$
- *
- * Copyright (c) 2006-2007, Linden Research, Inc.
- *
+ * $LicenseInfo:firstyear=2006&license=viewerlgpl$
* Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab. Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlife.com/developers/opensource/gplv2
+ * Copyright (C) 2010, Linden Research, Inc.
*
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at http://secondlife.com/developers/opensource/flossexception
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
*
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
* $/LicenseInfo$
*/
@@ -35,7 +30,8 @@
#include <iostream>
#include <deque>
-#include "apr-1/apr_base64.h"
+#include "apr_base64.h"
+#include <boost/regex.hpp>
extern "C"
{
@@ -63,7 +59,7 @@ S32 LLSDXMLFormatter::format(const LLSD& data, std::ostream& ostr, U32 options)
{
std::streamsize old_precision = ostr.precision(25);
- LLString post = "";
+ std::string post;
if (options & LLSDFormatter::OPTIONS_PRETTY)
{
post = "\n";
@@ -79,8 +75,8 @@ S32 LLSDXMLFormatter::format(const LLSD& data, std::ostream& ostr, U32 options)
S32 LLSDXMLFormatter::format_impl(const LLSD& data, std::ostream& ostr, U32 options, U32 level) const
{
S32 format_count = 1;
- LLString pre = "";
- LLString post = "";
+ std::string pre;
+ std::string post;
if (options & LLSDFormatter::OPTIONS_PRETTY)
{
@@ -137,12 +133,8 @@ S32 LLSDXMLFormatter::format_impl(const LLSD& data, std::ostream& ostr, U32 opti
case LLSD::TypeBoolean:
ostr << pre << "<boolean>";
if(mBoolAlpha ||
-#if( LL_WINDOWS || __GNUC__ > 2)
(ostr.flags() & std::ios::boolalpha)
-#else
- (ostr.flags() & 0x0100)
-#endif
- )
+ )
{
ostr << (data.asBoolean() ? "true" : "false");
}
@@ -262,12 +254,13 @@ public:
~Impl();
S32 parse(std::istream& input, LLSD& data);
+ S32 parseLines(std::istream& input, LLSD& data);
void parsePart(const char *buf, int len);
-private:
void reset();
-
+
+private:
void startElementHandler(const XML_Char* name, const XML_Char** attributes);
void endElementHandler(const XML_Char* name);
void characterDataHandler(const XML_Char* data, int length);
@@ -307,8 +300,8 @@ private:
LLSD mResult;
S32 mParseCount;
- bool mInLLSDElement;
- bool mGracefullStop;
+ bool mInLLSDElement; // true if we're on LLSD
+ bool mGracefullStop; // true if we found the </llsd>
typedef std::deque<LLSD*> LLSDRefStack;
LLSDRefStack mStack;
@@ -317,17 +310,14 @@ private:
bool mSkipping;
int mSkipThrough;
- std::string mCurrentKey;
- std::ostringstream mCurrentContent;
-
- bool mPreStaged;
+ std::string mCurrentKey; // Current XML <tag>
+ std::string mCurrentContent; // String data between <tag> and </tag>
};
LLSDXMLParser::Impl::Impl()
{
mParser = XML_ParserCreate(NULL);
- mPreStaged = false;
reset();
}
@@ -336,7 +326,7 @@ LLSDXMLParser::Impl::~Impl()
XML_ParserFree(mParser);
}
-bool is_eol(char c)
+inline bool is_eol(char c)
{
return (c == '\n' || c == '\r');
}
@@ -356,9 +346,9 @@ static unsigned get_till_eol(std::istream& input, char *buf, unsigned bufsize)
unsigned count = 0;
while (count < bufsize && input.good())
{
- input.get(buf[count]);
- count++;
- if (is_eol(buf[count - 1]))
+ char c = input.get();
+ buf[count++] = c;
+ if (is_eol(c))
break;
}
return count;
@@ -366,7 +356,6 @@ static unsigned get_till_eol(std::istream& input, char *buf, unsigned bufsize)
S32 LLSDXMLParser::Impl::parse(std::istream& input, LLSD& data)
{
- reset();
XML_Status status;
static const int BUFFER_SIZE = 1024;
@@ -420,14 +409,86 @@ S32 LLSDXMLParser::Impl::parse(std::istream& input, LLSD& data)
return mParseCount;
}
-void LLSDXMLParser::Impl::reset()
+
+S32 LLSDXMLParser::Impl::parseLines(std::istream& input, LLSD& data)
{
- if (mPreStaged)
+ XML_Status status = XML_STATUS_OK;
+
+ data = LLSD();
+
+ static const int BUFFER_SIZE = 1024;
+
+ //static char last_buffer[ BUFFER_SIZE ];
+ //std::streamsize last_num_read;
+
+ // Must get rid of any leading \n, otherwise the stream gets into an error/eof state
+ clear_eol(input);
+
+ while( !mGracefullStop
+ && input.good()
+ && !input.eof())
{
- mPreStaged = false;
- return;
+ void* buffer = XML_GetBuffer(mParser, BUFFER_SIZE);
+ /*
+ * If we happened to end our last buffer right at the end of the llsd, but the
+ * stream is still going we will get a null buffer here. Check for mGracefullStop.
+ * -- I don't think this is actually true - zero 2008-05-09
+ */
+ if (!buffer)
+ {
+ break;
+ }
+
+ // Get one line
+ input.getline((char*)buffer, BUFFER_SIZE);
+ std::streamsize num_read = input.gcount();
+
+ //memcpy( last_buffer, buffer, num_read );
+ //last_num_read = num_read;
+
+ if ( num_read > 0 )
+ {
+ if (!input.good() )
+ { // Clear state that's set when we run out of buffer
+ input.clear();
+ }
+
+ // Re-insert with the \n that was absorbed by getline()
+ char * text = (char *) buffer;
+ if ( text[num_read - 1] == 0)
+ {
+ text[num_read - 1] = '\n';
+ }
+ }
+
+ status = XML_ParseBuffer(mParser, num_read, false);
+ if (status == XML_STATUS_ERROR)
+ {
+ break;
+ }
+ }
+
+ if (status != XML_STATUS_ERROR
+ && !mGracefullStop)
+ { // Parse last bit
+ status = XML_ParseBuffer(mParser, 0, true);
+ }
+
+ if (status == XML_STATUS_ERROR
+ && !mGracefullStop)
+ {
+ llinfos << "LLSDXMLParser::Impl::parseLines: XML_STATUS_ERROR" << llendl;
+ return LLSDParser::PARSE_FAILURE;
}
+ clear_eol(input);
+ data = mResult;
+ return mParseCount;
+}
+
+
+void LLSDXMLParser::Impl::reset()
+{
mResult.clear();
mParseCount = 0;
@@ -440,12 +501,7 @@ void LLSDXMLParser::Impl::reset()
mSkipping = false;
-#if( LL_WINDOWS || __GNUC__ > 2)
mCurrentKey.clear();
-#else
- mCurrentKey = std::string();
-#endif
-
XML_ParserReset(mParser, "utf-8");
XML_SetUserData(mParser, this);
@@ -476,26 +532,61 @@ LLSDXMLParser::Impl::findAttribute(const XML_Char* name, const XML_Char** pairs)
void LLSDXMLParser::Impl::parsePart(const char* buf, int len)
{
- void * buffer = XML_GetBuffer(mParser, len);
- if (buffer != NULL && buf != NULL)
+ if ( buf != NULL
+ && len > 0 )
{
- memcpy(buffer, buf, len);
+ XML_Status status = XML_Parse(mParser, buf, len, false);
+ if (status == XML_STATUS_ERROR)
+ {
+ llinfos << "Unexpected XML parsing error at start" << llendl;
+ }
}
- XML_ParseBuffer(mParser, len, false);
-
- mPreStaged = true;
}
+// Performance testing code
+//#define XML_PARSER_PERFORMANCE_TESTS
+
+#ifdef XML_PARSER_PERFORMANCE_TESTS
+
+extern U64 totalTime();
+U64 readElementTime = 0;
+U64 startElementTime = 0;
+U64 endElementTime = 0;
+U64 charDataTime = 0;
+U64 parseTime = 0;
+
+class XML_Timer
+{
+public:
+ XML_Timer( U64 * sum ) : mSum( sum )
+ {
+ mStart = totalTime();
+ }
+ ~XML_Timer()
+ {
+ *mSum += (totalTime() - mStart);
+ }
+
+ U64 * mSum;
+ U64 mStart;
+};
+#endif // XML_PARSER_PERFORMANCE_TESTS
+
void LLSDXMLParser::Impl::startElementHandler(const XML_Char* name, const XML_Char** attributes)
{
+ #ifdef XML_PARSER_PERFORMANCE_TESTS
+ XML_Timer timer( &startElementTime );
+ #endif // XML_PARSER_PERFORMANCE_TESTS
+
++mDepth;
if (mSkipping)
{
return;
}
-
+
Element element = readElement(name);
- mCurrentContent.str("");
+
+ mCurrentContent.clear();
switch (element)
{
@@ -538,11 +629,7 @@ void LLSDXMLParser::Impl::startElementHandler(const XML_Char* name, const XML_Ch
LLSD& newElement = map[mCurrentKey];
mStack.push_back(&newElement);
-#if( LL_WINDOWS || __GNUC__ > 2)
mCurrentKey.clear();
-#else
- mCurrentKey = std::string();
-#endif
}
else if (mStack.back()->isArray())
{
@@ -575,6 +662,10 @@ void LLSDXMLParser::Impl::startElementHandler(const XML_Char* name, const XML_Ch
void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name)
{
+ #ifdef XML_PARSER_PERFORMANCE_TESTS
+ XML_Timer timer( &endElementTime );
+ #endif // XML_PARSER_PERFORMANCE_TESTS
+
--mDepth;
if (mSkipping)
{
@@ -599,7 +690,7 @@ void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name)
return;
case ELEMENT_KEY:
- mCurrentKey = mCurrentContent.str();
+ mCurrentKey = mCurrentContent;
return;
default:
@@ -612,9 +703,6 @@ void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name)
LLSD& value = *mStack.back();
mStack.pop_back();
- std::string content = mCurrentContent.str();
- mCurrentContent.str("");
-
switch (element)
{
case ELEMENT_UNDEF:
@@ -622,39 +710,66 @@ void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name)
break;
case ELEMENT_BOOL:
- value = content == "true" || content == "1";
+ value = (mCurrentContent == "true" || mCurrentContent == "1");
break;
case ELEMENT_INTEGER:
- value = LLSD(content).asInteger();
+ {
+ S32 i;
+ if ( sscanf(mCurrentContent.c_str(), "%d", &i ) == 1 )
+ { // See if sscanf works - it's faster
+ value = i;
+ }
+ else
+ {
+ value = LLSD(mCurrentContent).asInteger();
+ }
+ }
break;
case ELEMENT_REAL:
- value = LLSD(content).asReal();
+ {
+ F64 r;
+ if ( sscanf(mCurrentContent.c_str(), "%lf", &r ) == 1 )
+ { // See if sscanf works - it's faster
+ value = r;
+ }
+ else
+ {
+ value = LLSD(mCurrentContent).asReal();
+ }
+ }
break;
case ELEMENT_STRING:
- value = content;
+ value = mCurrentContent;
break;
case ELEMENT_UUID:
- value = LLSD(content).asUUID();
+ value = LLSD(mCurrentContent).asUUID();
break;
case ELEMENT_DATE:
- value = LLSD(content).asDate();
+ value = LLSD(mCurrentContent).asDate();
break;
case ELEMENT_URI:
- value = LLSD(content).asURI();
+ value = LLSD(mCurrentContent).asURI();
break;
case ELEMENT_BINARY:
{
- S32 len = apr_base64_decode_len(content.c_str());
+ // Regex is expensive, but only fix for whitespace in base64,
+ // created by python and other non-linden systems - DEV-39358
+ // Fortunately we have very little binary passing now,
+ // so performance impact should be negligible. + poppy 2009-09-04
+ boost::regex r;
+ r.assign("\\s");
+ std::string stripped = boost::regex_replace(mCurrentContent, r, "");
+ S32 len = apr_base64_decode_len(stripped.c_str());
std::vector<U8> data;
data.resize(len);
- len = apr_base64_decode_binary(&data[0], content.c_str());
+ len = apr_base64_decode_binary(&data[0], stripped.c_str());
data.resize(len);
value = data;
break;
@@ -668,11 +783,17 @@ void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name)
// other values, map and array, have already been set
break;
}
+
+ mCurrentContent.clear();
}
void LLSDXMLParser::Impl::characterDataHandler(const XML_Char* data, int length)
{
- mCurrentContent.write(data, length);
+ #ifdef XML_PARSER_PERFORMANCE_TESTS
+ XML_Timer timer( &charDataTime );
+ #endif // XML_PARSER_PERFORMANCE_TESTS
+
+ mCurrentContent.append(data, length);
}
@@ -695,22 +816,69 @@ void LLSDXMLParser::Impl::sCharacterDataHandler(
}
+/*
+ This code is time critical
+
+ This is a sample of tag occurrences in the text of a simstate file with ~8000 objects.
+ A tag pair (<key>something</key>) is counted as two:
+
+ key - 2680178
+ real - 1818362
+ integer - 906078
+ array - 295682
+ map - 191818
+ uuid - 177903
+ binary - 175748
+ string - 53482
+ undef - 40353
+ boolean - 33874
+ llsd - 16332
+ uri - 38
+ date - 1
+*/
LLSDXMLParser::Impl::Element LLSDXMLParser::Impl::readElement(const XML_Char* name)
{
- if (strcmp(name, "llsd") == 0) { return ELEMENT_LLSD; }
- if (strcmp(name, "undef") == 0) { return ELEMENT_UNDEF; }
- if (strcmp(name, "boolean") == 0) { return ELEMENT_BOOL; }
- if (strcmp(name, "integer") == 0) { return ELEMENT_INTEGER; }
- if (strcmp(name, "real") == 0) { return ELEMENT_REAL; }
- if (strcmp(name, "string") == 0) { return ELEMENT_STRING; }
- if (strcmp(name, "uuid") == 0) { return ELEMENT_UUID; }
- if (strcmp(name, "date") == 0) { return ELEMENT_DATE; }
- if (strcmp(name, "uri") == 0) { return ELEMENT_URI; }
- if (strcmp(name, "binary") == 0) { return ELEMENT_BINARY; }
- if (strcmp(name, "map") == 0) { return ELEMENT_MAP; }
- if (strcmp(name, "array") == 0) { return ELEMENT_ARRAY; }
- if (strcmp(name, "key") == 0) { return ELEMENT_KEY; }
-
+ #ifdef XML_PARSER_PERFORMANCE_TESTS
+ XML_Timer timer( &readElementTime );
+ #endif // XML_PARSER_PERFORMANCE_TESTS
+
+ XML_Char c = *name;
+ switch (c)
+ {
+ case 'k':
+ if (strcmp(name, "key") == 0) { return ELEMENT_KEY; }
+ break;
+ case 'r':
+ if (strcmp(name, "real") == 0) { return ELEMENT_REAL; }
+ break;
+ case 'i':
+ if (strcmp(name, "integer") == 0) { return ELEMENT_INTEGER; }
+ break;
+ case 'a':
+ if (strcmp(name, "array") == 0) { return ELEMENT_ARRAY; }
+ break;
+ case 'm':
+ if (strcmp(name, "map") == 0) { return ELEMENT_MAP; }
+ break;
+ case 'u':
+ if (strcmp(name, "uuid") == 0) { return ELEMENT_UUID; }
+ if (strcmp(name, "undef") == 0) { return ELEMENT_UNDEF; }
+ if (strcmp(name, "uri") == 0) { return ELEMENT_URI; }
+ break;
+ case 'b':
+ if (strcmp(name, "binary") == 0) { return ELEMENT_BINARY; }
+ if (strcmp(name, "boolean") == 0) { return ELEMENT_BOOL; }
+ break;
+ case 's':
+ if (strcmp(name, "string") == 0) { return ELEMENT_STRING; }
+ break;
+ case 'l':
+ if (strcmp(name, "llsd") == 0) { return ELEMENT_LLSD; }
+ break;
+ case 'd':
+ if (strcmp(name, "date") == 0) { return ELEMENT_DATE; }
+ break;
+ }
return ELEMENT_UNKNOWN;
}
@@ -738,5 +906,21 @@ void LLSDXMLParser::parsePart(const char *buf, int len)
// virtual
S32 LLSDXMLParser::doParse(std::istream& input, LLSD& data) const
{
- return impl.parse(input, data);
+ #ifdef XML_PARSER_PERFORMANCE_TESTS
+ XML_Timer timer( &parseTime );
+ #endif // XML_PARSER_PERFORMANCE_TESTS
+
+ if (mParseLines)
+ {
+ // Use line-based reading (faster code)
+ return impl.parseLines(input, data);
+ }
+
+ return impl.parse(input, data);
+}
+
+// virtual
+void LLSDXMLParser::doReset()
+{
+ impl.reset();
}