diff options
Diffstat (limited to 'indra/llcommon/llsdserialize_xml.cpp')
-rw-r--r-- | indra/llcommon/llsdserialize_xml.cpp | 370 |
1 files changed, 277 insertions, 93 deletions
diff --git a/indra/llcommon/llsdserialize_xml.cpp b/indra/llcommon/llsdserialize_xml.cpp index b3596e8705..c5a7c6fc15 100644 --- a/indra/llcommon/llsdserialize_xml.cpp +++ b/indra/llcommon/llsdserialize_xml.cpp @@ -2,30 +2,25 @@ * @file llsdserialize_xml.cpp * @brief XML parsers and formatters for LLSD * - * $LicenseInfo:firstyear=2006&license=viewergpl$ - * - * Copyright (c) 2006-2007, Linden Research, Inc. - * + * $LicenseInfo:firstyear=2006&license=viewerlgpl$ * Second Life Viewer Source Code - * The source code in this file ("Source Code") is provided by Linden Lab - * to you under the terms of the GNU General Public License, version 2.0 - * ("GPL"), unless you have obtained a separate licensing agreement - * ("Other License"), formally executed by you and Linden Lab. Terms of - * the GPL can be found in doc/GPL-license.txt in this distribution, or - * online at http://secondlife.com/developers/opensource/gplv2 + * Copyright (C) 2010, Linden Research, Inc. * - * There are special exceptions to the terms and conditions of the GPL as - * it is applied to this Source Code. View the full text of the exception - * in the file doc/FLOSS-exception.txt in this software distribution, or - * online at http://secondlife.com/developers/opensource/flossexception + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. * - * By copying, modifying or distributing this software, you acknowledge - * that you have read and understood your obligations described above, - * and agree to abide by those obligations. + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." 
LINDEN LAB MAKES NO - * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, - * COMPLETENESS OR PERFORMANCE. + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA * $/LicenseInfo$ */ @@ -35,7 +30,8 @@ #include <iostream> #include <deque> -#include "apr-1/apr_base64.h" +#include "apr_base64.h" +#include <boost/regex.hpp> extern "C" { @@ -63,7 +59,7 @@ S32 LLSDXMLFormatter::format(const LLSD& data, std::ostream& ostr, U32 options) { std::streamsize old_precision = ostr.precision(25); - LLString post = ""; + std::string post; if (options & LLSDFormatter::OPTIONS_PRETTY) { post = "\n"; @@ -79,8 +75,8 @@ S32 LLSDXMLFormatter::format(const LLSD& data, std::ostream& ostr, U32 options) S32 LLSDXMLFormatter::format_impl(const LLSD& data, std::ostream& ostr, U32 options, U32 level) const { S32 format_count = 1; - LLString pre = ""; - LLString post = ""; + std::string pre; + std::string post; if (options & LLSDFormatter::OPTIONS_PRETTY) { @@ -137,12 +133,8 @@ S32 LLSDXMLFormatter::format_impl(const LLSD& data, std::ostream& ostr, U32 opti case LLSD::TypeBoolean: ostr << pre << "<boolean>"; if(mBoolAlpha || -#if( LL_WINDOWS || __GNUC__ > 2) (ostr.flags() & std::ios::boolalpha) -#else - (ostr.flags() & 0x0100) -#endif - ) + ) { ostr << (data.asBoolean() ? 
"true" : "false"); } @@ -262,12 +254,13 @@ public: ~Impl(); S32 parse(std::istream& input, LLSD& data); + S32 parseLines(std::istream& input, LLSD& data); void parsePart(const char *buf, int len); -private: void reset(); - + +private: void startElementHandler(const XML_Char* name, const XML_Char** attributes); void endElementHandler(const XML_Char* name); void characterDataHandler(const XML_Char* data, int length); @@ -307,8 +300,8 @@ private: LLSD mResult; S32 mParseCount; - bool mInLLSDElement; - bool mGracefullStop; + bool mInLLSDElement; // true if we're on LLSD + bool mGracefullStop; // true if we found the </llsd typedef std::deque<LLSD*> LLSDRefStack; LLSDRefStack mStack; @@ -317,17 +310,14 @@ private: bool mSkipping; int mSkipThrough; - std::string mCurrentKey; - std::ostringstream mCurrentContent; - - bool mPreStaged; + std::string mCurrentKey; // Current XML <tag> + std::string mCurrentContent; // String data between <tag> and </tag> }; LLSDXMLParser::Impl::Impl() { mParser = XML_ParserCreate(NULL); - mPreStaged = false; reset(); } @@ -336,7 +326,7 @@ LLSDXMLParser::Impl::~Impl() XML_ParserFree(mParser); } -bool is_eol(char c) +inline bool is_eol(char c) { return (c == '\n' || c == '\r'); } @@ -356,9 +346,9 @@ static unsigned get_till_eol(std::istream& input, char *buf, unsigned bufsize) unsigned count = 0; while (count < bufsize && input.good()) { - input.get(buf[count]); - count++; - if (is_eol(buf[count - 1])) + char c = input.get(); + buf[count++] = c; + if (is_eol(c)) break; } return count; @@ -366,7 +356,6 @@ static unsigned get_till_eol(std::istream& input, char *buf, unsigned bufsize) S32 LLSDXMLParser::Impl::parse(std::istream& input, LLSD& data) { - reset(); XML_Status status; static const int BUFFER_SIZE = 1024; @@ -420,14 +409,86 @@ S32 LLSDXMLParser::Impl::parse(std::istream& input, LLSD& data) return mParseCount; } -void LLSDXMLParser::Impl::reset() + +S32 LLSDXMLParser::Impl::parseLines(std::istream& input, LLSD& data) { - if (mPreStaged) + 
XML_Status status = XML_STATUS_OK; + + data = LLSD(); + + static const int BUFFER_SIZE = 1024; + + //static char last_buffer[ BUFFER_SIZE ]; + //std::streamsize last_num_read; + + // Must get rid of any leading \n, otherwise the stream gets into an error/eof state + clear_eol(input); + + while( !mGracefullStop + && input.good() + && !input.eof()) { - mPreStaged = false; - return; + void* buffer = XML_GetBuffer(mParser, BUFFER_SIZE); + /* + * If we happened to end our last buffer right at the end of the llsd, but the + * stream is still going we will get a null buffer here. Check for mGracefullStop. + * -- I don't think this is actually true - zero 2008-05-09 + */ + if (!buffer) + { + break; + } + + // Get one line + input.getline((char*)buffer, BUFFER_SIZE); + std::streamsize num_read = input.gcount(); + + //memcpy( last_buffer, buffer, num_read ); + //last_num_read = num_read; + + if ( num_read > 0 ) + { + if (!input.good() ) + { // Clear state that's set when we run out of buffer + input.clear(); + } + + // Re-insert with the \n that was absorbed by getline() + char * text = (char *) buffer; + if ( text[num_read - 1] == 0) + { + text[num_read - 1] = '\n'; + } + } + + status = XML_ParseBuffer(mParser, num_read, false); + if (status == XML_STATUS_ERROR) + { + break; + } + } + + if (status != XML_STATUS_ERROR + && !mGracefullStop) + { // Parse last bit + status = XML_ParseBuffer(mParser, 0, true); + } + + if (status == XML_STATUS_ERROR + && !mGracefullStop) + { + llinfos << "LLSDXMLParser::Impl::parseLines: XML_STATUS_ERROR" << llendl; + return LLSDParser::PARSE_FAILURE; } + clear_eol(input); + data = mResult; + return mParseCount; +} + + +void LLSDXMLParser::Impl::reset() +{ mResult.clear(); mParseCount = 0; @@ -440,12 +501,7 @@ void LLSDXMLParser::Impl::reset() mSkipping = false; -#if( LL_WINDOWS || __GNUC__ > 2) mCurrentKey.clear(); -#else - mCurrentKey = std::string(); -#endif - XML_ParserReset(mParser, "utf-8"); XML_SetUserData(mParser, this); @@ -476,26 
+532,61 @@ LLSDXMLParser::Impl::findAttribute(const XML_Char* name, const XML_Char** pairs) void LLSDXMLParser::Impl::parsePart(const char* buf, int len) { - void * buffer = XML_GetBuffer(mParser, len); - if (buffer != NULL && buf != NULL) + if ( buf != NULL + && len > 0 ) { - memcpy(buffer, buf, len); + XML_Status status = XML_Parse(mParser, buf, len, false); + if (status == XML_STATUS_ERROR) + { + llinfos << "Unexpected XML parsing error at start" << llendl; + } } - XML_ParseBuffer(mParser, len, false); - - mPreStaged = true; } +// Performance testing code +//#define XML_PARSER_PERFORMANCE_TESTS + +#ifdef XML_PARSER_PERFORMANCE_TESTS + +extern U64 totalTime(); +U64 readElementTime = 0; +U64 startElementTime = 0; +U64 endElementTime = 0; +U64 charDataTime = 0; +U64 parseTime = 0; + +class XML_Timer +{ +public: + XML_Timer( U64 * sum ) : mSum( sum ) + { + mStart = totalTime(); + } + ~XML_Timer() + { + *mSum += (totalTime() - mStart); + } + + U64 * mSum; + U64 mStart; +}; +#endif // XML_PARSER_PERFORMANCE_TESTS + void LLSDXMLParser::Impl::startElementHandler(const XML_Char* name, const XML_Char** attributes) { + #ifdef XML_PARSER_PERFORMANCE_TESTS + XML_Timer timer( &startElementTime ); + #endif // XML_PARSER_PERFORMANCE_TESTS + ++mDepth; if (mSkipping) { return; } - + Element element = readElement(name); - mCurrentContent.str(""); + + mCurrentContent.clear(); switch (element) { @@ -538,11 +629,7 @@ void LLSDXMLParser::Impl::startElementHandler(const XML_Char* name, const XML_Ch LLSD& newElement = map[mCurrentKey]; mStack.push_back(&newElement); -#if( LL_WINDOWS || __GNUC__ > 2) mCurrentKey.clear(); -#else - mCurrentKey = std::string(); -#endif } else if (mStack.back()->isArray()) { @@ -575,6 +662,10 @@ void LLSDXMLParser::Impl::startElementHandler(const XML_Char* name, const XML_Ch void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name) { + #ifdef XML_PARSER_PERFORMANCE_TESTS + XML_Timer timer( &endElementTime ); + #endif // XML_PARSER_PERFORMANCE_TESTS + 
--mDepth; if (mSkipping) { @@ -599,7 +690,7 @@ void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name) return; case ELEMENT_KEY: - mCurrentKey = mCurrentContent.str(); + mCurrentKey = mCurrentContent; return; default: @@ -612,9 +703,6 @@ void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name) LLSD& value = *mStack.back(); mStack.pop_back(); - std::string content = mCurrentContent.str(); - mCurrentContent.str(""); - switch (element) { case ELEMENT_UNDEF: @@ -622,39 +710,66 @@ void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name) break; case ELEMENT_BOOL: - value = content == "true" || content == "1"; + value = (mCurrentContent == "true" || mCurrentContent == "1"); break; case ELEMENT_INTEGER: - value = LLSD(content).asInteger(); + { + S32 i; + if ( sscanf(mCurrentContent.c_str(), "%d", &i ) == 1 ) + { // See if sscanf works - it's faster + value = i; + } + else + { + value = LLSD(mCurrentContent).asInteger(); + } + } break; case ELEMENT_REAL: - value = LLSD(content).asReal(); + { + F64 r; + if ( sscanf(mCurrentContent.c_str(), "%lf", &r ) == 1 ) + { // See if sscanf works - it's faster + value = r; + } + else + { + value = LLSD(mCurrentContent).asReal(); + } + } break; case ELEMENT_STRING: - value = content; + value = mCurrentContent; break; case ELEMENT_UUID: - value = LLSD(content).asUUID(); + value = LLSD(mCurrentContent).asUUID(); break; case ELEMENT_DATE: - value = LLSD(content).asDate(); + value = LLSD(mCurrentContent).asDate(); break; case ELEMENT_URI: - value = LLSD(content).asURI(); + value = LLSD(mCurrentContent).asURI(); break; case ELEMENT_BINARY: { - S32 len = apr_base64_decode_len(content.c_str()); + // Regex is expensive, but only fix for whitespace in base64, + // created by python and other non-linden systems - DEV-39358 + // Fortunately we have very little binary passing now, + // so performance impact should be negligible. 
+ poppy 2009-09-04 + boost::regex r; + r.assign("\\s"); + std::string stripped = boost::regex_replace(mCurrentContent, r, ""); + S32 len = apr_base64_decode_len(stripped.c_str()); std::vector<U8> data; data.resize(len); - len = apr_base64_decode_binary(&data[0], content.c_str()); + len = apr_base64_decode_binary(&data[0], stripped.c_str()); data.resize(len); value = data; break; @@ -668,11 +783,17 @@ void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name) // other values, map and array, have already been set break; } + + mCurrentContent.clear(); } void LLSDXMLParser::Impl::characterDataHandler(const XML_Char* data, int length) { - mCurrentContent.write(data, length); + #ifdef XML_PARSER_PERFORMANCE_TESTS + XML_Timer timer( &charDataTime ); + #endif // XML_PARSER_PERFORMANCE_TESTS + + mCurrentContent.append(data, length); } @@ -695,22 +816,69 @@ void LLSDXMLParser::Impl::sCharacterDataHandler( } +/* + This code is time critical + + This is a sample of tag occurrences of text in simstate file with ~8000 objects. 
+ A tag pair (<key>something</key>) is counted as two: + + key - 2680178 + real - 1818362 + integer - 906078 + array - 295682 + map - 191818 + uuid - 177903 + binary - 175748 + string - 53482 + undef - 40353 + boolean - 33874 + llsd - 16332 + uri - 38 + date - 1 +*/ LLSDXMLParser::Impl::Element LLSDXMLParser::Impl::readElement(const XML_Char* name) { - if (strcmp(name, "llsd") == 0) { return ELEMENT_LLSD; } - if (strcmp(name, "undef") == 0) { return ELEMENT_UNDEF; } - if (strcmp(name, "boolean") == 0) { return ELEMENT_BOOL; } - if (strcmp(name, "integer") == 0) { return ELEMENT_INTEGER; } - if (strcmp(name, "real") == 0) { return ELEMENT_REAL; } - if (strcmp(name, "string") == 0) { return ELEMENT_STRING; } - if (strcmp(name, "uuid") == 0) { return ELEMENT_UUID; } - if (strcmp(name, "date") == 0) { return ELEMENT_DATE; } - if (strcmp(name, "uri") == 0) { return ELEMENT_URI; } - if (strcmp(name, "binary") == 0) { return ELEMENT_BINARY; } - if (strcmp(name, "map") == 0) { return ELEMENT_MAP; } - if (strcmp(name, "array") == 0) { return ELEMENT_ARRAY; } - if (strcmp(name, "key") == 0) { return ELEMENT_KEY; } - + #ifdef XML_PARSER_PERFORMANCE_TESTS + XML_Timer timer( &readElementTime ); + #endif // XML_PARSER_PERFORMANCE_TESTS + + XML_Char c = *name; + switch (c) + { + case 'k': + if (strcmp(name, "key") == 0) { return ELEMENT_KEY; } + break; + case 'r': + if (strcmp(name, "real") == 0) { return ELEMENT_REAL; } + break; + case 'i': + if (strcmp(name, "integer") == 0) { return ELEMENT_INTEGER; } + break; + case 'a': + if (strcmp(name, "array") == 0) { return ELEMENT_ARRAY; } + break; + case 'm': + if (strcmp(name, "map") == 0) { return ELEMENT_MAP; } + break; + case 'u': + if (strcmp(name, "uuid") == 0) { return ELEMENT_UUID; } + if (strcmp(name, "undef") == 0) { return ELEMENT_UNDEF; } + if (strcmp(name, "uri") == 0) { return ELEMENT_URI; } + break; + case 'b': + if (strcmp(name, "binary") == 0) { return ELEMENT_BINARY; } + if (strcmp(name, "boolean") == 0) { 
return ELEMENT_BOOL; } + break; + case 's': + if (strcmp(name, "string") == 0) { return ELEMENT_STRING; } + break; + case 'l': + if (strcmp(name, "llsd") == 0) { return ELEMENT_LLSD; } + break; + case 'd': + if (strcmp(name, "date") == 0) { return ELEMENT_DATE; } + break; + } return ELEMENT_UNKNOWN; } @@ -738,5 +906,21 @@ void LLSDXMLParser::parsePart(const char *buf, int len) // virtual S32 LLSDXMLParser::doParse(std::istream& input, LLSD& data) const { - return impl.parse(input, data); + #ifdef XML_PARSER_PERFORMANCE_TESTS + XML_Timer timer( &parseTime ); + #endif // XML_PARSER_PERFORMANCE_TESTS + + if (mParseLines) + { + // Use line-based reading (faster code) + return impl.parseLines(input, data); + } + + return impl.parse(input, data); +} + +// virtual +void LLSDXMLParser::doReset() +{ + impl.reset(); } |