diff options
Diffstat (limited to 'indra/llcommon')
-rw-r--r-- | indra/llcommon/llleap.cpp | 31 | ||||
-rw-r--r-- | indra/llcommon/llsdserialize.cpp | 183 | ||||
-rw-r--r-- | indra/llcommon/llstreamtools.cpp | 26 | ||||
-rw-r--r-- | indra/llcommon/llstreamtools.h | 27 | ||||
-rw-r--r-- | indra/llcommon/tests/llleap_test.cpp | 32 | ||||
-rw-r--r-- | indra/llcommon/tests/llsdserialize_test.cpp | 610 |
6 files changed, 636 insertions, 273 deletions
diff --git a/indra/llcommon/llleap.cpp b/indra/llcommon/llleap.cpp index c87c0758fe..259f5bc505 100644 --- a/indra/llcommon/llleap.cpp +++ b/indra/llcommon/llleap.cpp @@ -204,30 +204,35 @@ public: LLSD packet(LLSDMap("pump", pump)("data", data)); std::ostringstream buffer; - buffer << LLSDNotationStreamer(packet); + // SL-18330: for large data blocks, it's much faster to parse binary + // LLSD than notation LLSD. Use serialize(LLSD_BINARY) rather than + // directly calling LLSDBinaryFormatter because, unlike the latter, + // serialize() prepends the relevant header, needed by a general- + // purpose LLSD parser to distinguish binary from notation. + LLSDSerialize::serialize(packet, buffer, LLSDSerialize::LLSD_BINARY, + LLSDFormatter::OPTIONS_NONE); /*==========================================================================*| // DEBUGGING ONLY: don't copy str() if we can avoid it. std::string strdata(buffer.str()); if (std::size_t(buffer.tellp()) != strdata.length()) { - LL_ERRS("LLLeap") << "tellp() -> " << buffer.tellp() << " != " + LL_ERRS("LLLeap") << "tellp() -> " << static_cast<U64>(buffer.tellp()) << " != " << "str().length() -> " << strdata.length() << LL_ENDL; } // DEBUGGING ONLY: reading back is terribly inefficient. std::istringstream readback(strdata); LLSD echo; - LLPointer<LLSDParser> parser(new LLSDNotationParser()); - S32 parse_status(parser->parse(readback, echo, strdata.length())); - if (parse_status == LLSDParser::PARSE_FAILURE) + bool parse_status(LLSDSerialize::deserialize(echo, readback, strdata.length())); + if (! parse_status) { - LL_ERRS("LLLeap") << "LLSDNotationParser() cannot parse output of " - << "LLSDNotationStreamer()" << LL_ENDL; + LL_ERRS("LLLeap") << "LLSDSerialize::deserialize() cannot parse output of " + << "LLSDSerialize::serialize(LLSD_BINARY)" << LL_ENDL; } if (! llsd_equals(echo, packet)) { - LL_ERRS("LLLeap") << "LLSDNotationParser() produced different LLSD " - << "than passed to LLSDNotationStreamer()" << LL_ENDL; + LL_ERRS("LLLeap") << "LLSDSerialize::deserialize() returned different LLSD " + << "than passed to LLSDSerialize::serialize()" << LL_ENDL; } |*==========================================================================*/ @@ -314,9 +319,17 @@ public: LL_DEBUGS("LLLeap") << "needed " << mExpect << " bytes, got " << childout.size() << ", parsing LLSD" << LL_ENDL; LLSD data; +#if 1 + // specifically require notation LLSD from child LLPointer<LLSDParser> parser(new LLSDNotationParser()); S32 parse_status(parser->parse(childout.get_istream(), data, mExpect)); if (parse_status == LLSDParser::PARSE_FAILURE) +#else + // SL-18330: accept any valid LLSD serialization format from child + // Unfortunately this runs into trouble we have not yet debugged. + bool parse_status(LLSDSerialize::deserialize(data, childout.get_istream(), mExpect)); + if (! parse_status) +#endif { bad_protocol("unparseable LLSD data"); } diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index af57f4ac5e..a14a6b5b1b 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -45,6 +45,7 @@ #endif #include "lldate.h" +#include "llmemorystream.h" #include "llsd.h" #include "llstring.h" #include "lluri.h" @@ -61,6 +62,23 @@ const std::string LLSD_NOTATION_HEADER("llsd/notation"); #define windowBits 15 #define ENABLE_ZLIB_GZIP 32 +// If we published this in llsdserialize.h, we could use it in the +// implementation of LLSDOStreamer's operator<<(). +template <class Formatter> +void format_using(const LLSD& data, std::ostream& ostr, + LLSDFormatter::EFormatterOptions options=LLSDFormatter::OPTIONS_PRETTY_BINARY) +{ + LLPointer<Formatter> f{ new Formatter }; + f->format(data, ostr, options); +} + +template <class Parser> +S32 parse_using(std::istream& istr, LLSD& data, size_t max_bytes, S32 max_depth=-1) +{ + LLPointer<Parser> p{ new Parser }; + return p->parse(istr, data, max_bytes, max_depth); +} + /** * LLSDSerialize */ @@ -83,10 +101,10 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize f = new LLSDXMLFormatter; break; - case LLSD_NOTATION: - str << "<? " << LLSD_NOTATION_HEADER << " ?>\n"; - f = new LLSDNotationFormatter; - break; + case LLSD_NOTATION: + str << "<? " << LLSD_NOTATION_HEADER << " ?>\n"; + f = new LLSDNotationFormatter; + break; default: LL_WARNS() << "serialize request for unknown ELLSD_Serialize" << LL_ENDL; @@ -101,18 +119,31 @@ void LLSDSerialize::serialize(const LLSD& sd, std::ostream& str, ELLSD_Serialize // static bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, llssize max_bytes) { - LLPointer<LLSDParser> p = NULL; char hdr_buf[MAX_HDR_LEN + 1] = ""; /* Flawfinder: ignore */ - int i; - int inbuf = 0; - bool legacy_no_header = false; bool fail_if_not_legacy = false; - std::string header; /* - * Get the first line before anything. + * Get the first line before anything. Don't read more than max_bytes: + * this get() overload reads no more than (count-1) bytes into the + * specified buffer. In the usual case when max_bytes exceeds + * sizeof(hdr_buf), get() will read no more than sizeof(hdr_buf)-2. */ - str.get(hdr_buf, MAX_HDR_LEN, '\n'); + str.get(hdr_buf, llmin(max_bytes+1, sizeof(hdr_buf)-1), '\n'); + auto inbuf = str.gcount(); + // https://en.cppreference.com/w/cpp/io/basic_istream/get + // When the get() above sees the specified delimiter '\n', it stops there + // without pulling it from the stream. If it turns out that the stream + // does NOT contain a header, and the content includes meaningful '\n', + // it's important to pull that into hdr_buf too. + if (inbuf < max_bytes && str.get(hdr_buf[inbuf])) + { + // got the delimiting '\n' + ++inbuf; + // None of the following requires that hdr_buf contain a final '\0' + // byte. We could store one if needed, since even the incremented + // inbuf won't exceed sizeof(hdr_buf)-1, but there's no need. + } + std::string header{ hdr_buf, inbuf }; if (str.fail()) { str.clear(); @@ -120,79 +151,97 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, llssize max_bytes) } if (!strncasecmp(LEGACY_NON_HEADER, hdr_buf, strlen(LEGACY_NON_HEADER))) /* Flawfinder: ignore */ + { // Create a LLSD XML parser, and parse the first chunk read above. + LLSDXMLParser x; + x.parsePart(hdr_buf, inbuf); // Parse the first part that was already read + auto parsed = x.parse(str, sd, max_bytes - inbuf); // Parse the rest of it + // Formally we should probably check (parsed != PARSE_FAILURE && + // parsed > 0), but since PARSE_FAILURE is -1, this suffices. + return (parsed > 0); + } + + if (fail_if_not_legacy) { - legacy_no_header = true; - inbuf = (int)str.gcount(); + LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; + return false; } - else + + /* + * Remove the newline chars + */ + std::string::size_type lastchar = header.find_last_not_of("\r\n"); + if (lastchar != std::string::npos) { - if (fail_if_not_legacy) - goto fail; - /* - * Remove the newline chars - */ - for (i = 0; i < MAX_HDR_LEN; i++) - { - if (hdr_buf[i] == 0 || hdr_buf[i] == '\r' || - hdr_buf[i] == '\n') - { - hdr_buf[i] = 0; - break; - } - } - header = hdr_buf; + // It's important that find_last_not_of() returns size_type, which is + // why lastchar explicitly declares the type above. erase(size_type) + // erases from that offset to the end of the string, whereas + // erase(iterator) erases only a single character. + header.erase(lastchar+1); + } - std::string::size_type start = std::string::npos; - std::string::size_type end = std::string::npos; - start = header.find_first_not_of("<? "); - if (start != std::string::npos) + // trim off the <? ... ?> header syntax + auto start = header.find_first_not_of("<? "); + if (start != std::string::npos) + { + auto end = header.find_first_of(" ?", start); + if (end != std::string::npos) { - end = header.find_first_of(" ?", start); + header = header.substr(start, end - start); + ws(str); } - if ((start == std::string::npos) || (end == std::string::npos)) - goto fail; - - header = header.substr(start, end - start); - ws(str); } /* * Create the parser as appropriate */ - if (legacy_no_header) - { // Create a LLSD XML parser, and parse the first chunk read above - LLSDXMLParser* x = new LLSDXMLParser(); - x->parsePart(hdr_buf, inbuf); // Parse the first part that was already read - x->parseLines(str, sd); // Parse the rest of it - delete x; - return true; - } - - if (header == LLSD_BINARY_HEADER) + if (0 == LLStringUtil::compareInsensitive(header, LLSD_BINARY_HEADER)) { - p = new LLSDBinaryParser; + return (parse_using<LLSDBinaryParser>(str, sd, max_bytes-inbuf) > 0); } - else if (header == LLSD_XML_HEADER) + else if (0 == LLStringUtil::compareInsensitive(header, LLSD_XML_HEADER)) { - p = new LLSDXMLParser; + return (parse_using<LLSDXMLParser>(str, sd, max_bytes-inbuf) > 0); } - else if (header == LLSD_NOTATION_HEADER) + else if (0 == LLStringUtil::compareInsensitive(header, LLSD_NOTATION_HEADER)) { - p = new LLSDNotationParser; + return (parse_using<LLSDNotationParser>(str, sd, max_bytes-inbuf) > 0); } - else + else // no header we recognize { - LL_WARNS() << "deserialize request for unknown ELLSD_Serialize" << LL_ENDL; - } - - if (p.notNull()) - { - p->parse(str, sd, max_bytes); - return true; + LLPointer<LLSDParser> p; + if (inbuf && hdr_buf[0] == '<') + { + // looks like XML + LL_DEBUGS() << "deserialize request with no header, assuming XML" << LL_ENDL; + p = new LLSDXMLParser; + } + else + { + // assume notation + LL_DEBUGS() << "deserialize request with no header, assuming notation" << LL_ENDL; + p = new LLSDNotationParser; + } + // Since we've already read 'inbuf' bytes into 'hdr_buf', prepend that + // data to whatever remains in 'str'. + LLMemoryStreamBuf already(reinterpret_cast<const U8*>(hdr_buf), inbuf); + cat_streambuf prebuff(&already, str.rdbuf()); + std::istream prepend(&prebuff); +#if 1 + return (p->parse(prepend, sd, max_bytes) > 0); +#else + // debugging the reconstituted 'prepend' stream + // allocate a buffer that we hope is big enough for the whole thing + std::vector<char> wholemsg((max_bytes == size_t(SIZE_UNLIMITED))? 1024 : max_bytes); + prepend.read(wholemsg.data(), std::min(max_bytes, wholemsg.size())); + LLMemoryStream replay(reinterpret_cast<const U8*>(wholemsg.data()), prepend.gcount()); + auto success{ p->parse(replay, sd, prepend.gcount()) > 0 }; + { + LL_DEBUGS() << (success? "parsed: $$" : "failed: '") + << std::string(wholemsg.data(), llmin(prepend.gcount(), 100)) << "$$" + << LL_ENDL; + } + return success; +#endif } - -fail: - LL_WARNS() << "deserialize LLSD parse failure" << LL_ENDL; - return false; } /** @@ -2394,5 +2443,3 @@ U8* unzip_llsdNavMesh( bool& valid, size_t& outsize, std::istream& is, S32 size return result; } - - diff --git a/indra/llcommon/llstreamtools.cpp b/indra/llcommon/llstreamtools.cpp index 1ff15fcf89..bc32b6fd9e 100644 --- a/indra/llcommon/llstreamtools.cpp +++ b/indra/llcommon/llstreamtools.cpp @@ -513,3 +513,29 @@ std::istream& operator>>(std::istream& str, const char *tocheck) } return str; } + +int cat_streambuf::underflow() +{ + if (gptr() == egptr()) + { + // here because our buffer is empty + std::streamsize size = 0; + // Until we've run out of mInputs, try reading the first of them + // into mBuffer. If that fetches some characters, break the loop. + while (! mInputs.empty() + && ! (size = mInputs.front()->sgetn(mBuffer.data(), mBuffer.size()))) + { + // We tried to read mInputs.front() but got zero characters. + // Discard the first streambuf and try the next one. + mInputs.pop_front(); + } + // Either we ran out of mInputs or we succeeded in reading some + // characters, that is, size != 0. Tell base class what we have. + setg(mBuffer.data(), mBuffer.data(), mBuffer.data() + size); + } + // If we fell out of the above loop with mBuffer still empty, return + // eof(), otherwise return the next character. + return (gptr() == egptr()) + ? std::char_traits<char>::eof() + : std::char_traits<char>::to_int_type(*gptr()); +} diff --git a/indra/llcommon/llstreamtools.h b/indra/llcommon/llstreamtools.h index 1b04bf91d7..bb7bc20327 100644 --- a/indra/llcommon/llstreamtools.h +++ b/indra/llcommon/llstreamtools.h @@ -27,8 +27,10 @@ #ifndef LL_STREAM_TOOLS_H #define LL_STREAM_TOOLS_H +#include <deque> #include <iostream> #include <string> +#include <vector> // unless specifed otherwise these all return input_stream.good() @@ -113,6 +115,27 @@ LL_COMMON_API std::streamsize fullread( LL_COMMON_API std::istream& operator>>(std::istream& str, const char *tocheck); -#endif - +/** + * cat_streambuf is a std::streambuf subclass that accepts a variadic number + * of std::streambuf* (e.g. some_istream.rdbuf()) and virtually concatenates + * their contents. + */ +// derived from https://stackoverflow.com/a/49441066/5533635 +class cat_streambuf: public std::streambuf +{ +private: + std::deque<std::streambuf*> mInputs; + std::vector<char> mBuffer; + +public: + // only valid for std::streambuf* arguments + template <typename... Inputs> + cat_streambuf(Inputs... inputs): + mInputs{inputs...}, + mBuffer(1024) + {} + + int underflow() override; +}; +#endif diff --git a/indra/llcommon/tests/llleap_test.cpp b/indra/llcommon/tests/llleap_test.cpp index 9754353ab0..6c799b7993 100644 --- a/indra/llcommon/tests/llleap_test.cpp +++ b/indra/llcommon/tests/llleap_test.cpp @@ -110,12 +110,12 @@ namespace tut "import os\n" "import sys\n" "\n" - // Don't forget that this Python script is written to some - // temp directory somewhere! Its __file__ is useless in - // finding indra/lib/python. Use our __FILE__, with - // raw-string syntax to deal with Windows pathnames. - "mydir = os.path.dirname(r'" << __FILE__ << "')\n" - "from llbase import llsd\n" + "try:\n" + // new freestanding llsd package + " import llsd\n" + "except ImportError:\n" + // older llbase.llsd module + " from llbase import llsd\n" "\n" "class ProtocolError(Exception):\n" " def __init__(self, msg, data):\n" @@ -126,26 +126,26 @@ namespace tut " pass\n" "\n" "def get():\n" - " hdr = ''\n" - " while ':' not in hdr and len(hdr) < 20:\n" - " hdr += sys.stdin.read(1)\n" + " hdr = []\n" + " while b':' not in hdr and len(hdr) < 20:\n" + " hdr.append(sys.stdin.buffer.read(1))\n" " if not hdr:\n" " sys.exit(0)\n" - " if not hdr.endswith(':'):\n" + " if not hdr[-1] == b':':\n" " raise ProtocolError('Expected len:data, got %r' % hdr, hdr)\n" " try:\n" - " length = int(hdr[:-1])\n" + " length = int(b''.join(hdr[:-1]))\n" " except ValueError:\n" " raise ProtocolError('Non-numeric len %r' % hdr[:-1], hdr[:-1])\n" " parts = []\n" " received = 0\n" " while received < length:\n" - " parts.append(sys.stdin.read(length - received))\n" + " parts.append(sys.stdin.buffer.read(length - received))\n" " received += len(parts[-1])\n" - " data = ''.join(parts)\n" + " data = b''.join(parts)\n" " assert len(data) == length\n" " try:\n" - " return llsd.parse(data.encode())\n" + " return llsd.parse(data)\n" // Seems the old indra.base.llsd module didn't properly // convert IndexError (from running off end of string) to // LLSDParseError. @@ -185,11 +185,11 @@ namespace tut " return _reply\n" "\n" "def put(req):\n" - " sys.stdout.write(':'.join((str(len(req)), req)))\n" + " sys.stdout.buffer.write(b'%d:%b' % (len(req), req))\n" " sys.stdout.flush()\n" "\n" "def send(pump, data):\n" - " put(llsd.format_notation(dict(pump=pump, data=data)).decode())\n" + " put(llsd.format_notation(dict(pump=pump, data=data)))\n" "\n" "def request(pump, data):\n" " # we expect 'data' is a dict\n" diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index c246f5ee56..29e3007aff 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -46,20 +46,24 @@ typedef U32 uint32_t; #include "boost/range.hpp" #include "boost/foreach.hpp" -#include "boost/function.hpp" #include "boost/bind.hpp" #include "boost/phoenix/bind/bind_function.hpp" #include "boost/phoenix/core/argument.hpp" using namespace boost::phoenix; -#include "../llsd.h" -#include "../llsdserialize.h" +#include "llsd.h" +#include "llsdserialize.h" #include "llsdutil.h" -#include "../llformat.h" +#include "llformat.h" +#include "llmemorystream.h" #include "../test/lltut.h" #include "../test/namedtempfile.h" #include "stringize.h" +#include <functional> + +typedef std::function<void(const LLSD& data, std::ostream& str)> FormatterFunction; +typedef std::function<bool(std::istream& istr, LLSD& data, llssize max_bytes)> ParserFunction; std::vector<U8> string_to_vector(const std::string& str) { @@ -112,7 +116,7 @@ namespace tut mSD = LLUUID::null; expected = "<llsd><uuid /></llsd>\n"; xml_test("null uuid", expected); - + mSD = LLUUID("c96f9b1e-f589-4100-9774-d98643ce0bed"); expected = "<llsd><uuid>c96f9b1e-f589-4100-9774-d98643ce0bed</uuid></llsd>\n"; xml_test("uuid", expected); @@ -136,7 +140,7 @@ namespace tut expected = "<llsd><binary encoding=\"base64\">aGVsbG8=</binary></llsd>\n"; xml_test("binary", expected); } - + template<> template<> void sd_xml_object::test<2>() { @@ -225,7 +229,7 @@ namespace tut expected = "<llsd><map><key>baz</key><undef /><key>foo</key><string>bar</string></map></llsd>\n"; xml_test("2 element map", expected); } - + template<> template<> void sd_xml_object::test<6>() { @@ -241,7 +245,7 @@ namespace tut expected = "<llsd><binary encoding=\"base64\">Nnw2fGFzZGZoYXBweWJveHw2MGU0NGVjNS0zMDVjLTQzYzItOWExOS1iNGI4OWIxYWUyYTZ8NjBlNDRlYzUtMzA1Yy00M2MyLTlhMTktYjRiODliMWFlMmE2fDYwZTQ0ZWM1LTMwNWMtNDNjMi05YTE5LWI0Yjg5YjFhZTJhNnwwMDAwMDAwMC0wMDAwLTAwMDAtMDAwMC0wMDAwMDAwMDAwMDB8N2ZmZmZmZmZ8N2ZmZmZmZmZ8MHwwfDgyMDAwfDQ1MGZlMzk0LTI5MDQtYzlhZC0yMTRjLWEwN2ViN2ZlZWMyOXwoTm8gRGVzY3JpcHRpb24pfDB8MTB8MA==</binary></llsd>\n"; xml_test("binary", expected); } - + class TestLLSDSerializeData { public: @@ -250,9 +254,34 @@ namespace tut void doRoundTripTests(const std::string&); void checkRoundTrip(const std::string&, const LLSD& v); - - LLPointer<LLSDFormatter> mFormatter; - LLPointer<LLSDParser> mParser; + + void setFormatterParser(LLPointer<LLSDFormatter> formatter, LLPointer<LLSDParser> parser) + { + mFormatter = [formatter](const LLSD& data, std::ostream& str) + { + formatter->format(data, str); + }; + // this lambda must be mutable since otherwise the bound 'parser' + // is assumed to point to a const LLSDParser + mParser = [parser](std::istream& istr, LLSD& data, llssize max_bytes) mutable + { + // reset() call is needed since test code re-uses parser object + parser->reset(); + return (parser->parse(istr, data, max_bytes) > 0); + }; + } + + void setParser(bool (*parser)(LLSD&, std::istream&, llssize)) + { + // why does LLSDSerialize::deserialize() reverse the parse() params?? + mParser = [parser](std::istream& istr, LLSD& data, llssize max_bytes) + { + return (parser(data, istr, max_bytes) > 0); + }; + } + + FormatterFunction mFormatter; + ParserFunction mParser; }; TestLLSDSerializeData::TestLLSDSerializeData() @@ -265,12 +294,11 @@ namespace tut void TestLLSDSerializeData::checkRoundTrip(const std::string& msg, const LLSD& v) { - std::stringstream stream; - mFormatter->format(v, stream); + std::stringstream stream; + mFormatter(v, stream); //LL_INFOS() << "checkRoundTrip: length " << stream.str().length() << LL_ENDL; LLSD w; - mParser->reset(); // reset() call is needed since test code re-uses mParser - mParser->parse(stream, w, stream.str().size()); + mParser(stream, w, stream.str().size()); try { @@ -299,52 +327,52 @@ namespace tut fillmap(root[key], width, depth - 1); } } - + void TestLLSDSerializeData::doRoundTripTests(const std::string& msg) { LLSD v; checkRoundTrip(msg + " undefined", v); - + v = true; checkRoundTrip(msg + " true bool", v); - + v = false; checkRoundTrip(msg + " false bool", v); - + v = 1; checkRoundTrip(msg + " positive int", v); - + v = 0; checkRoundTrip(msg + " zero int", v); - + v = -1; checkRoundTrip(msg + " negative int", v); - + v = 1234.5f; checkRoundTrip(msg + " positive float", v); - + v = 0.0f; checkRoundTrip(msg + " zero float", v); - + v = -1234.5f; checkRoundTrip(msg + " negative float", v); - + // FIXME: need a NaN test - + v = LLUUID::null; checkRoundTrip(msg + " null uuid", v); - + LLUUID newUUID; newUUID.generate(); v = newUUID; checkRoundTrip(msg + " new uuid", v); - + v = ""; checkRoundTrip(msg + " empty string", v); - + v = "some string"; checkRoundTrip(msg + " non-empty string", v); - + v = "Second Life is a 3-D virtual world entirely built and owned by its residents. " "Since opening to the public in 2003, it has grown explosively and today is " @@ -372,7 +400,7 @@ namespace tut for (U32 block = 0x000000; block <= 0x10ffff; block += block_size) { std::ostringstream out; - + for (U32 c = block; c < block + block_size; ++c) { if (c <= 0x000001f @@ -386,7 +414,7 @@ namespace tut if (0x00fdd0 <= c && c <= 0x00fdef) { continue; } if ((c & 0x00fffe) == 0x00fffe) { continue; } // see Unicode standard, section 15.8 - + if (c <= 0x00007f) { out << (char)(c & 0x7f); @@ -410,55 +438,55 @@ namespace tut out << (char)(0x80 | ((c >> 0) & 0x3f)); } } - + v = out.str(); std::ostringstream blockmsg; blockmsg << msg << " unicode string block 0x" << std::hex << block; checkRoundTrip(blockmsg.str(), v); } - + LLDate epoch; v = epoch; checkRoundTrip(msg + " epoch date", v); - + LLDate aDay("2002-12-07T05:07:15.00Z"); v = aDay; checkRoundTrip(msg + " date", v); - + LLURI path("http://slurl.com/secondlife/Ambleside/57/104/26/"); v = path; checkRoundTrip(msg + " url", v); - + const char source[] = "it must be a blue moon again"; std::vector<U8> data; // note, includes terminating '\0' copy(&source[0], &source[sizeof(source)], back_inserter(data)); - + v = data; checkRoundTrip(msg + " binary", v); - + v = LLSD::emptyMap(); checkRoundTrip(msg + " empty map", v); - + v = LLSD::emptyMap(); v["name"] = "luke"; //v.insert("name", "luke"); v["age"] = 3; //v.insert("age", 3); checkRoundTrip(msg + " map", v); - + v.clear(); v["a"]["1"] = true; v["b"]["0"] = false; checkRoundTrip(msg + " nested maps", v); - + v = LLSD::emptyArray(); checkRoundTrip(msg + " empty array", v); - + v = LLSD::emptyArray(); v.append("ali"); v.append(28); checkRoundTrip(msg + " array", v); - + v.clear(); v[0][0] = true; v[1][0] = false; @@ -468,7 +496,7 @@ namespace tut fillmap(v, 10, 3); // 10^6 maps checkRoundTrip(msg + " many nested maps", v); } - + typedef tut::test_group<TestLLSDSerializeData> TestLLSDSerializeGroup; typedef TestLLSDSerializeGroup::object TestLLSDSerializeObject; TestLLSDSerializeGroup gTestLLSDSerializeGroup("llsd serialization"); @@ -476,35 +504,106 @@ namespace tut template<> template<> void TestLLSDSerializeObject::test<1>() { - mFormatter = new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_PRETTY_BINARY); - mParser = new LLSDNotationParser(); + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_PRETTY_BINARY), + new LLSDNotationParser()); doRoundTripTests("pretty binary notation serialization"); } template<> template<> void TestLLSDSerializeObject::test<2>() { - mFormatter = new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE); - mParser = new LLSDNotationParser(); + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDNotationParser()); doRoundTripTests("raw binary notation serialization"); } template<> template<> void TestLLSDSerializeObject::test<3>() { - mFormatter = new LLSDXMLFormatter(); - mParser = new LLSDXMLParser(); + setFormatterParser(new LLSDXMLFormatter(), new LLSDXMLParser()); doRoundTripTests("xml serialization"); } template<> template<> void TestLLSDSerializeObject::test<4>() { - mFormatter = new LLSDBinaryFormatter(); - mParser = new LLSDBinaryParser(); + setFormatterParser(new LLSDBinaryFormatter(), new LLSDBinaryParser()); doRoundTripTests("binary serialization"); } + template<> template<> + void TestLLSDSerializeObject::test<5>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_BINARY); + }; + setParser(LLSDSerialize::deserialize); + doRoundTripTests("serialize(LLSD_BINARY)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<6>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_XML); + }; + setParser(LLSDSerialize::deserialize); + doRoundTripTests("serialize(LLSD_XML)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<7>() + { + mFormatter = [](const LLSD& sd, std::ostream& str) + { + LLSDSerialize::serialize(sd, str, LLSDSerialize::LLSD_NOTATION); + }; + setParser(LLSDSerialize::deserialize); + // In this test, serialize(LLSD_NOTATION) emits a header recognized by + // deserialize(). + doRoundTripTests("serialize(LLSD_NOTATION)"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<8>() + { + setFormatterParser(new LLSDNotationFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDNotationParser()); + setParser(LLSDSerialize::deserialize); + // This is an interesting test because LLSDNotationFormatter does not + // emit an llsd/notation header. + doRoundTripTests("LLSDNotationFormatter -> deserialize"); + }; + + template<> template<> + void TestLLSDSerializeObject::test<9>() + { + setFormatterParser(new LLSDXMLFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDXMLParser()); + setParser(LLSDSerialize::deserialize); + // This is an interesting test because LLSDXMLFormatter does not + // emit an LLSD/XML header. + doRoundTripTests("LLSDXMLFormatter -> deserialize"); + }; + +/*==========================================================================*| + // We do not expect this test to succeed. Without a header, neither + // notation LLSD nor binary LLSD reliably start with a distinct character, + // the way XML LLSD starts with '<'. By convention, we default to notation + // rather than binary. + template<> template<> + void TestLLSDSerializeObject::test<10>() + { + setFormatterParser(new LLSDBinaryFormatter(false, "", LLSDFormatter::OPTIONS_NONE), + new LLSDBinaryParser()); + setParser(LLSDSerialize::deserialize); + // This is an interesting test because LLSDBinaryFormatter does not + // emit an LLSD/Binary header. + doRoundTripTests("LLSDBinaryFormatter -> deserialize"); + }; +|*==========================================================================*/ /** * @class TestLLSDParsing @@ -555,7 +654,7 @@ namespace tut public: TestLLSDXMLParsing() {} }; - + typedef tut::test_group<TestLLSDXMLParsing> TestLLSDXMLParsingGroup; typedef TestLLSDXMLParsingGroup::object TestLLSDXMLParsingObject; TestLLSDXMLParsingGroup gTestLLSDXMLParsingGroup("llsd XML parsing"); @@ -586,8 +685,8 @@ namespace tut LLSD(), LLSDParser::PARSE_FAILURE); } - - + + template<> template<> void TestLLSDXMLParsingObject::test<2>() { @@ -596,7 +695,7 @@ namespace tut v["amy"] = 23; v["bob"] = LLSD(); v["cam"] = 1.23; - + ensureParse( "unknown data type", "<llsd><map>" @@ -607,16 +706,16 @@ namespace tut v, v.size() + 1); } - + template<> template<> void TestLLSDXMLParsingObject::test<3>() { // test handling of nested bad data - + LLSD v; v["amy"] = 23; v["cam"] = 1.23; - + ensureParse( "map with html", "<llsd><map>" @@ -626,7 +725,7 @@ namespace tut "</map></llsd>", v, v.size() + 1); - + v.clear(); v["amy"] = 23; v["cam"] = 1.23; @@ -639,7 +738,7 @@ namespace tut "</map></llsd>", v, v.size() + 1); - + v.clear(); v["amy"] = 23; v["bob"] = LLSD::emptyMap(); @@ -661,7 +760,7 @@ namespace tut v[0] = 23; v[1] = LLSD(); v[2] = 1.23; - + ensureParse( "array value of html", "<llsd><array>" @@ -671,7 +770,7 @@ namespace tut "</array></llsd>", v, v.size() + 1); - + v.clear(); v[0] = 23; v[1] = LLSD::emptyMap(); @@ -1225,7 +1324,7 @@ namespace tut vec[0] = 'a'; vec[1] = 'b'; vec[2] = 'c'; vec[3] = '3'; vec[4] = '2'; vec[5] = '1'; LLSD value = vec; - + vec.resize(11); vec[0] = 'b'; // for binary vec[5] = 'a'; vec[6] = 'b'; vec[7] = 'c'; @@ -1694,85 +1793,83 @@ namespace tut ensureBinaryAndXML("map", test); } - struct TestPythonCompatible + // helper for TestPythonCompatible + static std::string import_llsd("import os.path\n" + "import sys\n" + "try:\n" + // new freestanding llsd package + " import llsd\n" + "except ImportError:\n" + // older llbase.llsd module + " from llbase import llsd\n"); + + // helper for TestPythonCompatible + template <typename CONTENT> + void python(const std::string& desc, const CONTENT& script, int expect=0) { - TestPythonCompatible(): - // Note the peculiar insertion of __FILE__ into this string. Since - // this script is being written into a platform-dependent temp - // directory, we can't locate indra/lib/python relative to - // Python's __file__. Use __FILE__ instead, navigating relative - // to this C++ source file. Use Python raw-string syntax so - // Windows pathname backslashes won't mislead Python's string - // scanner. - import_llsd("import os.path\n" - "import sys\n" - "from llbase import llsd\n") - {} - ~TestPythonCompatible() {} + auto PYTHON(LLStringUtil::getenv("PYTHON")); + ensure("Set $PYTHON to the Python interpreter", !PYTHON.empty()); - std::string import_llsd; + NamedTempFile scriptfile("py", script); - template <typename CONTENT> - void python(const std::string& desc, const CONTENT& script, int expect=0) +#if LL_WINDOWS + std::string q("\""); + std::string qPYTHON(q + PYTHON + q); + std::string qscript(q + scriptfile.getName() + q); + int rc = _spawnl(_P_WAIT, PYTHON.c_str(), qPYTHON.c_str(), qscript.c_str(), NULL); + if (rc == -1) { - auto PYTHON(LLStringUtil::getenv("PYTHON")); - ensure("Set $PYTHON to the Python interpreter", !PYTHON.empty()); - - NamedTempFile scriptfile("py", script); + char buffer[256]; + strerror_s(buffer, errno); // C++ can infer the buffer size! :-O + ensure(STRINGIZE("Couldn't run Python " << desc << "script: " << buffer), false); + } + else + { + ensure_equals(STRINGIZE(desc << " script terminated with rc " << rc), rc, expect); + } -#if LL_WINDOWS - std::string q("\""); - std::string qPYTHON(q + PYTHON + q); - std::string qscript(q + scriptfile.getName() + q); - int rc = _spawnl(_P_WAIT, PYTHON.c_str(), qPYTHON.c_str(), qscript.c_str(), NULL); - if (rc == -1) - { - char buffer[256]; - strerror_s(buffer, errno); // C++ can infer the buffer size! :-O - ensure(STRINGIZE("Couldn't run Python " << desc << "script: " << buffer), false); - } - else +#else // LL_DARWIN, LL_LINUX + LLProcess::Params params; + params.executable = PYTHON; + params.args.add(scriptfile.getName()); + LLProcessPtr py(LLProcess::create(params)); + ensure(STRINGIZE("Couldn't launch " << desc << " script"), bool(py)); + // Implementing timeout would mean messing with alarm() and + // catching SIGALRM... later maybe... + int status(0); + if (waitpid(py->getProcessID(), &status, 0) == -1) + { + int waitpid_errno(errno); + ensure_equals(STRINGIZE("Couldn't retrieve rc from " << desc << " script: " + "waitpid() errno " << waitpid_errno), + waitpid_errno, ECHILD); + } + else + { + if (WIFEXITED(status)) { - ensure_equals(STRINGIZE(desc << " script terminated with rc " << rc), rc, expect); + int rc(WEXITSTATUS(status)); + ensure_equals(STRINGIZE(desc << " script terminated with rc " << rc), + rc, expect); } - -#else // LL_DARWIN, LL_LINUX - LLProcess::Params params; - params.executable = PYTHON; - params.args.add(scriptfile.getName()); - LLProcessPtr py(LLProcess::create(params)); - ensure(STRINGIZE("Couldn't launch " << desc << " script"), bool(py)); - // Implementing timeout would mean messing with alarm() and - // catching SIGALRM... later maybe... - int status(0); - if (waitpid(py->getProcessID(), &status, 0) == -1) + else if (WIFSIGNALED(status)) { - int waitpid_errno(errno); - ensure_equals(STRINGIZE("Couldn't retrieve rc from " << desc << " script: " - "waitpid() errno " << waitpid_errno), - waitpid_errno, ECHILD); + ensure(STRINGIZE(desc << " script terminated by signal " << WTERMSIG(status)), + false); } else { - if (WIFEXITED(status)) - { - int rc(WEXITSTATUS(status)); - ensure_equals(STRINGIZE(desc << " script terminated with rc " << rc), - rc, expect); - } - else if (WIFSIGNALED(status)) - { - ensure(STRINGIZE(desc << " script terminated by signal " << WTERMSIG(status)), - false); - } - else - { - ensure(STRINGIZE(desc << " script produced impossible status " << status), - false); - } + ensure(STRINGIZE(desc << " script produced impossible status " << status), + false); } -#endif } +#endif + } + + struct TestPythonCompatible + { + TestPythonCompatible() {} + ~TestPythonCompatible() {} }; typedef tut::test_group<TestPythonCompatible> TestPythonCompatibleGroup; @@ -1798,25 +1895,33 @@ namespace tut "print('Running on', sys.platform)\n"); } - // helper for test<3> - static void writeLLSDArray(std::ostream& out, const LLSD& array) + // helper for test<3> - test<7> + static void writeLLSDArray(const FormatterFunction& serialize, + std::ostream& out, const LLSD& array) { - BOOST_FOREACH(LLSD item, llsd::inArray(array)) + for (const LLSD& item : llsd::inArray(array)) { - LLSDSerialize::toNotation(item, out); - // It's important to separate with newlines because Python's llsd - // module doesn't support parsing from a file stream, only from a - // string, so we have to know how much of the file to read into a - // string. - out << '\n'; + // It's important to delimit the entries in this file somehow + // because, although Python's llsd.parse() can accept a file + // stream, the XML parser expects EOF after a single outer element + // -- it doesn't just stop. So we must extract a sequence of bytes + // strings from the file. But since one of the serialization + // formats we want to test is binary, we can't pick any single + // byte value as a delimiter! Use a binary integer length prefix + // instead. + std::ostringstream buffer; + serialize(item, buffer); + auto buffstr{ buffer.str() }; + int bufflen{ static_cast<int>(buffstr.length()) }; + out.write(reinterpret_cast<const char*>(&bufflen), sizeof(bufflen)); + out.write(buffstr.c_str(), buffstr.length()); } } - template<> template<> - void TestPythonCompatibleObject::test<3>() + // helper for test<3> - test<7> + static void toPythonUsing(const std::string& desc, + const FormatterFunction& serialize) { - set_test_name("verify sequence to Python"); - LLSD cdata(LLSDArray(17)(3.14) ("This string\n" "has several\n" @@ -1836,7 +1941,7 @@ namespace tut " except StopIteration:\n" " pass\n" " else:\n" - " assert False, 'Too many data items'\n"; + " raise AssertionError('Too many data items')\n"; // Create an llsdXXXXXX file containing 'data' serialized to // notation. @@ -1845,32 +1950,128 @@ namespace tut // takes a callable. To this callable it passes the // std::ostream with which it's writing the // NamedTempFile. - boost::bind(writeLLSDArray, _1, cdata)); + [serialize, cdata] + (std::ostream& out) + { writeLLSDArray(serialize, out, cdata); }); - python("read C++ notation", + python("read C++ " + desc, placeholders::arg1 << import_llsd << - "def parse_each(iterable):\n" - " for item in iterable:\n" - " yield llsd.parse(item)\n" << - pydata << + "from functools import partial\n" + "import io\n" + "import struct\n" + "lenformat = struct.Struct('i')\n" + "def parse_each(inf):\n" + " for rawlen in iter(partial(inf.read, lenformat.size), b''):\n" + " len = lenformat.unpack(rawlen)[0]\n" + // Since llsd.parse() has no max_bytes argument, instead of + // passing the input stream directly to parse(), read the item + // into a distinct bytes object and parse that. + " data = inf.read(len)\n" + " try:\n" + " frombytes = llsd.parse(data)\n" + " except llsd.LLSDParseError as err:\n" + " print(f'*** {err}')\n" + " print(f'Bad content:\\n{data!r}')\n" + " raise\n" + // Also try parsing from a distinct stream. + " stream = io.BytesIO(data)\n" + " fromstream = llsd.parse(stream)\n" + " assert frombytes == fromstream\n" + " yield frombytes\n" + << pydata << // Don't forget raw-string syntax for Windows pathnames. "verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n"); } template<> template<> + void TestPythonCompatibleObject::test<3>() + { + set_test_name("to Python using LLSDSerialize::serialize(LLSD_XML)"); + toPythonUsing("LLSD_XML", + [](const LLSD& sd, std::ostream& out) + { LLSDSerialize::serialize(sd, out, LLSDSerialize::LLSD_XML); }); + } + + template<> template<> void TestPythonCompatibleObject::test<4>() { - set_test_name("verify sequence from Python"); + set_test_name("to Python using LLSDSerialize::serialize(LLSD_NOTATION)"); + toPythonUsing("LLSD_NOTATION", + [](const LLSD& sd, std::ostream& out) + { LLSDSerialize::serialize(sd, out, LLSDSerialize::LLSD_NOTATION); }); + } + + template<> template<> + void TestPythonCompatibleObject::test<5>() + { + set_test_name("to Python using LLSDSerialize::serialize(LLSD_BINARY)"); + toPythonUsing("LLSD_BINARY", + [](const LLSD& sd, std::ostream& out) + { LLSDSerialize::serialize(sd, out, LLSDSerialize::LLSD_BINARY); }); + } + + template<> template<> + void TestPythonCompatibleObject::test<6>() + { + set_test_name("to Python using LLSDSerialize::toXML()"); + toPythonUsing("toXML()", LLSDSerialize::toXML); + } + + template<> template<> + void TestPythonCompatibleObject::test<7>() + { + set_test_name("to Python using LLSDSerialize::toNotation()"); + toPythonUsing("toNotation()", LLSDSerialize::toNotation); + } +/*==========================================================================*| + template<> template<> + void TestPythonCompatibleObject::test<8>() + { + set_test_name("to Python using LLSDSerialize::toBinary()"); + // We don't expect this to work because, without a header, + // llsd.parse() will assume notation rather than binary. + toPythonUsing("toBinary()", LLSDSerialize::toBinary); + } +|*==========================================================================*/ + + // helper for test<8> - test<12> + bool itemFromStream(std::istream& istr, LLSD& item, const ParserFunction& parse) + { + // reset the output value for debugging clarity + item.clear(); + // We use an int length prefix as a foolproof delimiter even for + // binary serialized streams. + int length{ 0 }; + istr.read(reinterpret_cast<char*>(&length), sizeof(length)); +// return parse(istr, item, length); + // Sadly, as of 2022-12-01 it seems we can't really trust our LLSD + // parsers to honor max_bytes: this test works better when we read + // each item into its own distinct LLMemoryStream, instead of passing + // the original istr with a max_bytes constraint. + std::vector<U8> buffer(length); + istr.read(reinterpret_cast<char*>(buffer.data()), length); + LLMemoryStream stream(buffer.data(), length); + return parse(stream, item, length); + } + + // helper for test<8> - test<12> + void fromPythonUsing(const std::string& pyformatter, + const ParserFunction& parse= + [](std::istream& istr, LLSD& data, llssize max_bytes) + { return LLSDSerialize::deserialize(data, istr, max_bytes); }) + { // Create an empty data file. This is just a placeholder for our // script to write into. Create it to establish a unique name that // we know. NamedTempFile file("llsd", ""); - python("write Python notation", + python("Python " + pyformatter, placeholders::arg1 << import_llsd << + "import struct\n" + "lenformat = struct.Struct('i')\n" "DATA = [\n" " 17,\n" " 3.14,\n" @@ -1881,34 +2082,87 @@ namespace tut "]\n" // Don't forget raw-string syntax for Windows pathnames. // N.B. Using 'print' implicitly adds newlines. - "with open(r'" << file.getName() << "', 'w') as f:\n" + "with open(r'" << file.getName() << "', 'wb') as f:\n" " for item in DATA:\n" - " print(llsd.format_notation(item).decode(), file=f)\n"); + " serialized = llsd." << pyformatter << "(item)\n" + " f.write(lenformat.pack(len(serialized)))\n" + " f.write(serialized)\n"); std::ifstream inf(file.getName().c_str()); LLSD item; - // Notice that we're not doing anything special to parse out the - // newlines: LLSDSerialize::fromNotation ignores them. While it would - // seem they're not strictly necessary, going in this direction, we - // want to ensure that notation-separated-by-newlines works in both - // directions -- since in practice, a given file might be read by - // either language. - ensure_equals("Failed to read LLSD::Integer from Python", - LLSDSerialize::fromNotation(item, inf, LLSDSerialize::SIZE_UNLIMITED), - 1); - ensure_equals(item.asInteger(), 17); - ensure_equals("Failed to read LLSD::Real from Python", - LLSDSerialize::fromNotation(item, inf, LLSDSerialize::SIZE_UNLIMITED), - 1); - ensure_approximately_equals("Bad LLSD::Real value from Python", - item.asReal(), 3.14, 7); // 7 bits ~= 0.01 - ensure_equals("Failed to read LLSD::String from Python", - LLSDSerialize::fromNotation(item, inf, LLSDSerialize::SIZE_UNLIMITED), - 1); - ensure_equals(item.asString(), - "This string\n" - "has several\n" - "lines."); - + try + { + ensure("Failed to read LLSD::Integer from Python", + itemFromStream(inf, item, parse)); + ensure_equals(item.asInteger(), 17); + ensure("Failed to read LLSD::Real from Python", + itemFromStream(inf, item, parse)); + ensure_approximately_equals("Bad LLSD::Real value from Python", + item.asReal(), 3.14, 7); // 7 bits ~= 0.01 + ensure("Failed to read LLSD::String from Python", + itemFromStream(inf, item, parse)); + ensure_equals(item.asString(), + "This string\n" + "has several\n" + "lines."); + } + catch (const tut::failure& err) + { + std::cout << "for " << err.what() << ", item = " << item << std::endl; + throw; + } + } + + template<> template<> + void TestPythonCompatibleObject::test<8>() + { + set_test_name("from Python XML using LLSDSerialize::deserialize()"); + fromPythonUsing("format_xml"); + } + + template<> template<> + void TestPythonCompatibleObject::test<9>() + { + set_test_name("from Python notation using LLSDSerialize::deserialize()"); + fromPythonUsing("format_notation"); + } + + template<> template<> + void TestPythonCompatibleObject::test<10>() + { + set_test_name("from Python binary using LLSDSerialize::deserialize()"); + fromPythonUsing("format_binary"); + } + + template<> template<> + void TestPythonCompatibleObject::test<11>() + { + set_test_name("from Python XML using fromXML()"); + // fromXML()'s optional 3rd param isn't max_bytes, it's emit_errors + fromPythonUsing("format_xml", + [](std::istream& istr, LLSD& data, llssize) + { return LLSDSerialize::fromXML(data, istr) > 0; }); + } + + template<> template<> + void TestPythonCompatibleObject::test<12>() + { + set_test_name("from Python notation using fromNotation()"); + fromPythonUsing("format_notation", + [](std::istream& istr, LLSD& data, llssize max_bytes) + { return LLSDSerialize::fromNotation(data, istr, max_bytes) > 0; }); + } + +/*==========================================================================*| + template<> template<> + void TestPythonCompatibleObject::test<13>() + { + set_test_name("from Python binary using fromBinary()"); + // We don't expect this to work because format_binary() emits a + // header, but fromBinary() won't recognize a header. + fromPythonUsing("format_binary", + [](std::istream& istr, LLSD& data, llssize max_bytes) + { return LLSDSerialize::fromBinary(data, istr, max_bytes) > 0; }); } +|*==========================================================================*/ } |