diff options
-rw-r--r-- | indra/llcommon/llleap.cpp | 9 | ||||
-rw-r--r-- | indra/llcommon/llsdserialize.cpp | 49 | ||||
-rw-r--r-- | indra/llcommon/tests/llleap_test.cpp | 32 | ||||
-rw-r--r-- | indra/llcommon/tests/llsdserialize_test.cpp | 118 |
4 files changed, 147 insertions, 61 deletions
diff --git a/indra/llcommon/llleap.cpp b/indra/llcommon/llleap.cpp index f71eaf92c4..e8ccc4300d 100644 --- a/indra/llcommon/llleap.cpp +++ b/indra/llcommon/llleap.cpp @@ -317,8 +317,17 @@ public: LL_DEBUGS("LLLeap") << "needed " << mExpect << " bytes, got " << childout.size() << ", parsing LLSD" << LL_ENDL; LLSD data; +#if 1 + // specifically require notation LLSD from child + LLPointer<LLSDParser> parser(new LLSDNotationParser()); + S32 parse_status(parser->parse(childout.get_istream(), data, mExpect)); + if (parse_status == LLSDParser::PARSE_FAILURE) +#else + // SL-18330: accept any valid LLSD serialization format from child + // Unfortunately this runs into trouble we have not yet debugged. bool parse_status(LLSDSerialize::deserialize(data, childout.get_istream(), mExpect)); if (! parse_status) +#endif { bad_protocol("unparseable LLSD data"); } diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index dc8f8f5737..97bf51eeaa 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -123,12 +123,26 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) bool fail_if_not_legacy = false; /* - * Get the first line before anything. + * Get the first line before anything. Don't read more than max_bytes: + * this get() overload reads no more than (count-1) bytes into the + * specified buffer. In the usual case when max_bytes exceeds + * sizeof(hdr_buf), get() will read no more than sizeof(hdr_buf)-2. */ - // Remember str's original input position: if there's no header, we'll - // want to back up and retry. - str.get(hdr_buf, sizeof(hdr_buf), '\n'); + str.get(hdr_buf, std::min(max_bytes+1, sizeof(hdr_buf)-1), '\n'); auto inbuf = str.gcount(); + // https://en.cppreference.com/w/cpp/io/basic_istream/get + // When the get() above sees the specified delimiter '\n', it stops there + // without pulling it from the stream. If it turns out that the stream + // does NOT contain a header, and the content includes meaningful '\n', + // it's important to pull that into hdr_buf too. + if (inbuf < max_bytes && str.get(hdr_buf[inbuf])) + { + // got the delimiting '\n' + ++inbuf; + // None of the following requires that hdr_buf contain a final '\0' + // byte. We could store one if needed, since even the incremented + // inbuf won't exceed sizeof(hdr_buf)-1, but there's no need. + } std::string header{ hdr_buf, inbuf }; if (str.fail()) { @@ -175,17 +189,17 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) /* * Create the parser as appropriate */ - if (header == LLSD_BINARY_HEADER) + if (0 == LLStringUtil::compareInsensitive(header, LLSD_BINARY_HEADER)) { - return (parse_using<LLSDBinaryParser>(str, sd, max_bytes) > 0); + return (parse_using<LLSDBinaryParser>(str, sd, max_bytes-inbuf) > 0); } - else if (header == LLSD_XML_HEADER) + else if (0 == LLStringUtil::compareInsensitive(header, LLSD_XML_HEADER)) { - return (parse_using<LLSDXMLParser>(str, sd, max_bytes) > 0); + return (parse_using<LLSDXMLParser>(str, sd, max_bytes-inbuf) > 0); } - else if (header == LLSD_NOTATION_HEADER) + else if (0 == LLStringUtil::compareInsensitive(header, LLSD_NOTATION_HEADER)) { - return (parse_using<LLSDNotationParser>(str, sd, max_bytes) > 0); + return (parse_using<LLSDNotationParser>(str, sd, max_bytes-inbuf) > 0); } else // no header we recognize { @@ -207,7 +221,22 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) LLMemoryStreamBuf already(reinterpret_cast<const U8*>(hdr_buf), inbuf); cat_streambuf prebuff(&already, str.rdbuf()); std::istream prepend(&prebuff); +#if 1 return (p->parse(prepend, sd, max_bytes) > 0); +#else + // debugging the reconstituted 'prepend' stream + // allocate a buffer that we hope is big enough for the whole thing + std::vector<char> wholemsg((max_bytes == size_t(SIZE_UNLIMITED))? 1024 : max_bytes); + prepend.read(wholemsg.data(), std::min(max_bytes, wholemsg.size())); + LLMemoryStream replay(reinterpret_cast<const U8*>(wholemsg.data()), prepend.gcount()); + auto success{ p->parse(replay, sd, prepend.gcount()) > 0 }; + { + LL_DEBUGS() << (success? "parsed: $$" : "failed: '") + << std::string(wholemsg.data(), llmin(prepend.gcount(), 100)) << "$$" + << LL_ENDL; + } + return success; +#endif } } diff --git a/indra/llcommon/tests/llleap_test.cpp b/indra/llcommon/tests/llleap_test.cpp index 9754353ab0..6c799b7993 100644 --- a/indra/llcommon/tests/llleap_test.cpp +++ b/indra/llcommon/tests/llleap_test.cpp @@ -110,12 +110,12 @@ namespace tut "import os\n" "import sys\n" "\n" - // Don't forget that this Python script is written to some - // temp directory somewhere! Its __file__ is useless in - // finding indra/lib/python. Use our __FILE__, with - // raw-string syntax to deal with Windows pathnames. - "mydir = os.path.dirname(r'" << __FILE__ << "')\n" - "from llbase import llsd\n" + "try:\n" + // new freestanding llsd package + " import llsd\n" + "except ImportError:\n" + // older llbase.llsd module + " from llbase import llsd\n" "\n" "class ProtocolError(Exception):\n" " def __init__(self, msg, data):\n" @@ -126,26 +126,26 @@ namespace tut " pass\n" "\n" "def get():\n" - " hdr = ''\n" - " while ':' not in hdr and len(hdr) < 20:\n" - " hdr += sys.stdin.read(1)\n" + " hdr = []\n" + " while b':' not in hdr and len(hdr) < 20:\n" + " hdr.append(sys.stdin.buffer.read(1))\n" " if not hdr:\n" " sys.exit(0)\n" - " if not hdr.endswith(':'):\n" + " if not hdr[-1] == b':':\n" " raise ProtocolError('Expected len:data, got %r' % hdr, hdr)\n" " try:\n" - " length = int(hdr[:-1])\n" + " length = int(b''.join(hdr[:-1]))\n" " except ValueError:\n" " raise ProtocolError('Non-numeric len %r' % hdr[:-1], hdr[:-1])\n" " parts = []\n" " received = 0\n" " while received < length:\n" - " parts.append(sys.stdin.read(length - received))\n" + " parts.append(sys.stdin.buffer.read(length - received))\n" " received += len(parts[-1])\n" - " data = ''.join(parts)\n" + " data = b''.join(parts)\n" " assert len(data) == length\n" " try:\n" - " return llsd.parse(data.encode())\n" + " return llsd.parse(data)\n" // Seems the old indra.base.llsd module didn't properly // convert IndexError (from running off end of string) to // LLSDParseError. @@ -185,11 +185,11 @@ namespace tut " return _reply\n" "\n" "def put(req):\n" - " sys.stdout.write(':'.join((str(len(req)), req)))\n" + " sys.stdout.buffer.write(b'%d:%b' % (len(req), req))\n" " sys.stdout.flush()\n" "\n" "def send(pump, data):\n" - " put(llsd.format_notation(dict(pump=pump, data=data)).decode())\n" + " put(llsd.format_notation(dict(pump=pump, data=data)))\n" "\n" "def request(pump, data):\n" " # we expect 'data' is a dict\n" diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index 601f2c580d..39b93cf335 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -51,10 +51,11 @@ typedef U32 uint32_t; #include "boost/phoenix/core/argument.hpp" using namespace boost::phoenix; -#include "../llsd.h" -#include "../llsdserialize.h" +#include "llsd.h" +#include "llsdserialize.h" #include "llsdutil.h" -#include "../llformat.h" +#include "llformat.h" +#include "llmemorystream.h" #include "../test/lltut.h" #include "../test/namedtempfile.h" @@ -1898,14 +1899,22 @@ namespace tut static void writeLLSDArray(const FormatterFunction& serialize, std::ostream& out, const LLSD& array) { - BOOST_FOREACH(LLSD item, llsd::inArray(array)) + for (const LLSD& item : llsd::inArray(array)) { - serialize(item, out); - // It's important to separate with newlines because Python's llsd - // module doesn't support parsing from a file stream, only from a - // string, so we have to know how much of the file to read into a - // string. - out << '\n'; + // It's important to delimit the entries in this file somehow + // because, although Python's llsd.parse() can accept a file + // stream, the XML parser expects EOF after a single outer element + // -- it doesn't just stop. So we must extract a sequence of bytes + // strings from the file. But since one of the serialization + // formats we want to test is binary, we can't pick any single + // byte value as a delimiter! Use a binary integer length prefix + // instead. + std::ostringstream buffer; + serialize(item, buffer); + auto buffstr{ buffer.str() }; + int bufflen{ static_cast<int>(buffstr.length()) }; + out.write(reinterpret_cast<const char*>(&bufflen), sizeof(bufflen)); + out.write(buffstr.c_str(), buffstr.length()); } } @@ -1932,7 +1941,7 @@ namespace tut " except StopIteration:\n" " pass\n" " else:\n" - " assert False, 'Too many data items'\n"; + " raise AssertionError('Too many data items')\n"; // Create an llsdXXXXXX file containing 'data' serialized to // notation. @@ -1948,10 +1957,23 @@ namespace tut python("read C++ " + desc, placeholders::arg1 << import_llsd << - "def parse_each(iterable):\n" - " for item in iterable:\n" - " yield llsd.parse(item)\n" << - pydata << + "from functools import partial\n" + "import struct\n" + "lenformat = struct.Struct('i')\n" + "def parse_each(inf):\n" + " for rawlen in iter(partial(inf.read, lenformat.size), b''):\n" + " len = lenformat.unpack(rawlen)[0]\n" + // Since llsd.parse() has no max_bytes argument, instead of + // passing the input stream directly to parse(), read the item + // into a distinct bytes object and parse that. + " data = inf.read(len)\n" + " try:\n" + " yield llsd.parse(data)\n" + " except llsd.LLSDParseError as err:\n" + " print(f'*** {err}')\n" + " print(f'Bad content:\\n{data!r}')\n" + " raise\n" + << pydata << // Don't forget raw-string syntax for Windows pathnames. "verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n"); } @@ -2009,6 +2031,26 @@ namespace tut |*==========================================================================*/ // helper for test<8> - test<12> + bool itemFromStream(std::istream& istr, LLSD& item, const ParserFunction& parse) + { + // reset the output value for debugging clarity + item.clear(); + // We use an int length prefix as a foolproof delimiter even for + // binary serialized streams. + int length{ 0 }; + istr.read(reinterpret_cast<char*>(&length), sizeof(length)); +// return parse(istr, item, length); + // Sadly, as of 2022-12-01 it seems we can't really trust our LLSD + // parsers to honor max_bytes: this test works better when we read + // each item into its own distinct LLMemoryStream, instead of passing + // the original istr with a max_bytes constraint. + std::vector<U8> buffer(length); + istr.read(reinterpret_cast<char*>(buffer.data()), length); + LLMemoryStream stream(buffer.data(), length); + return parse(stream, item, length); + } + + // helper for test<8> - test<12> void fromPythonUsing(const std::string& pyformatter, const ParserFunction& parse= [](std::istream& istr, LLSD& data, size_t max_bytes) @@ -2022,6 +2064,8 @@ namespace tut python("Python " + pyformatter, placeholders::arg1 << import_llsd << + "import struct\n" + "lenformat = struct.Struct('i')\n" "DATA = [\n" " 17,\n" " 3.14,\n" @@ -2034,29 +2078,33 @@ namespace tut // N.B. Using 'print' implicitly adds newlines. "with open(r'" << file.getName() << "', 'wb') as f:\n" " for item in DATA:\n" - " print(llsd." << pyformatter << "(item), file=f)\n"); + " serialized = llsd." << pyformatter << "(item)\n" + " f.write(lenformat.pack(len(serialized)))\n" + " f.write(serialized)\n"); std::ifstream inf(file.getName().c_str()); LLSD item; - // Notice that we're not doing anything special to parse out the - // newlines: LLSDSerialize::fromNotation ignores them. While it would - // seem they're not strictly necessary, going in this direction, we - // want to ensure that notation-separated-by-newlines works in both - // directions -- since in practice, a given file might be read by - // either language. - ensure("Failed to read LLSD::Integer from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_equals(item.asInteger(), 17); - ensure("Failed to read LLSD::Real from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_approximately_equals("Bad LLSD::Real value from Python", - item.asReal(), 3.14, 7); // 7 bits ~= 0.01 - ensure("Failed to read LLSD::String from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_equals(item.asString(), - "This string\n" - "has several\n" - "lines."); + try + { + ensure("Failed to read LLSD::Integer from Python", + itemFromStream(inf, item, parse)); + ensure_equals(item.asInteger(), 17); + ensure("Failed to read LLSD::Real from Python", + itemFromStream(inf, item, parse)); + ensure_approximately_equals("Bad LLSD::Real value from Python", + item.asReal(), 3.14, 7); // 7 bits ~= 0.01 + ensure("Failed to read LLSD::String from Python", + itemFromStream(inf, item, parse)); + ensure_equals(item.asString(), + "This string\n" + "has several\n" + "lines."); + } + catch (const tut::failure& err) + { + std::cout << "for " << err.what() << ", item = " << item << std::endl; + throw; + } } template<> template<> |