From 2f557cd7faceec36acace1eee4ee38904ff06130 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 2 Dec 2022 15:17:56 -0500 Subject: SL-18330: Fix new C++ <-> Python LLSD compatibility tests. When sending multiple LEAP packets in the same file (for testing convenience), use a length prefix instead of delimiting with '\n'. Now that we allow a serialization format that includes an LLSD format header (e.g. ""), '\n' is part of the packet content. But in fact, testing binary LLSD means we can't pick any delimiter guaranteed not to appear in the packet content. Using a length prefix also lets us pass a specific max_bytes to the subject C++ LLSD parser. Make llleap_test.cpp use new freestanding Python llsd package when available. Update Python-side LEAP protocol code to work directly with encoded bytes stream, avoiding bytes<->str encoding and decoding, which breaks binary LLSD. Make LLSDSerialize::deserialize() recognize LLSD format header case- insensitively. Python emits and checks for "llsd/binary", while LLSDSerialize emits and checks for "LLSD/Binary". Once any of the headers is recognized, pass corrected max_bytes to the specific parser. Make deserialize() more careful about the no-header case: preserve '\n' in content. Introduce debugging code (disabled) because it's a little tricky to recreate. Revert LLLeap child process stdout parser from LLSDSerialize::deserialize() to the specific LLSDNotationParser(), as at present: the generic parser fails one of LLLeap's integration tests for reasons that remain mysterious. --- indra/llcommon/llleap.cpp | 9 +++ indra/llcommon/llsdserialize.cpp | 49 +++++++++--- indra/llcommon/tests/llleap_test.cpp | 32 ++++---- indra/llcommon/tests/llsdserialize_test.cpp | 118 +++++++++++++++++++--------- 4 files changed, 147 insertions(+), 61 deletions(-) diff --git a/indra/llcommon/llleap.cpp b/indra/llcommon/llleap.cpp index f71eaf92c4..e8ccc4300d 100644 --- a/indra/llcommon/llleap.cpp +++ b/indra/llcommon/llleap.cpp @@ -317,8 +317,17 @@ public: LL_DEBUGS("LLLeap") << "needed " << mExpect << " bytes, got " << childout.size() << ", parsing LLSD" << LL_ENDL; LLSD data; +#if 1 + // specifically require notation LLSD from child + LLPointer parser(new LLSDNotationParser()); + S32 parse_status(parser->parse(childout.get_istream(), data, mExpect)); + if (parse_status == LLSDParser::PARSE_FAILURE) +#else + // SL-18330: accept any valid LLSD serialization format from child + // Unfortunately this runs into trouble we have not yet debugged. bool parse_status(LLSDSerialize::deserialize(data, childout.get_istream(), mExpect)); if (! parse_status) +#endif { bad_protocol("unparseable LLSD data"); } diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp index dc8f8f5737..97bf51eeaa 100644 --- a/indra/llcommon/llsdserialize.cpp +++ b/indra/llcommon/llsdserialize.cpp @@ -123,12 +123,26 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) bool fail_if_not_legacy = false; /* - * Get the first line before anything. + * Get the first line before anything. Don't read more than max_bytes: + * this get() overload reads no more than (count-1) bytes into the + * specified buffer. In the usual case when max_bytes exceeds + * sizeof(hdr_buf), get() will read no more than sizeof(hdr_buf)-2. */ - // Remember str's original input position: if there's no header, we'll - // want to back up and retry. - str.get(hdr_buf, sizeof(hdr_buf), '\n'); + str.get(hdr_buf, std::min(max_bytes+1, sizeof(hdr_buf)-1), '\n'); auto inbuf = str.gcount(); + // https://en.cppreference.com/w/cpp/io/basic_istream/get + // When the get() above sees the specified delimiter '\n', it stops there + // without pulling it from the stream. If it turns out that the stream + // does NOT contain a header, and the content includes meaningful '\n', + // it's important to pull that into hdr_buf too. + if (inbuf < max_bytes && str.get(hdr_buf[inbuf])) + { + // got the delimiting '\n' + ++inbuf; + // None of the following requires that hdr_buf contain a final '\0' + // byte. We could store one if needed, since even the incremented + // inbuf won't exceed sizeof(hdr_buf)-1, but there's no need. + } std::string header{ hdr_buf, inbuf }; if (str.fail()) { @@ -175,17 +189,17 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) /* * Create the parser as appropriate */ - if (header == LLSD_BINARY_HEADER) + if (0 == LLStringUtil::compareInsensitive(header, LLSD_BINARY_HEADER)) { - return (parse_using(str, sd, max_bytes) > 0); + return (parse_using(str, sd, max_bytes-inbuf) > 0); } - else if (header == LLSD_XML_HEADER) + else if (0 == LLStringUtil::compareInsensitive(header, LLSD_XML_HEADER)) { - return (parse_using(str, sd, max_bytes) > 0); + return (parse_using(str, sd, max_bytes-inbuf) > 0); } - else if (header == LLSD_NOTATION_HEADER) + else if (0 == LLStringUtil::compareInsensitive(header, LLSD_NOTATION_HEADER)) { - return (parse_using(str, sd, max_bytes) > 0); + return (parse_using(str, sd, max_bytes-inbuf) > 0); } else // no header we recognize { @@ -207,7 +221,22 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes) LLMemoryStreamBuf already(reinterpret_cast(hdr_buf), inbuf); cat_streambuf prebuff(&already, str.rdbuf()); std::istream prepend(&prebuff); +#if 1 return (p->parse(prepend, sd, max_bytes) > 0); +#else + // debugging the reconstituted 'prepend' stream + // allocate a buffer that we hope is big enough for the whole thing + std::vector wholemsg((max_bytes == size_t(SIZE_UNLIMITED))? 1024 : max_bytes); + prepend.read(wholemsg.data(), std::min(max_bytes, wholemsg.size())); + LLMemoryStream replay(reinterpret_cast(wholemsg.data()), prepend.gcount()); + auto success{ p->parse(replay, sd, prepend.gcount()) > 0 }; + { + LL_DEBUGS() << (success? "parsed: $$" : "failed: '") + << std::string(wholemsg.data(), llmin(prepend.gcount(), 100)) << "$$" + << LL_ENDL; + } + return success; +#endif } } diff --git a/indra/llcommon/tests/llleap_test.cpp b/indra/llcommon/tests/llleap_test.cpp index 9754353ab0..6c799b7993 100644 --- a/indra/llcommon/tests/llleap_test.cpp +++ b/indra/llcommon/tests/llleap_test.cpp @@ -110,12 +110,12 @@ namespace tut "import os\n" "import sys\n" "\n" - // Don't forget that this Python script is written to some - // temp directory somewhere! Its __file__ is useless in - // finding indra/lib/python. Use our __FILE__, with - // raw-string syntax to deal with Windows pathnames. - "mydir = os.path.dirname(r'" << __FILE__ << "')\n" - "from llbase import llsd\n" + "try:\n" + // new freestanding llsd package + " import llsd\n" + "except ImportError:\n" + // older llbase.llsd module + " from llbase import llsd\n" "\n" "class ProtocolError(Exception):\n" " def __init__(self, msg, data):\n" @@ -126,26 +126,26 @@ namespace tut " pass\n" "\n" "def get():\n" - " hdr = ''\n" - " while ':' not in hdr and len(hdr) < 20:\n" - " hdr += sys.stdin.read(1)\n" + " hdr = []\n" + " while b':' not in hdr and len(hdr) < 20:\n" + " hdr.append(sys.stdin.buffer.read(1))\n" " if not hdr:\n" " sys.exit(0)\n" - " if not hdr.endswith(':'):\n" + " if not hdr[-1] == b':':\n" " raise ProtocolError('Expected len:data, got %r' % hdr, hdr)\n" " try:\n" - " length = int(hdr[:-1])\n" + " length = int(b''.join(hdr[:-1]))\n" " except ValueError:\n" " raise ProtocolError('Non-numeric len %r' % hdr[:-1], hdr[:-1])\n" " parts = []\n" " received = 0\n" " while received < length:\n" - " parts.append(sys.stdin.read(length - received))\n" + " parts.append(sys.stdin.buffer.read(length - received))\n" " received += len(parts[-1])\n" - " data = ''.join(parts)\n" + " data = b''.join(parts)\n" " assert len(data) == length\n" " try:\n" - " return llsd.parse(data.encode())\n" + " return llsd.parse(data)\n" // Seems the old indra.base.llsd module didn't properly // convert IndexError (from running off end of string) to // LLSDParseError. @@ -185,11 +185,11 @@ namespace tut " return _reply\n" "\n" "def put(req):\n" - " sys.stdout.write(':'.join((str(len(req)), req)))\n" + " sys.stdout.buffer.write(b'%d:%b' % (len(req), req))\n" " sys.stdout.flush()\n" "\n" "def send(pump, data):\n" - " put(llsd.format_notation(dict(pump=pump, data=data)).decode())\n" + " put(llsd.format_notation(dict(pump=pump, data=data)))\n" "\n" "def request(pump, data):\n" " # we expect 'data' is a dict\n" diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp index 601f2c580d..39b93cf335 100644 --- a/indra/llcommon/tests/llsdserialize_test.cpp +++ b/indra/llcommon/tests/llsdserialize_test.cpp @@ -51,10 +51,11 @@ typedef U32 uint32_t; #include "boost/phoenix/core/argument.hpp" using namespace boost::phoenix; -#include "../llsd.h" -#include "../llsdserialize.h" +#include "llsd.h" +#include "llsdserialize.h" #include "llsdutil.h" -#include "../llformat.h" +#include "llformat.h" +#include "llmemorystream.h" #include "../test/lltut.h" #include "../test/namedtempfile.h" @@ -1898,14 +1899,22 @@ namespace tut static void writeLLSDArray(const FormatterFunction& serialize, std::ostream& out, const LLSD& array) { - BOOST_FOREACH(LLSD item, llsd::inArray(array)) + for (const LLSD& item : llsd::inArray(array)) { - serialize(item, out); - // It's important to separate with newlines because Python's llsd - // module doesn't support parsing from a file stream, only from a - // string, so we have to know how much of the file to read into a - // string. - out << '\n'; + // It's important to delimit the entries in this file somehow + // because, although Python's llsd.parse() can accept a file + // stream, the XML parser expects EOF after a single outer element + // -- it doesn't just stop. So we must extract a sequence of bytes + // strings from the file. But since one of the serialization + // formats we want to test is binary, we can't pick any single + // byte value as a delimiter! Use a binary integer length prefix + // instead. + std::ostringstream buffer; + serialize(item, buffer); + auto buffstr{ buffer.str() }; + int bufflen{ static_cast(buffstr.length()) }; + out.write(reinterpret_cast(&bufflen), sizeof(bufflen)); + out.write(buffstr.c_str(), buffstr.length()); } } @@ -1932,7 +1941,7 @@ namespace tut " except StopIteration:\n" " pass\n" " else:\n" - " assert False, 'Too many data items'\n"; + " raise AssertionError('Too many data items')\n"; // Create an llsdXXXXXX file containing 'data' serialized to // notation. @@ -1948,10 +1957,23 @@ namespace tut python("read C++ " + desc, placeholders::arg1 << import_llsd << - "def parse_each(iterable):\n" - " for item in iterable:\n" - " yield llsd.parse(item)\n" << - pydata << + "from functools import partial\n" + "import struct\n" + "lenformat = struct.Struct('i')\n" + "def parse_each(inf):\n" + " for rawlen in iter(partial(inf.read, lenformat.size), b''):\n" + " len = lenformat.unpack(rawlen)[0]\n" + // Since llsd.parse() has no max_bytes argument, instead of + // passing the input stream directly to parse(), read the item + // into a distinct bytes object and parse that. + " data = inf.read(len)\n" + " try:\n" + " yield llsd.parse(data)\n" + " except llsd.LLSDParseError as err:\n" + " print(f'*** {err}')\n" + " print(f'Bad content:\\n{data!r}')\n" + " raise\n" + << pydata << // Don't forget raw-string syntax for Windows pathnames. "verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n"); } @@ -2008,6 +2030,26 @@ namespace tut } |*==========================================================================*/ + // helper for test<8> - test<12> + bool itemFromStream(std::istream& istr, LLSD& item, const ParserFunction& parse) + { + // reset the output value for debugging clarity + item.clear(); + // We use an int length prefix as a foolproof delimiter even for + // binary serialized streams. + int length{ 0 }; + istr.read(reinterpret_cast(&length), sizeof(length)); +// return parse(istr, item, length); + // Sadly, as of 2022-12-01 it seems we can't really trust our LLSD + // parsers to honor max_bytes: this test works better when we read + // each item into its own distinct LLMemoryStream, instead of passing + // the original istr with a max_bytes constraint. + std::vector buffer(length); + istr.read(reinterpret_cast(buffer.data()), length); + LLMemoryStream stream(buffer.data(), length); + return parse(stream, item, length); + } + // helper for test<8> - test<12> void fromPythonUsing(const std::string& pyformatter, const ParserFunction& parse= @@ -2022,6 +2064,8 @@ namespace tut python("Python " + pyformatter, placeholders::arg1 << import_llsd << + "import struct\n" + "lenformat = struct.Struct('i')\n" "DATA = [\n" " 17,\n" " 3.14,\n" @@ -2034,29 +2078,33 @@ namespace tut // N.B. Using 'print' implicitly adds newlines. "with open(r'" << file.getName() << "', 'wb') as f:\n" " for item in DATA:\n" - " print(llsd." << pyformatter << "(item), file=f)\n"); + " serialized = llsd." << pyformatter << "(item)\n" + " f.write(lenformat.pack(len(serialized)))\n" + " f.write(serialized)\n"); std::ifstream inf(file.getName().c_str()); LLSD item; - // Notice that we're not doing anything special to parse out the - // newlines: LLSDSerialize::fromNotation ignores them. While it would - // seem they're not strictly necessary, going in this direction, we - // want to ensure that notation-separated-by-newlines works in both - // directions -- since in practice, a given file might be read by - // either language. - ensure("Failed to read LLSD::Integer from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_equals(item.asInteger(), 17); - ensure("Failed to read LLSD::Real from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_approximately_equals("Bad LLSD::Real value from Python", - item.asReal(), 3.14, 7); // 7 bits ~= 0.01 - ensure("Failed to read LLSD::String from Python", - parse(inf, item, LLSDSerialize::SIZE_UNLIMITED)); - ensure_equals(item.asString(), - "This string\n" - "has several\n" - "lines."); + try + { + ensure("Failed to read LLSD::Integer from Python", + itemFromStream(inf, item, parse)); + ensure_equals(item.asInteger(), 17); + ensure("Failed to read LLSD::Real from Python", + itemFromStream(inf, item, parse)); + ensure_approximately_equals("Bad LLSD::Real value from Python", + item.asReal(), 3.14, 7); // 7 bits ~= 0.01 + ensure("Failed to read LLSD::String from Python", + itemFromStream(inf, item, parse)); + ensure_equals(item.asString(), + "This string\n" + "has several\n" + "lines."); + } + catch (const tut::failure& err) + { + std::cout << "for " << err.what() << ", item = " << item << std::endl; + throw; + } } template<> template<> -- cgit v1.2.3