summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2022-12-02 15:17:56 -0500
committerNat Goodspeed <nat@lindenlab.com>2022-12-02 15:17:56 -0500
commit2f557cd7faceec36acace1eee4ee38904ff06130 (patch)
tree826951a26dc6a55233e3c3d30b7af56fa126af6e
parentb180e4de23cb54ec385e2d999fc5fdd4ea804ba4 (diff)
SL-18330: Fix new C++ <-> Python LLSD compatibility tests.
When sending multiple LEAP packets in the same file (for testing convenience), use a length prefix instead of delimiting with '\n'. Now that we allow a serialization format that includes an LLSD format header (e.g. "<?llsd/binary?>"), '\n' is part of the packet content. But in fact, testing binary LLSD means we can't pick any delimiter guaranteed not to appear in the packet content. Using a length prefix also lets us pass a specific max_bytes to the subject C++ LLSD parser. Make llleap_test.cpp use new freestanding Python llsd package when available. Update Python-side LEAP protocol code to work directly with encoded bytes stream, avoiding bytes<->str encoding and decoding, which breaks binary LLSD. Make LLSDSerialize::deserialize() recognize LLSD format header case- insensitively. Python emits and checks for "llsd/binary", while LLSDSerialize emits and checks for "LLSD/Binary". Once any of the headers is recognized, pass corrected max_bytes to the specific parser. Make deserialize() more careful about the no-header case: preserve '\n' in content. Introduce debugging code (disabled) because it's a little tricky to recreate. Revert LLLeap child process stdout parser from LLSDSerialize::deserialize() to the specific LLSDNotationParser(), as at present: the generic parser fails one of LLLeap's integration tests for reasons that remain mysterious.
-rw-r--r--indra/llcommon/llleap.cpp9
-rw-r--r--indra/llcommon/llsdserialize.cpp49
-rw-r--r--indra/llcommon/tests/llleap_test.cpp32
-rw-r--r--indra/llcommon/tests/llsdserialize_test.cpp118
4 files changed, 147 insertions, 61 deletions
diff --git a/indra/llcommon/llleap.cpp b/indra/llcommon/llleap.cpp
index f71eaf92c4..e8ccc4300d 100644
--- a/indra/llcommon/llleap.cpp
+++ b/indra/llcommon/llleap.cpp
@@ -317,8 +317,17 @@ public:
LL_DEBUGS("LLLeap") << "needed " << mExpect << " bytes, got "
<< childout.size() << ", parsing LLSD" << LL_ENDL;
LLSD data;
+#if 1
+ // specifically require notation LLSD from child
+ LLPointer<LLSDParser> parser(new LLSDNotationParser());
+ S32 parse_status(parser->parse(childout.get_istream(), data, mExpect));
+ if (parse_status == LLSDParser::PARSE_FAILURE)
+#else
+ // SL-18330: accept any valid LLSD serialization format from child
+ // Unfortunately this runs into trouble we have not yet debugged.
bool parse_status(LLSDSerialize::deserialize(data, childout.get_istream(), mExpect));
if (! parse_status)
+#endif
{
bad_protocol("unparseable LLSD data");
}
diff --git a/indra/llcommon/llsdserialize.cpp b/indra/llcommon/llsdserialize.cpp
index dc8f8f5737..97bf51eeaa 100644
--- a/indra/llcommon/llsdserialize.cpp
+++ b/indra/llcommon/llsdserialize.cpp
@@ -123,12 +123,26 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)
bool fail_if_not_legacy = false;
/*
- * Get the first line before anything.
+ * Get the first line before anything. Don't read more than max_bytes:
+ * this get() overload reads no more than (count-1) bytes into the
+ * specified buffer. In the usual case when max_bytes exceeds
+ * sizeof(hdr_buf), get() will read no more than sizeof(hdr_buf)-2.
*/
- // Remember str's original input position: if there's no header, we'll
- // want to back up and retry.
- str.get(hdr_buf, sizeof(hdr_buf), '\n');
+ str.get(hdr_buf, std::min(max_bytes+1, sizeof(hdr_buf)-1), '\n');
auto inbuf = str.gcount();
+ // https://en.cppreference.com/w/cpp/io/basic_istream/get
+ // When the get() above sees the specified delimiter '\n', it stops there
+ // without pulling it from the stream. If it turns out that the stream
+ // does NOT contain a header, and the content includes meaningful '\n',
+ // it's important to pull that into hdr_buf too.
+ if (inbuf < max_bytes && str.get(hdr_buf[inbuf]))
+ {
+ // got the delimiting '\n'
+ ++inbuf;
+ // None of the following requires that hdr_buf contain a final '\0'
+ // byte. We could store one if needed, since even the incremented
+ // inbuf won't exceed sizeof(hdr_buf)-1, but there's no need.
+ }
std::string header{ hdr_buf, inbuf };
if (str.fail())
{
@@ -175,17 +189,17 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)
/*
* Create the parser as appropriate
*/
- if (header == LLSD_BINARY_HEADER)
+ if (0 == LLStringUtil::compareInsensitive(header, LLSD_BINARY_HEADER))
{
- return (parse_using<LLSDBinaryParser>(str, sd, max_bytes) > 0);
+ return (parse_using<LLSDBinaryParser>(str, sd, max_bytes-inbuf) > 0);
}
- else if (header == LLSD_XML_HEADER)
+ else if (0 == LLStringUtil::compareInsensitive(header, LLSD_XML_HEADER))
{
- return (parse_using<LLSDXMLParser>(str, sd, max_bytes) > 0);
+ return (parse_using<LLSDXMLParser>(str, sd, max_bytes-inbuf) > 0);
}
- else if (header == LLSD_NOTATION_HEADER)
+ else if (0 == LLStringUtil::compareInsensitive(header, LLSD_NOTATION_HEADER))
{
- return (parse_using<LLSDNotationParser>(str, sd, max_bytes) > 0);
+ return (parse_using<LLSDNotationParser>(str, sd, max_bytes-inbuf) > 0);
}
else // no header we recognize
{
@@ -207,7 +221,22 @@ bool LLSDSerialize::deserialize(LLSD& sd, std::istream& str, S32 max_bytes)
LLMemoryStreamBuf already(reinterpret_cast<const U8*>(hdr_buf), inbuf);
cat_streambuf prebuff(&already, str.rdbuf());
std::istream prepend(&prebuff);
+#if 1
return (p->parse(prepend, sd, max_bytes) > 0);
+#else
+ // debugging the reconstituted 'prepend' stream
+ // allocate a buffer that we hope is big enough for the whole thing
+ std::vector<char> wholemsg((max_bytes == size_t(SIZE_UNLIMITED))? 1024 : max_bytes);
+ prepend.read(wholemsg.data(), std::min(max_bytes, wholemsg.size()));
+ LLMemoryStream replay(reinterpret_cast<const U8*>(wholemsg.data()), prepend.gcount());
+ auto success{ p->parse(replay, sd, prepend.gcount()) > 0 };
+ {
+ LL_DEBUGS() << (success? "parsed: $$" : "failed: '")
+ << std::string(wholemsg.data(), llmin(prepend.gcount(), 100)) << "$$"
+ << LL_ENDL;
+ }
+ return success;
+#endif
}
}
diff --git a/indra/llcommon/tests/llleap_test.cpp b/indra/llcommon/tests/llleap_test.cpp
index 9754353ab0..6c799b7993 100644
--- a/indra/llcommon/tests/llleap_test.cpp
+++ b/indra/llcommon/tests/llleap_test.cpp
@@ -110,12 +110,12 @@ namespace tut
"import os\n"
"import sys\n"
"\n"
- // Don't forget that this Python script is written to some
- // temp directory somewhere! Its __file__ is useless in
- // finding indra/lib/python. Use our __FILE__, with
- // raw-string syntax to deal with Windows pathnames.
- "mydir = os.path.dirname(r'" << __FILE__ << "')\n"
- "from llbase import llsd\n"
+ "try:\n"
+ // new freestanding llsd package
+ " import llsd\n"
+ "except ImportError:\n"
+ // older llbase.llsd module
+ " from llbase import llsd\n"
"\n"
"class ProtocolError(Exception):\n"
" def __init__(self, msg, data):\n"
@@ -126,26 +126,26 @@ namespace tut
" pass\n"
"\n"
"def get():\n"
- " hdr = ''\n"
- " while ':' not in hdr and len(hdr) < 20:\n"
- " hdr += sys.stdin.read(1)\n"
+ " hdr = []\n"
+ " while b':' not in hdr and len(hdr) < 20:\n"
+ " hdr.append(sys.stdin.buffer.read(1))\n"
" if not hdr:\n"
" sys.exit(0)\n"
- " if not hdr.endswith(':'):\n"
+ " if not hdr[-1] == b':':\n"
" raise ProtocolError('Expected len:data, got %r' % hdr, hdr)\n"
" try:\n"
- " length = int(hdr[:-1])\n"
+ " length = int(b''.join(hdr[:-1]))\n"
" except ValueError:\n"
" raise ProtocolError('Non-numeric len %r' % hdr[:-1], hdr[:-1])\n"
" parts = []\n"
" received = 0\n"
" while received < length:\n"
- " parts.append(sys.stdin.read(length - received))\n"
+ " parts.append(sys.stdin.buffer.read(length - received))\n"
" received += len(parts[-1])\n"
- " data = ''.join(parts)\n"
+ " data = b''.join(parts)\n"
" assert len(data) == length\n"
" try:\n"
- " return llsd.parse(data.encode())\n"
+ " return llsd.parse(data)\n"
// Seems the old indra.base.llsd module didn't properly
// convert IndexError (from running off end of string) to
// LLSDParseError.
@@ -185,11 +185,11 @@ namespace tut
" return _reply\n"
"\n"
"def put(req):\n"
- " sys.stdout.write(':'.join((str(len(req)), req)))\n"
+ " sys.stdout.buffer.write(b'%d:%b' % (len(req), req))\n"
" sys.stdout.flush()\n"
"\n"
"def send(pump, data):\n"
- " put(llsd.format_notation(dict(pump=pump, data=data)).decode())\n"
+ " put(llsd.format_notation(dict(pump=pump, data=data)))\n"
"\n"
"def request(pump, data):\n"
" # we expect 'data' is a dict\n"
diff --git a/indra/llcommon/tests/llsdserialize_test.cpp b/indra/llcommon/tests/llsdserialize_test.cpp
index 601f2c580d..39b93cf335 100644
--- a/indra/llcommon/tests/llsdserialize_test.cpp
+++ b/indra/llcommon/tests/llsdserialize_test.cpp
@@ -51,10 +51,11 @@ typedef U32 uint32_t;
#include "boost/phoenix/core/argument.hpp"
using namespace boost::phoenix;
-#include "../llsd.h"
-#include "../llsdserialize.h"
+#include "llsd.h"
+#include "llsdserialize.h"
#include "llsdutil.h"
-#include "../llformat.h"
+#include "llformat.h"
+#include "llmemorystream.h"
#include "../test/lltut.h"
#include "../test/namedtempfile.h"
@@ -1898,14 +1899,22 @@ namespace tut
static void writeLLSDArray(const FormatterFunction& serialize,
std::ostream& out, const LLSD& array)
{
- BOOST_FOREACH(LLSD item, llsd::inArray(array))
+ for (const LLSD& item : llsd::inArray(array))
{
- serialize(item, out);
- // It's important to separate with newlines because Python's llsd
- // module doesn't support parsing from a file stream, only from a
- // string, so we have to know how much of the file to read into a
- // string.
- out << '\n';
+ // It's important to delimit the entries in this file somehow
+ // because, although Python's llsd.parse() can accept a file
+ // stream, the XML parser expects EOF after a single outer element
+ // -- it doesn't just stop. So we must extract a sequence of bytes
+ // strings from the file. But since one of the serialization
+ // formats we want to test is binary, we can't pick any single
+ // byte value as a delimiter! Use a binary integer length prefix
+ // instead.
+ std::ostringstream buffer;
+ serialize(item, buffer);
+ auto buffstr{ buffer.str() };
+ int bufflen{ static_cast<int>(buffstr.length()) };
+ out.write(reinterpret_cast<const char*>(&bufflen), sizeof(bufflen));
+ out.write(buffstr.c_str(), buffstr.length());
}
}
@@ -1932,7 +1941,7 @@ namespace tut
" except StopIteration:\n"
" pass\n"
" else:\n"
- " assert False, 'Too many data items'\n";
+ " raise AssertionError('Too many data items')\n";
// Create an llsdXXXXXX file containing 'data' serialized to
// notation.
@@ -1948,10 +1957,23 @@ namespace tut
python("read C++ " + desc,
placeholders::arg1 <<
import_llsd <<
- "def parse_each(iterable):\n"
- " for item in iterable:\n"
- " yield llsd.parse(item)\n" <<
- pydata <<
+ "from functools import partial\n"
+ "import struct\n"
+ "lenformat = struct.Struct('i')\n"
+ "def parse_each(inf):\n"
+ " for rawlen in iter(partial(inf.read, lenformat.size), b''):\n"
+ " len = lenformat.unpack(rawlen)[0]\n"
+ // Since llsd.parse() has no max_bytes argument, instead of
+ // passing the input stream directly to parse(), read the item
+ // into a distinct bytes object and parse that.
+ " data = inf.read(len)\n"
+ " try:\n"
+ " yield llsd.parse(data)\n"
+ " except llsd.LLSDParseError as err:\n"
+ " print(f'*** {err}')\n"
+ " print(f'Bad content:\\n{data!r}')\n"
+ " raise\n"
+ << pydata <<
// Don't forget raw-string syntax for Windows pathnames.
"verify(parse_each(open(r'" << file.getName() << "', 'rb')))\n");
}
@@ -2009,6 +2031,26 @@ namespace tut
|*==========================================================================*/
// helper for test<8> - test<12>
+ bool itemFromStream(std::istream& istr, LLSD& item, const ParserFunction& parse)
+ {
+ // reset the output value for debugging clarity
+ item.clear();
+ // We use an int length prefix as a foolproof delimiter even for
+ // binary serialized streams.
+ int length{ 0 };
+ istr.read(reinterpret_cast<char*>(&length), sizeof(length));
+// return parse(istr, item, length);
+ // Sadly, as of 2022-12-01 it seems we can't really trust our LLSD
+ // parsers to honor max_bytes: this test works better when we read
+ // each item into its own distinct LLMemoryStream, instead of passing
+ // the original istr with a max_bytes constraint.
+ std::vector<U8> buffer(length);
+ istr.read(reinterpret_cast<char*>(buffer.data()), length);
+ LLMemoryStream stream(buffer.data(), length);
+ return parse(stream, item, length);
+ }
+
+ // helper for test<8> - test<12>
void fromPythonUsing(const std::string& pyformatter,
const ParserFunction& parse=
[](std::istream& istr, LLSD& data, size_t max_bytes)
@@ -2022,6 +2064,8 @@ namespace tut
python("Python " + pyformatter,
placeholders::arg1 <<
import_llsd <<
+ "import struct\n"
+ "lenformat = struct.Struct('i')\n"
"DATA = [\n"
" 17,\n"
" 3.14,\n"
@@ -2034,29 +2078,33 @@ namespace tut
// N.B. Using 'print' implicitly adds newlines.
"with open(r'" << file.getName() << "', 'wb') as f:\n"
" for item in DATA:\n"
- " print(llsd." << pyformatter << "(item), file=f)\n");
+ " serialized = llsd." << pyformatter << "(item)\n"
+ " f.write(lenformat.pack(len(serialized)))\n"
+ " f.write(serialized)\n");
std::ifstream inf(file.getName().c_str());
LLSD item;
- // Notice that we're not doing anything special to parse out the
- // newlines: LLSDSerialize::fromNotation ignores them. While it would
- // seem they're not strictly necessary, going in this direction, we
- // want to ensure that notation-separated-by-newlines works in both
- // directions -- since in practice, a given file might be read by
- // either language.
- ensure("Failed to read LLSD::Integer from Python",
- parse(inf, item, LLSDSerialize::SIZE_UNLIMITED));
- ensure_equals(item.asInteger(), 17);
- ensure("Failed to read LLSD::Real from Python",
- parse(inf, item, LLSDSerialize::SIZE_UNLIMITED));
- ensure_approximately_equals("Bad LLSD::Real value from Python",
- item.asReal(), 3.14, 7); // 7 bits ~= 0.01
- ensure("Failed to read LLSD::String from Python",
- parse(inf, item, LLSDSerialize::SIZE_UNLIMITED));
- ensure_equals(item.asString(),
- "This string\n"
- "has several\n"
- "lines.");
+ try
+ {
+ ensure("Failed to read LLSD::Integer from Python",
+ itemFromStream(inf, item, parse));
+ ensure_equals(item.asInteger(), 17);
+ ensure("Failed to read LLSD::Real from Python",
+ itemFromStream(inf, item, parse));
+ ensure_approximately_equals("Bad LLSD::Real value from Python",
+ item.asReal(), 3.14, 7); // 7 bits ~= 0.01
+ ensure("Failed to read LLSD::String from Python",
+ itemFromStream(inf, item, parse));
+ ensure_equals(item.asString(),
+ "This string\n"
+ "has several\n"
+ "lines.");
+ }
+ catch (const tut::failure& err)
+ {
+ std::cout << "for " << err.what() << ", item = " << item << std::endl;
+ throw;
+ }
}
template<> template<>