summaryrefslogtreecommitdiff
path: root/indra
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2020-05-14 16:58:33 -0400
committerNat Goodspeed <nat@lindenlab.com>2020-05-14 16:58:33 -0400
commit9d5d257ceeb8297bcd8ac17164b6584717b5c024 (patch)
treed0c32fa078c1cdf9ae9e8566cce575efa96b8aa5 /indra
parent98dfba0d2f24aeb92e023df9d48b23fef8253024 (diff)
DRTVWR-476, SL-12204: Fix crash in Marketplace Listings.
The observed crash was due to sharing a stateful global resource (the global LLMessageSystem instance) between different tasks. Specifically, a coroutine sets its mMessageReader one way, expecting that value to persist until it's done with message parsing, but another coroutine sneaks in at a suspension point and sets it differently. Introduce LockMessageReader and LockMessageChecker classes, which must be instantiated by a consumer of the resource. The constructor of each locks a coroutine-aware mutex, so that for the lifetime of the lock object no other coroutine can instantiate another. Refactor the code so that LLMessageSystem::mMessageReader can only be modified by LockMessageReader, not by direct assignment. mMessageReader is now an instance of LLMessageReaderPointer, which supports dereferencing and comparison but not assignment. Only LockMessageReader can change its value. LockMessageReader addresses the use case in which the specific mMessageReader value need only persist for the duration of a single method call. Add an instance in LLMessageHandlerBridge::post(). LockMessageChecker is a subclass of LockMessageReader: both lock the same mutex. LockMessageChecker addresses the use case in which the specific mMessageReader value must persist across multiple method calls. Modify the methods in question to require a LockMessageChecker instance. Provide LockMessageChecker forwarding methods to facilitate calling the underlying LLMessageSystem methods via the LockMessageChecker instance. Add LockMessageChecker instances to LLAppViewer::idleNetwork(), a couple cases in idle_startup() and LLMessageSystem::establishBidirectionalTrust().
Diffstat (limited to 'indra')
-rw-r--r--indra/llmessage/message.cpp41
-rw-r--r--indra/llmessage/message.h130
-rw-r--r--indra/newview/llappviewer.cpp49
-rw-r--r--indra/newview/llstartup.cpp42
4 files changed, 199 insertions, 63 deletions
diff --git a/indra/llmessage/message.cpp b/indra/llmessage/message.cpp
index 6ef4025ab1..da62bb12e8 100644
--- a/indra/llmessage/message.cpp
+++ b/indra/llmessage/message.cpp
@@ -117,8 +117,8 @@ void LLMessageHandlerBridge::post(LLHTTPNode::ResponsePtr response,
gMessageSystem->mLastSender = LLHost(input["sender"].asString());
gMessageSystem->mPacketsIn += 1;
gMessageSystem->mLLSDMessageReader->setMessage(namePtr, input["body"]);
- gMessageSystem->mMessageReader = gMessageSystem->mLLSDMessageReader;
-
+ LockMessageReader rdr(gMessageSystem->mMessageReader, gMessageSystem->mLLSDMessageReader);
+
if(gMessageSystem->callHandler(namePtr, false, gMessageSystem))
{
response->result(LLSD());
@@ -189,7 +189,7 @@ void LLMessageSystem::init()
mTimingCallbackData = NULL;
mMessageBuilder = NULL;
- mMessageReader = NULL;
+ LockMessageReader(mMessageReader, NULL);
}
// Read file and build message templates
@@ -230,7 +230,6 @@ LLMessageSystem::LLMessageSystem(const std::string& filename, U32 port,
mTemplateMessageReader = new LLTemplateMessageReader(mMessageNumbers);
mLLSDMessageReader = new LLSDMessageReader();
- mMessageReader = NULL;
// initialize various bits of net info
mSocket = 0;
@@ -330,7 +329,6 @@ LLMessageSystem::~LLMessageSystem()
delete mTemplateMessageReader;
mTemplateMessageReader = NULL;
- mMessageReader = NULL;
delete mTemplateMessageBuilder;
mTemplateMessageBuilder = NULL;
@@ -480,11 +478,12 @@ LLCircuitData* LLMessageSystem::findCircuit(const LLHost& host,
}
// Returns TRUE if a valid, on-circuit message has been received.
-BOOL LLMessageSystem::checkMessages( S64 frame_count )
+// Requiring a non-const LockMessageChecker reference ensures that
+// mMessageReader has been set to mTemplateMessageReader.
+BOOL LLMessageSystem::checkMessages(LockMessageChecker&, S64 frame_count )
{
// Pump
BOOL valid_packet = FALSE;
- mMessageReader = mTemplateMessageReader;
LLTransferTargetVFile::updateQueue();
@@ -748,7 +747,7 @@ S32 LLMessageSystem::getReceiveBytes() const
}
-void LLMessageSystem::processAcks(F32 collect_time)
+void LLMessageSystem::processAcks(LockMessageChecker&, F32 collect_time)
{
F64Seconds mt_sec = getMessageTimeSeconds();
{
@@ -2062,8 +2061,9 @@ void LLMessageSystem::dispatch(
return;
}
// enable this for output of message names
- //LL_INFOS("Messaging") << "< \"" << msg_name << "\"" << LL_ENDL;
- //LL_DEBUGS() << "data: " << LLSDNotationStreamer(message) << LL_ENDL;
+ LL_DEBUGS("Messaging") << "< \"" << msg_name << "\"" << LL_ENDL;
+ LL_DEBUGS("Messaging") << "context: " << context << LL_ENDL;
+ LL_DEBUGS("Messaging") << "message: " << message << LL_ENDL;
handler->post(responsep, context, message);
}
@@ -3268,6 +3268,8 @@ void null_message_callback(LLMessageSystem *msg, void **data)
// up, and then sending auth messages.
void LLMessageSystem::establishBidirectionalTrust(const LLHost &host, S64 frame_count )
{
+ LockMessageChecker lmc(this);
+
std::string shared_secret = get_shared_secret();
if(shared_secret.empty())
{
@@ -3287,7 +3289,7 @@ void LLMessageSystem::establishBidirectionalTrust(const LLHost &host, S64 frame_
addU8Fast(_PREHASH_PingID, 0);
addU32Fast(_PREHASH_OldestUnacked, 0);
sendMessage(host);
- if (checkMessages( frame_count ))
+ if (lmc.checkMessages( frame_count ))
{
if (isMessageFast(_PREHASH_CompletePingCheck) &&
(getSender() == host))
@@ -3295,7 +3297,7 @@ void LLMessageSystem::establishBidirectionalTrust(const LLHost &host, S64 frame_
break;
}
}
- processAcks();
+ lmc.processAcks();
ms_sleep(1);
}
@@ -3314,8 +3316,8 @@ void LLMessageSystem::establishBidirectionalTrust(const LLHost &host, S64 frame_
cdp = mCircuitInfo.findCircuit(host);
if(!cdp) break; // no circuit anymore, no point continuing.
if(cdp->getTrusted()) break; // circuit is trusted.
- checkMessages(frame_count);
- processAcks();
+ lmc.checkMessages(frame_count);
+ lmc.processAcks();
ms_sleep(1);
}
}
@@ -3973,11 +3975,18 @@ void LLMessageSystem::setTimeDecodesSpamThreshold( F32 seconds )
LLMessageReader::setTimeDecodesSpamThreshold(seconds);
}
+LockMessageChecker::LockMessageChecker(LLMessageSystem* msgsystem):
+ // for the lifespan of this LockMessageChecker instance, use
+ // LLTemplateMessageReader as msgsystem's mMessageReader
+ LockMessageReader(msgsystem->mMessageReader, msgsystem->mTemplateMessageReader),
+ mMessageSystem(msgsystem)
+{}
+
// HACK! babbage: return true if message rxed via either UDP or HTTP
// TODO: babbage: move gServicePump in to LLMessageSystem?
-bool LLMessageSystem::checkAllMessages(S64 frame_count, LLPumpIO* http_pump)
+bool LLMessageSystem::checkAllMessages(LockMessageChecker& lmc, S64 frame_count, LLPumpIO* http_pump)
{
- if(checkMessages(frame_count))
+ if(lmc.checkMessages(frame_count))
{
return true;
}
diff --git a/indra/llmessage/message.h b/indra/llmessage/message.h
index 0af5a1b96d..a3f2829ece 100644
--- a/indra/llmessage/message.h
+++ b/indra/llmessage/message.h
@@ -61,6 +61,8 @@
#include "llstoredmessage.h"
#include "boost/function.hpp"
#include "llpounceable.h"
+#include "llcoros.h"
+#include LLCOROS_MUTEX_HEADER
const U32 MESSAGE_MAX_STRINGS_LENGTH = 64;
const U32 MESSAGE_NUMBER_OF_HASH_BUCKETS = 8192;
@@ -199,6 +201,89 @@ public:
virtual void complete(const LLHost& host, const LLUUID& agent) const = 0;
};
+/**
+ * SL-12204: We've observed crashes when consumer code sets
+ * LLMessageSystem::mMessageReader, assuming that all subsequent processing of
+ * the current message will use the same mMessageReader value -- only to have
+ * a different coroutine sneak in and replace mMessageReader before
+ * completion. This is a limitation of sharing a stateful global resource for
+ * message parsing; instead code receiving a new message should instantiate a
+ * (trivially constructed) local message parser and use that.
+ *
+ * Until then, when one coroutine sets a particular LLMessageReader subclass
+ * as the current message reader, ensure that no other coroutine can replace
+ * it until the first coroutine has finished with its message.
+ *
+ * This is achieved with two helper classes. LLMessageSystem::mMessageReader
+ * is now an LLMessageReaderPointer instance, which can efficiently compare or
+ * dereference its contained LLMessageReader* but which cannot be directly
+ * assigned. To change the value of LLMessageReaderPointer, you must
+ * instantiate LockMessageReader with the LLMessageReader* you wish to make
+ * current. mMessageReader will have that value for the lifetime of the
+ * LockMessageReader instance, then revert to nullptr. Moreover, as its name
+ * implies, LockMessageReader locks the mutex in LLMessageReaderPointer so
+ * that any other coroutine instantiating LockMessageReader will block until
+ * the first coroutine has destroyed its instance.
+ */
+class LLMessageReaderPointer
+{
+public:
+ LLMessageReaderPointer(): mPtr(nullptr) {}
+ // It is essential that comparison and dereferencing must be fast, which
+ // is why we don't check for nullptr when dereferencing.
+ LLMessageReader* operator->() const { return mPtr; }
+ bool operator==(const LLMessageReader* other) const { return mPtr == other; }
+ bool operator!=(const LLMessageReader* other) const { return ! (*this == other); }
+private:
+ // Only LockMessageReader can set mPtr.
+ friend class LockMessageReader;
+ LLMessageReader* mPtr;
+ LLCoros::Mutex mMutex;
+};
+
+/**
+ * To set mMessageReader to nullptr:
+ *
+ * @code
+ * // use an anonymous instance that is destroyed immediately
+ * LockMessageReader(gMessageSystem->mMessageReader, nullptr);
+ * @endcode
+ *
+ * Why do we still require going through LockMessageReader at all? Because it
+ * would be Bad if any coroutine set mMessageReader to nullptr while another
+ * coroutine was still parsing a message.
+ */
+class LockMessageReader
+{
+public:
+ // Because LockMessageReader contains LLCoros::LockType, it is already
+ // move-only. No need to delete the copy constructor or copy assignment.
+ LockMessageReader(LLMessageReaderPointer& var, LLMessageReader* instance):
+ mVar(var.mPtr),
+ mLock(var.mMutex)
+ {
+ mVar = instance;
+ }
+ ~LockMessageReader()
+ {
+ mVar = nullptr;
+ }
+private:
+ // capture a reference to LLMessageReaderPointer::mPtr
+ decltype(LLMessageReaderPointer::mPtr)& mVar;
+ // while holding a lock on LLMessageReaderPointer::mMutex
+ LLCoros::LockType mLock;
+};
+
+/**
+ * LockMessageReader is great as long as you only need mMessageReader locked
+ * during a single LLMessageSystem function call. However, empirically the
+ * sequence from checkAllMessages() through processAcks() need mMessageReader
+ * locked to LLTemplateMessageReader. Enforce that by making them require an
+ * instance of LockMessageChecker.
+ */
+class LockMessageChecker;
+
class LLMessageSystem : public LLMessageSenderInterface
{
private:
@@ -331,8 +416,8 @@ public:
bool addCircuitCode(U32 code, const LLUUID& session_id);
BOOL poll(F32 seconds); // Number of seconds that we want to block waiting for data, returns if data was received
- BOOL checkMessages( S64 frame_count = 0 );
- void processAcks(F32 collect_time = 0.f);
+ BOOL checkMessages(LockMessageChecker&, S64 frame_count = 0 );
+ void processAcks(LockMessageChecker&, F32 collect_time = 0.f);
BOOL isMessageFast(const char *msg);
BOOL isMessage(const char *msg)
@@ -730,7 +815,7 @@ public:
const LLSD& data);
// Check UDP messages and pump http_pump to receive HTTP messages.
- bool checkAllMessages(S64 frame_count, LLPumpIO* http_pump);
+ bool checkAllMessages(LockMessageChecker&, S64 frame_count, LLPumpIO* http_pump);
// Moved to allow access from LLTemplateMessageDispatcher
void clearReceiveState();
@@ -817,12 +902,13 @@ private:
LLMessageBuilder* mMessageBuilder;
LLTemplateMessageBuilder* mTemplateMessageBuilder;
LLSDMessageBuilder* mLLSDMessageBuilder;
- LLMessageReader* mMessageReader;
+ LLMessageReaderPointer mMessageReader;
LLTemplateMessageReader* mTemplateMessageReader;
LLSDMessageReader* mLLSDMessageReader;
friend class LLMessageHandlerBridge;
-
+ friend class LockMessageChecker;
+
bool callHandler(const char *name, bool trustedSource,
LLMessageSystem* msg);
@@ -835,6 +921,40 @@ private:
// external hook into messaging system
extern LLPounceable<LLMessageSystem*, LLPounceableStatic> gMessageSystem;
+// Implementation of LockMessageChecker depends on definition of
+// LLMessageSystem, hence must follow it.
+class LockMessageChecker: public LockMessageReader
+{
+public:
+ LockMessageChecker(LLMessageSystem* msgsystem);
+
+ // For convenience, provide forwarding wrappers so you can call (e.g.)
+ // checkAllMessages() on your LockMessageChecker instance instead of
+ // passing the instance to LLMessageSystem::checkAllMessages(). Use
+ // perfect forwarding to avoid having to maintain these wrappers in sync
+ // with the target methods.
+ template <typename... ARGS>
+ bool checkAllMessages(ARGS&&... args)
+ {
+ return mMessageSystem->checkAllMessages(*this, std::forward<ARGS>(args)...);
+ }
+
+ template <typename... ARGS>
+ bool checkMessages(ARGS&&... args)
+ {
+ return mMessageSystem->checkMessages(*this, std::forward<ARGS>(args)...);
+ }
+
+ template <typename... ARGS>
+ void processAcks(ARGS&&... args)
+ {
+ return mMessageSystem->processAcks(*this, std::forward<ARGS>(args)...);
+ }
+
+private:
+ LLMessageSystem* mMessageSystem;
+};
+
// Must specific overall system version, which is used to determine
// if a patch is available in the message template checksum verification.
// Return true if able to initialize system.
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index 65bfcec8c4..70d3c10524 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -5250,37 +5250,40 @@ void LLAppViewer::idleNetwork()
const S64 frame_count = gFrameCount; // U32->S64
F32 total_time = 0.0f;
- while (gMessageSystem->checkAllMessages(frame_count, gServicePump))
{
- if (gDoDisconnect)
+ LockMessageChecker lmc(gMessageSystem);
+ while (lmc.checkAllMessages(frame_count, gServicePump))
{
- // We're disconnecting, don't process any more messages from the server
- // We're usually disconnecting due to either network corruption or a
- // server going down, so this is OK.
- break;
- }
+ if (gDoDisconnect)
+ {
+ // We're disconnecting, don't process any more messages from the server
+ // We're usually disconnecting due to either network corruption or a
+ // server going down, so this is OK.
+ break;
+ }
- total_decoded++;
- gPacketsIn++;
+ total_decoded++;
+ gPacketsIn++;
- if (total_decoded > MESSAGE_MAX_PER_FRAME)
- {
- break;
- }
+ if (total_decoded > MESSAGE_MAX_PER_FRAME)
+ {
+ break;
+ }
#ifdef TIME_THROTTLE_MESSAGES
- // Prevent slow packets from completely destroying the frame rate.
- // This usually happens due to clumps of avatars taking huge amount
- // of network processing time (which needs to be fixed, but this is
- // a good limit anyway).
- total_time = check_message_timer.getElapsedTimeF32();
- if (total_time >= CheckMessagesMaxTime)
- break;
+ // Prevent slow packets from completely destroying the frame rate.
+ // This usually happens due to clumps of avatars taking huge amount
+ // of network processing time (which needs to be fixed, but this is
+ // a good limit anyway).
+ total_time = check_message_timer.getElapsedTimeF32();
+ if (total_time >= CheckMessagesMaxTime)
+ break;
#endif
- }
+ }
- // Handle per-frame message system processing.
- gMessageSystem->processAcks(gSavedSettings.getF32("AckCollectTime"));
+ // Handle per-frame message system processing.
+ lmc.processAcks(gSavedSettings.getF32("AckCollectTime"));
+ }
#ifdef TIME_THROTTLE_MESSAGES
if (total_time >= CheckMessagesMaxTime)
diff --git a/indra/newview/llstartup.cpp b/indra/newview/llstartup.cpp
index 4d25e8b2a3..ee12c451eb 100644
--- a/indra/newview/llstartup.cpp
+++ b/indra/newview/llstartup.cpp
@@ -1534,12 +1534,14 @@ bool idle_startup()
{
LLStartUp::setStartupState( STATE_AGENT_SEND );
}
- LLMessageSystem* msg = gMessageSystem;
- while (msg->checkAllMessages(gFrameCount, gServicePump))
{
- display_startup();
+ LockMessageChecker lmc(gMessageSystem);
+ while (lmc.checkAllMessages(gFrameCount, gServicePump))
+ {
+ display_startup();
+ }
+ lmc.processAcks();
}
- msg->processAcks();
display_startup();
return FALSE;
}
@@ -1589,25 +1591,27 @@ bool idle_startup()
//---------------------------------------------------------------------
if (STATE_AGENT_WAIT == LLStartUp::getStartupState())
{
- LLMessageSystem* msg = gMessageSystem;
- while (msg->checkAllMessages(gFrameCount, gServicePump))
{
- if (gAgentMovementCompleted)
- {
- // Sometimes we have more than one message in the
- // queue. break out of this loop and continue
- // processing. If we don't, then this could skip one
- // or more login steps.
- break;
- }
- else
+ LockMessageChecker lmc(gMessageSystem);
+ while (lmc.checkAllMessages(gFrameCount, gServicePump))
{
- LL_DEBUGS("AppInit") << "Awaiting AvatarInitComplete, got "
- << msg->getMessageName() << LL_ENDL;
+ if (gAgentMovementCompleted)
+ {
+ // Sometimes we have more than one message in the
+ // queue. break out of this loop and continue
+ // processing. If we don't, then this could skip one
+ // or more login steps.
+ break;
+ }
+ else
+ {
+ LL_DEBUGS("AppInit") << "Awaiting AvatarInitComplete, got "
+ << gMessageSystem->getMessageName() << LL_ENDL;
+ }
+ display_startup();
}
- display_startup();
+ lmc.processAcks();
}
- msg->processAcks();
display_startup();