summaryrefslogtreecommitdiff
path: root/indra/llcommon
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2019-10-22 17:14:26 -0400
committerNat Goodspeed <nat@lindenlab.com>2020-03-25 19:02:24 -0400
commit1345a02b21a83bc4ee7ff72efc1858e956f18c53 (patch)
treea85a70d9691eb5fb35063b971e729cca30ab7e23 /indra/llcommon
parent28a54c2f7b25b9fda763c51746701f0cd21c8018 (diff)
DRTVWR-476: Terminate long-lived coroutines to avoid shutdown crash.
Add LLCoros::TempStatus instances around known suspension points so printActiveCoroutines() can report what each suspended coroutine is waiting for. Similarly, sprinkle checkStop() calls at known suspension points. Make LLApp::setStatus() post an event to a new LLEventPump "LLApp" with a string corresponding to the status value being set, but only until ~LLEventPumps() -- since setStatus() also gets called very late in the application's lifetime. Make postAndSuspendSetup() (used by postAndSuspend(), suspendUntilEventOn(), postAndSuspendWithTimeout(), suspendUntilEventOnWithTimeout()) add a listener on the new "LLApp" LLEventPump that pushes the new LLCoros::Stopping exception to the coroutine waiting on the LLCoros::Promise. Make it return the new LLBoundListener along with the previous one. Accordingly, make postAndSuspend() and postAndSuspendWithTimeout() store the new LLBoundListener returned by postAndSuspendSetup() in a LLTempBoundListener (as with the previous one) so it will automatically disconnect once the wait is over. Make each LLCoprocedurePool instance listen on "LLApp" with a listener that closes the queue on which new work items are dispatched. Closing the queue causes the waiting dispatch coroutine to terminate. Store the connection in an LLTempBoundListener on the LLCoprocedurePool so it will disconnect automatically on destruction. Refactor the loop in coprocedureInvokerCoro() to instantiate TempStatus around the suspending call. Change a couple spammy LL_INFOS() calls to LL_DEBUGS(). Give all logging calls in that module a "CoProcMgr" tag to make it straightforward to re-enable the LL_DEBUGS() calls as desired.
Diffstat (limited to 'indra/llcommon')
-rw-r--r--indra/llcommon/llapp.cpp36
-rw-r--r--indra/llcommon/lleventcoro.cpp80
2 files changed, 101 insertions, 15 deletions
diff --git a/indra/llcommon/llapp.cpp b/indra/llcommon/llapp.cpp
index 421af3006e..3dab632aef 100644
--- a/indra/llcommon/llapp.cpp
+++ b/indra/llcommon/llapp.cpp
@@ -49,6 +49,8 @@
#include "google_breakpad/exception_handler.h"
#include "stringize.h"
#include "llcleanup.h"
+#include "llevents.h"
+#include "llsdutil.h"
//
// Signal handling
@@ -561,10 +563,42 @@ void LLApp::runErrorHandler()
LLApp::setStopped();
}
+namespace
+{
+
+static std::map<LLApp::EAppStatus, const char*> statusDesc
+{
+ { LLApp::APP_STATUS_RUNNING, "running" },
+ { LLApp::APP_STATUS_QUITTING, "quitting" },
+ { LLApp::APP_STATUS_STOPPED, "stopped" },
+ { LLApp::APP_STATUS_ERROR, "error" }
+};
+
+} // anonymous namespace
+
// static
void LLApp::setStatus(EAppStatus status)
{
- sStatus = status;
+ sStatus = status;
+
+ // This can also happen very late in the application lifecycle -- don't
+ // resurrect a deleted LLSingleton
+ if (! LLEventPumps::wasDeleted())
+ {
+ // notify interested parties of status change
+ LLSD statsd;
+ auto found = statusDesc.find(status);
+ if (found != statusDesc.end())
+ {
+ statsd = found->second;
+ }
+ else
+ {
+ // unknown status? at least report value
+ statsd = LLSD::Integer(status);
+ }
+ LLEventPumps::instance().obtain("LLApp").post(llsd::map("status", statsd));
+ }
}
diff --git a/indra/llcommon/lleventcoro.cpp b/indra/llcommon/lleventcoro.cpp
index b374c9fa04..23e0012a1a 100644
--- a/indra/llcommon/lleventcoro.cpp
+++ b/indra/llcommon/lleventcoro.cpp
@@ -32,6 +32,7 @@
#include "lleventcoro.h"
// STL headers
#include <chrono>
+#include <exception>
// std headers
// external library headers
#include <boost/fiber/operations.hpp>
@@ -40,6 +41,7 @@
#include "llsdutil.h"
#include "llerror.h"
#include "llcoros.h"
+#include "stringize.h"
namespace
{
@@ -106,29 +108,39 @@ void storeToLLSDPath(LLSD& dest, const LLSD& path, const LLSD& value)
void llcoro::suspend()
{
+ LLCoros::checkStop();
+ LLCoros::TempStatus st("waiting one tick");
boost::this_fiber::yield();
}
void llcoro::suspendUntilTimeout(float seconds)
{
+ LLCoros::checkStop();
// The fact that we accept non-integer seconds means we should probably
// use granularity finer than one second. However, given the overhead of
// the rest of our processing, it seems silly to use granularity finer
// than a millisecond.
+ LLCoros::TempStatus st(STRINGIZE("waiting for " << seconds << "s"));
boost::this_fiber::sleep_for(std::chrono::milliseconds(long(seconds * 1000)));
}
namespace
{
-LLBoundListener postAndSuspendSetup(const std::string& callerName,
- const std::string& listenerName,
- LLCoros::Promise<LLSD>& promise,
- const LLSD& event,
- const LLEventPumpOrPumpName& requestPumpP,
- const LLEventPumpOrPumpName& replyPumpP,
- const LLSD& replyPumpNamePath)
+// returns a listener on replyPumpP, also on "mainloop" -- both should be
+// stored in LLTempBoundListeners on the caller's stack frame
+std::pair<LLBoundListener, LLBoundListener>
+postAndSuspendSetup(const std::string& callerName,
+ const std::string& listenerName,
+ LLCoros::Promise<LLSD>& promise,
+ const LLSD& event,
+ const LLEventPumpOrPumpName& requestPumpP,
+ const LLEventPumpOrPumpName& replyPumpP,
+ const LLSD& replyPumpNamePath)
{
+ // Before we get any farther -- should we be stopping instead of
+ // suspending?
+ LLCoros::checkStop();
// Get the consuming attribute for THIS coroutine, the one that's about to
// suspend. Don't call get_consuming() in the lambda body: that would
// return the consuming attribute for some other coroutine, most likely
@@ -138,6 +150,38 @@ LLBoundListener postAndSuspendSetup(const std::string& callerName,
// value to the promise, thus fulfilling its future
llassert_always_msg(replyPumpP, ("replyPump required for " + callerName));
LLEventPump& replyPump(replyPumpP.getPump());
+ // The relative order of the two listen() calls below would only matter if
+ // "LLApp" were an LLEventMailDrop. But if we ever go there, we'd want to
+ // notice the pending LLApp status first.
+ LLBoundListener stopper(
+ LLEventPumps::instance().obtain("LLApp").listen(
+ listenerName,
+ [&promise, listenerName](const LLSD& status)
+ {
+ // anything except "running" should wake up the waiting
+ // coroutine
+ auto& statsd = status["status"];
+ if (statsd.asString() != "running")
+ {
+ LL_DEBUGS("lleventcoro") << listenerName
+ << " spotted status " << statsd
+ << ", throwing Stopping" << LL_ENDL;
+ try
+ {
+ promise.set_exception(
+ std::make_exception_ptr(
+ LLCoros::Stopping("status " + statsd.asString())));
+ }
+ catch (const boost::fibers::promise_already_satisfied& exc)
+ {
+ LL_WARNS("lleventcoro") << listenerName
+ << " couldn't throw Stopping "
+ "because promise already set" << LL_ENDL;
+ }
+ }
+ // do not consume -- every listener must see status
+ return false;
+ }));
LLBoundListener connection(
replyPump.listen(
listenerName,
@@ -160,6 +204,7 @@ LLBoundListener postAndSuspendSetup(const std::string& callerName,
return false;
}
}));
+
// skip the "post" part if requestPump is default-constructed
if (requestPumpP)
{
@@ -179,7 +224,7 @@ LLBoundListener postAndSuspendSetup(const std::string& callerName,
LL_DEBUGS("lleventcoro") << callerName << ": coroutine " << listenerName
<< " about to wait on LLEventPump " << replyPump.getName()
<< LL_ENDL;
- return connection;
+ return { connection, stopper };
}
} // anonymous
@@ -190,15 +235,17 @@ LLSD llcoro::postAndSuspend(const LLSD& event, const LLEventPumpOrPumpName& requ
LLCoros::Promise<LLSD> promise;
std::string listenerName(listenerNameForCoro());
- // Store connection into an LLTempBoundListener so we implicitly
+ // Store both connections into LLTempBoundListeners so we implicitly
// disconnect on return from this function.
- LLTempBoundListener connection =
+ auto connections =
postAndSuspendSetup("postAndSuspend()", listenerName, promise,
event, requestPump, replyPump, replyPumpNamePath);
+ LLTempBoundListener connection(connections.first), stopper(connections.second);
// declare the future
LLCoros::Future<LLSD> future = LLCoros::getFuture(promise);
// calling get() on the future makes us wait for it
+ LLCoros::TempStatus st(STRINGIZE("waiting for " << replyPump.getPump().getName()));
LLSD value(future.get());
LL_DEBUGS("lleventcoro") << "postAndSuspend(): coroutine " << listenerName
<< " resuming with " << value << LL_ENDL;
@@ -215,17 +262,22 @@ LLSD llcoro::postAndSuspendWithTimeout(const LLSD& event,
LLCoros::Promise<LLSD> promise;
std::string listenerName(listenerNameForCoro());
- // Store connection into an LLTempBoundListener so we implicitly
+ // Store both connections into LLTempBoundListeners so we implicitly
// disconnect on return from this function.
- LLTempBoundListener connection =
+ auto connections =
postAndSuspendSetup("postAndSuspendWithTimeout()", listenerName, promise,
event, requestPump, replyPump, replyPumpNamePath);
+ LLTempBoundListener connection(connections.first), stopper(connections.second);
// declare the future
LLCoros::Future<LLSD> future = LLCoros::getFuture(promise);
// wait for specified timeout
- boost::fibers::future_status status =
- future.wait_for(std::chrono::milliseconds(long(timeout * 1000)));
+ boost::fibers::future_status status;
+ {
+ LLCoros::TempStatus st(STRINGIZE("waiting for " << replyPump.getPump().getName()
+ << " for " << timeout << "s"));
+ status = future.wait_for(std::chrono::milliseconds(long(timeout * 1000)));
+ }
// if the future is NOT yet ready, return timeoutResult instead
if (status == boost::fibers::future_status::timeout)
{