From 1345a02b21a83bc4ee7ff72efc1858e956f18c53 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 22 Oct 2019 17:14:26 -0400 Subject: DRTVWR-476: Terminate long-lived coroutines to avoid shutdown crash. Add LLCoros::TempStatus instances around known suspension points so printActiveCoroutines() can report what each suspended coroutine is waiting for. Similarly, sprinkle checkStop() calls at known suspension points. Make LLApp::setStatus() post an event to a new LLEventPump "LLApp" with a string corresponding to the status value being set, but only until ~LLEventPumps() -- since setStatus() also gets called very late in the application's lifetime. Make postAndSuspendSetup() (used by postAndSuspend(), suspendUntilEventOn(), postAndSuspendWithTimeout(), suspendUntilEventOnWithTimeout()) add a listener on the new "LLApp" LLEventPump that pushes the new LLCoros::Stopping exception to the coroutine waiting on the LLCoros::Promise. Make it return the new LLBoundListener along with the previous one. Accordingly, make postAndSuspend() and postAndSuspendWithTimeout() store the new LLBoundListener returned by postAndSuspendSetup() in a LLTempBoundListener (as with the previous one) so it will automatically disconnect once the wait is over. Make each LLCoprocedurePool instance listen on "LLApp" with a listener that closes the queue on which new work items are dispatched. Closing the queue causes the waiting dispatch coroutine to terminate. Store the connection in an LLTempBoundListener on the LLCoprocedurePool so it will disconnect automatically on destruction. Refactor the loop in coprocedureInvokerCoro() to instantiate TempStatus around the suspending call. Change a couple spammy LL_INFOS() calls to LL_DEBUGS(). Give all logging calls in that module a "CoProcMgr" tag to make it straightforward to re-enable the LL_DEBUGS() calls as desired. --- indra/llmessage/llcoproceduremanager.cpp | 47 +++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 10 deletions(-) (limited to 'indra/llmessage/llcoproceduremanager.cpp') diff --git a/indra/llmessage/llcoproceduremanager.cpp b/indra/llmessage/llcoproceduremanager.cpp index 89eb00a2b7..a8f6b8aa67 100644 --- a/indra/llmessage/llcoproceduremanager.cpp +++ b/indra/llmessage/llcoproceduremanager.cpp @@ -102,6 +102,7 @@ private: size_t mPoolSize; CoprocQueue_t mPendingCoprocs; ActiveCoproc_t mActiveCoprocs; + LLTempBoundListener mStatusListener; typedef std::map CoroAdapterMap_t; LLCore::HttpRequest::policy_t mHTTPPolicy; @@ -149,7 +150,7 @@ LLCoprocedureManager::poolPtr_t LLCoprocedureManager::initializePool(const std:: mPropertyDefineFn(keyName, size, "Coroutine Pool size for " + poolName); } - LL_WARNS() << "LLCoprocedureManager: No setting for \"" << keyName << "\" setting pool size to default of " << size << LL_ENDL; + LL_WARNS("CoProcMgr") << "LLCoprocedureManager: No setting for \"" << keyName << "\" setting pool size to default of " << size << LL_ENDL; } poolPtr_t pool(new LLCoprocedurePool(poolName, size)); @@ -214,9 +215,28 @@ LLCoprocedurePool::LLCoprocedurePool(const std::string &poolName, size_t size): mPoolName(poolName), mPoolSize(size), mPendingCoprocs(DEFAULT_QUEUE_SIZE), - mCoroMapping(), - mHTTPPolicy(LLCore::HttpRequest::DEFAULT_POLICY_ID) + mHTTPPolicy(LLCore::HttpRequest::DEFAULT_POLICY_ID), + mCoroMapping() { + // store in our LLTempBoundListener so that when the LLCoprocedurePool is + // destroyed, we implicitly disconnect from this LLEventPump + mStatusListener = LLEventPumps::instance().obtain("LLApp").listen( + poolName, + [this, poolName](const LLSD& status) + { + auto& statsd = status["status"]; + if (statsd.asString() != "running") + { + LL_INFOS("CoProcMgr") << "Pool " << poolName + << " closing queue because status " << statsd + << LL_ENDL; + // This should ensure that all waiting coprocedures in this + // pool will wake up and terminate. + mPendingCoprocs.close(); + } + return false; + }); + for (size_t count = 0; count < mPoolSize; ++count) { LLCoreHttpUtil::HttpCoroutineAdapter::ptr_t httpAdapter(new LLCoreHttpUtil::HttpCoroutineAdapter( mPoolName + "Adapter", mHTTPPolicy)); @@ -227,7 +247,7 @@ LLCoprocedurePool::LLCoprocedurePool(const std::string &poolName, size_t size): mCoroMapping.insert(CoroAdapterMap_t::value_type(pooledCoro, httpAdapter)); } - LL_INFOS() << "Created coprocedure pool named \"" << mPoolName << "\" with " << size << " items." << LL_ENDL; + LL_INFOS("CoProcMgr") << "Created coprocedure pool named \"" << mPoolName << "\" with " << size << " items." << LL_ENDL; } LLCoprocedurePool::~LLCoprocedurePool() @@ -240,7 +260,7 @@ LLUUID LLCoprocedurePool::enqueueCoprocedure(const std::string &name, LLCoproced LLUUID id(LLUUID::generateNewID()); mPendingCoprocs.push(QueuedCoproc::ptr_t(new QueuedCoproc(name, id, proc))); - LL_INFOS() << "Coprocedure(" << name << ") enqueued with id=" << id.asString() << " in pool \"" << mPoolName << "\"" << LL_ENDL; + LL_INFOS("CoProcMgr") << "Coprocedure(" << name << ") enqueued with id=" << id.asString() << " in pool \"" << mPoolName << "\"" << LL_ENDL; return id; } @@ -250,8 +270,17 @@ void LLCoprocedurePool::coprocedureInvokerCoro(LLCoreHttpUtil::HttpCoroutineAdap { QueuedCoproc::ptr_t coproc; boost::fibers::channel_op_status status; - while ((status = mPendingCoprocs.pop_wait_for(coproc, std::chrono::seconds(10))) != boost::fibers::channel_op_status::closed) + for (;;) { + { + LLCoros::TempStatus st("waiting for work for 10s"); + status = mPendingCoprocs.pop_wait_for(coproc, std::chrono::seconds(10)); + } + if (status == boost::fibers::channel_op_status::closed) + { + break; + } + if(status == boost::fibers::channel_op_status::timeout) { LL_INFOS_ONCE() << "pool '" << mPoolName << "' stalled." << LL_ENDL; @@ -260,8 +289,7 @@ void LLCoprocedurePool::coprocedureInvokerCoro(LLCoreHttpUtil::HttpCoroutineAdap ActiveCoproc_t::iterator itActive = mActiveCoprocs.insert(ActiveCoproc_t::value_type(coproc->mId, httpAdapter)).first; - // Nicky: This is super spammy. Consider using LL_DEBUGS here? - LL_INFOS() << "Dequeued and invoking coprocedure(" << coproc->mName << ") with id=" << coproc->mId.asString() << " in pool \"" << mPoolName << "\"" << LL_ENDL; + LL_DEBUGS("CoProcMgr") << "Dequeued and invoking coprocedure(" << coproc->mName << ") with id=" << coproc->mId.asString() << " in pool \"" << mPoolName << "\"" << LL_ENDL; try { @@ -277,8 +305,7 @@ void LLCoprocedurePool::coprocedureInvokerCoro(LLCoreHttpUtil::HttpCoroutineAdap throw; } - // Nicky: This is super spammy. Consider using LL_DEBUGS here? - LL_INFOS() << "Finished coprocedure(" << coproc->mName << ")" << " in pool \"" << mPoolName << "\"" << LL_ENDL; + LL_DEBUGS("CoProcMgr") << "Finished coprocedure(" << coproc->mName << ")" << " in pool \"" << mPoolName << "\"" << LL_ENDL; mActiveCoprocs.erase(itActive); } -- cgit v1.2.3