From b7d60f650d2ca9fdfc3c541d76670c938f2cf48e Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Wed, 20 May 2020 10:44:34 -0400 Subject: DRTVWR-476: Fix LLCoprocedurePool::enqueueCoprocedure() shutdown crash. --- indra/llmessage/llcoproceduremanager.cpp | 45 ++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 11 deletions(-) (limited to 'indra') diff --git a/indra/llmessage/llcoproceduremanager.cpp b/indra/llmessage/llcoproceduremanager.cpp index 210b83ae2d..a7bd836c4d 100644 --- a/indra/llmessage/llcoproceduremanager.cpp +++ b/indra/llmessage/llcoproceduremanager.cpp @@ -325,16 +325,22 @@ LLUUID LLCoprocedurePool::enqueueCoprocedure(const std::string &name, LLCoproced LL_INFOS("CoProcMgr") << "Coprocedure(" << name << ") enqueuing with id=" << id.asString() << " in pool \"" << mPoolName << "\" at " << mPending << LL_ENDL; auto pushed = mPendingCoprocs->try_push(boost::make_shared(name, id, proc)); - // We don't really have a lot of good options if try_push() failed, - // perhaps because the consuming coroutine is gummed up or something. This - // method is probably called from code called by mainloop. If we toss an - // llcoro::suspend() call here, we'll circle back for another mainloop - // iteration, possibly resulting in being re-entered here. Let's avoid that. - LL_ERRS_IF(pushed != boost::fibers::channel_op_status::success, "CoProcMgr") - << "Enqueue failed because queue is " << int(pushed) << LL_ENDL; - ++mPending; - - return id; + if (pushed == boost::fibers::channel_op_status::success) + { + ++mPending; + return id; + } + + // Here we didn't succeed in pushing. Shutdown could be the reason. + if (pushed == boost::fibers::channel_op_status::closed) + { + LL_WARNS("CoProcMgr") << "Discarding coprocedure '" << name << "' because shutdown" << LL_ENDL; + return {}; + } + + // The queue should never fill up. + LL_ERRS("CoProcMgr") << "Enqueue failed (" << unsigned(pushed) << ")" << LL_ENDL; + return {}; // never executed, pacify the compiler } //------------------------------------------------------------------------- @@ -344,6 +350,23 @@ void LLCoprocedurePool::coprocedureInvokerCoro( { for (;;) { + // It is VERY IMPORTANT that we instantiate a new ptr_t just before + // the pop_wait_for() call below. When this ptr_t was declared at + // function scope (outside the for loop), NickyD correctly diagnosed a + // mysterious hang condition due to: + // - the second time through the loop, the ptr_t held the last pointer + // to the previous QueuedCoproc, which indirectly held the last + // LLPointer to an LLInventoryCallback instance + // - while holding the lock on pendingCoprocs, pop_wait_for() assigned + // the popped value to the ptr_t variable + // - assignment destroyed the previous value of that variable, which + // indirectly destroyed the LLInventoryCallback + // - whose destructor called ~LLRequestServerAppearanceUpdateOnDestroy() + // - which called LLAppearanceMgr::requestServerAppearanceUpdate() + // - which called enqueueCoprocedure() + // - which tried to acquire the lock on pendingCoprocs... alas. + // Using a fresh, clean ptr_t ensures that no previous value is + // destroyed during pop_wait_for(). QueuedCoproc::ptr_t coproc; boost::fibers::channel_op_status status; { @@ -357,7 +380,7 @@ void LLCoprocedurePool::coprocedureInvokerCoro( if(status == boost::fibers::channel_op_status::timeout) { - LL_INFOS_ONCE() << "pool '" << mPoolName << "' stalled." << LL_ENDL; + LL_DEBUGS_ONCE("CoProcMgr") << "pool '" << mPoolName << "' waiting." << LL_ENDL; continue; } // we actually popped an item -- cgit v1.2.3