diff options
author | Nat Goodspeed <nat@lindenlab.com> | 2019-10-22 16:49:29 -0400 |
---|---|---|
committer | Nat Goodspeed <nat@lindenlab.com> | 2020-03-25 19:02:24 -0400 |
commit | 28a54c2f7b25b9fda763c51746701f0cd21c8018 (patch) | |
tree | 906139d3074d1bc129dde12f8dc2af79599d1dff /indra/llcommon | |
parent | 253e360062a74cbaf87c7f9dc30ef26bca1697eb (diff) |
DRTVWR-476: Infrastructure to help manage long-lived coroutines.
Introduce LLCoros::Stop exception, with subclasses Stopping, Stopped and
Shutdown. Add LLCoros::checkStop(), intended to be called periodically by any
coroutine with nontrivial lifespan. It checks the LLApp status and, unless
isRunning(), throws one of these new exceptions.
Make LLCoros::toplevel() catch Stop specially and log forcible coroutine
termination.
Now that LLApp status matters even in a test program, introduce a trivial
LLTestApp subclass whose sole function is to make isRunning() true.
(LLApp::setStatus() is protected: only a subclass can call it.) Add LLTestApp
instances to lleventcoro_test.cpp and lllogin_test.cpp.
Make LLCoros::toplevel() accept parameters by value rather than by const
reference so we can continue using them even after context switches.
Make private LLCoros::get_CoroData() static. Given that we've observed some
coroutines living past LLCoros destruction, making the caller call
LLCoros::instance() is more dangerous than encapsulating it within a static
method -- since the encapsulated call can check LLCoros::wasDeleted() first
and do something reasonable instead. This also eliminates the need for both a
const and non-const overload.
Defend LLCoros::delete_CoroData() (cleanup function for fiber_specific_ptr for
CoroData, implicitly called after coroutine termination) against calls after
~LLCoros().
Add a status string to coroutine-local data, with LLCoro::setStatus(),
getStatus() and RAII class TempStatus.
Add an optional 'when' string argument to LLCoros::printActiveCoroutines().
Make ~LLCoros() print the coroutines still active at destruction.
Diffstat (limited to 'indra/llcommon')
-rw-r--r-- | indra/llcommon/llcoros.cpp | 105 | ||||
-rw-r--r-- | indra/llcommon/llcoros.h | 66 | ||||
-rw-r--r-- | indra/llcommon/tests/lleventcoro_test.cpp | 2 |
3 files changed, 145 insertions, 28 deletions
diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index 37bcfc3242..78a0c5d225 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -48,6 +48,7 @@ #undef BOOST_DISABLE_ASSERTS #endif // other Linden headers +#include "llapp.h" #include "lltimer.h" #include "llevents.h" #include "llerror.h" @@ -58,10 +59,15 @@ #include <excpt.h> #endif - -const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const +// static +LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) { - CoroData* current = mCurrent.get(); + CoroData* current{ nullptr }; + // be careful about attempted accesses in the final throes of app shutdown + if (! wasDeleted()) + { + current = instance().mCurrent.get(); + } // For the main() coroutine, the one NOT explicitly launched by launch(), // we never explicitly set mCurrent. Use a static CoroData instance with // canonical values. @@ -78,12 +84,6 @@ const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const return *current; } -LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) -{ - // reuse const implementation, just cast away const-ness of result - return const_cast<CoroData&>(const_cast<const LLCoros*>(this)->get_CoroData(caller)); -} - //static LLCoros::coro::id LLCoros::get_self() { @@ -93,7 +93,7 @@ LLCoros::coro::id LLCoros::get_self() //static void LLCoros::set_consuming(bool consuming) { - CoroData& data(LLCoros::instance().get_CoroData("set_consuming()")); + CoroData& data(get_CoroData("set_consuming()")); // DO NOT call this on the main() coroutine. llassert_always(! data.mName.empty()); data.mConsuming = consuming; @@ -102,7 +102,19 @@ void LLCoros::set_consuming(bool consuming) //static bool LLCoros::get_consuming() { - return LLCoros::instance().get_CoroData("get_consuming()").mConsuming; + return get_CoroData("get_consuming()").mConsuming; +} + +// static +void LLCoros::setStatus(const std::string& status) +{ + get_CoroData("setStatus()").mStatus = status; +} + +// static +std::string LLCoros::getStatus() +{ + return get_CoroData("getStatus()").mStatus; } LLCoros::LLCoros(): @@ -118,6 +130,11 @@ LLCoros::LLCoros(): { } +LLCoros::~LLCoros() +{ + printActiveCoroutines("at LLCoros destruction"); +} + std::string LLCoros::generateDistinctName(const std::string& prefix) const { static int unique = 0; @@ -166,19 +183,19 @@ void LLCoros::setStackSize(S32 stacksize) mStackSize = stacksize; } -void LLCoros::printActiveCoroutines() +void LLCoros::printActiveCoroutines(const std::string& when) { - LL_INFOS("LLCoros") << "Number of active coroutines: " << (S32)mCoros.size() << LL_ENDL; + LL_INFOS("LLCoros") << "Number of active coroutines " << when + << ": " << (S32)mCoros.size() << LL_ENDL; if (mCoros.size() > 0) { LL_INFOS("LLCoros") << "-------------- List of active coroutines ------------"; - CoroMap::iterator iter; - CoroMap::iterator end = mCoros.end(); F64 time = LLTimer::getTotalSeconds(); - for (iter = mCoros.begin(); iter != end; iter++) + for (const auto& pair : mCoros) { - F64 life_time = time - iter->second->mCreationTime; - LL_CONT << LL_NEWLINE << "Name: " << iter->first << " life: " << life_time; + F64 life_time = time - pair.second->mCreationTime; + LL_CONT << LL_NEWLINE << pair.first << ' ' << pair.second->mStatus + << " life: " << life_time; } LL_CONT << LL_ENDL; LL_INFOS("LLCoros") << "-----------------------------------------------------" << LL_ENDL; @@ -244,11 +261,19 @@ void LLCoros::winlevel(const callable_t& callable) #endif // Top-level wrapper around caller's coroutine callable. -void LLCoros::toplevel(const std::string& name, const callable_t& callable) +// Normally we like to pass strings and such by const reference -- but in this +// case, we WANT to copy both the name and the callable to our local stack! +void LLCoros::toplevel(std::string name, callable_t callable) { CoroData* corodata = new CoroData(name); - // Store it in our pointer map. Oddly, must cast away const-ness of key. - mCoros.insert(const_cast<std::string&>(name), corodata); + if (corodata == NULL) + { + // Out of memory? + printActiveCoroutines(); + LL_ERRS("LLCoros") << "Failed to start coroutine: " << name << " Stacksize: " << mStackSize << " Total coroutines: " << mCoros.size() << LL_ENDL; + } + // Store it in our pointer map. + mCoros.insert(name, corodata); // also set it as current mCurrent.reset(corodata); @@ -261,18 +286,39 @@ void LLCoros::toplevel(const std::string& name, const callable_t& callable) callable(); #endif } + catch (const Stop& exc) + { + LL_INFOS("LLCoros") << "coroutine " << name << " terminating because " + << exc.what() << LL_ENDL; + } catch (const LLContinueError&) { // Any uncaught exception derived from LLContinueError will be caught // here and logged. This coroutine will terminate but the rest of the // viewer will carry on. - LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName)); + LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name)); } catch (...) { // Any OTHER kind of uncaught exception will cause the viewer to // crash, hopefully informatively. - CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName)); + CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name)); + } +} + +void LLCoros::checkStop() +{ + if (wasDeleted()) + { + LLTHROW(Shutdown("LLCoros was deleted")); + } + if (LLApp::isStopped()) + { + LLTHROW(Stopped("viewer is stopped")); + } + if (! LLApp::isRunning()) + { + LLTHROW(Stopping("viewer is stopping")); } } @@ -288,6 +334,19 @@ void LLCoros::delete_CoroData(CoroData* cdptr) { // This custom cleanup function is necessarily static. Find and bind the // LLCoros instance. + // In case the LLCoros instance has already been deleted, just warn and + // scoot. We do NOT want to reinstantiate LLCoros during shutdown! + if (wasDeleted()) + { + // The LLSingletons involved in logging may have been deleted too. + // This warning may help developers track down coroutines that have + // not yet been cleaned up. + // But cdptr is very likely a dangling pointer by this time, so don't + // try to dereference mName. + logwarns("Coroutine terminating after LLCoros instance deleted"); + return; + } + LLCoros& self(LLCoros::instance()); // We set mCurrent on entry to a new fiber, expecting that the // corresponding entry has already been stored in mCoros. It is an diff --git a/indra/llcommon/llcoros.h b/indra/llcommon/llcoros.h index dedb6c8eca..de7b691284 100644 --- a/indra/llcommon/llcoros.h +++ b/indra/llcommon/llcoros.h @@ -29,6 +29,7 @@ #if ! defined(LL_LLCOROS_H) #define LL_LLCOROS_H +#include "llexception.h" #include <boost/fiber/fss.hpp> #include <boost/fiber/future/promise.hpp> #include <boost/fiber/future/future.hpp> @@ -74,6 +75,7 @@ class LL_COMMON_API LLCoros: public LLSingleton<LLCoros> { LLSINGLETON(LLCoros); + ~LLCoros(); public: /// The viewer's use of the term "coroutine" became deeply embedded before /// the industry term "fiber" emerged to distinguish userland threads from @@ -147,8 +149,8 @@ public: */ void setStackSize(S32 stacksize); - /// for delayed initialization - void printActiveCoroutines(); + /// diagnostic + void printActiveCoroutines(const std::string& when=std::string()); /// get the current coro::id for those who really really care static coro::id get_self(); @@ -176,6 +178,7 @@ public: { set_consuming(consuming); } + OverrideConsuming(const OverrideConsuming&) = delete; ~OverrideConsuming() { set_consuming(mPrevConsuming); @@ -185,6 +188,58 @@ public: bool mPrevConsuming; }; + /// set string coroutine status for diagnostic purposes + static void setStatus(const std::string& status); + static std::string getStatus(); + + /// RAII control of status + class TempStatus + { + public: + TempStatus(const std::string& status): + mOldStatus(getStatus()) + { + setStatus(status); + } + TempStatus(const TempStatus&) = delete; + ~TempStatus() + { + setStatus(mOldStatus); + } + + private: + std::string mOldStatus; + }; + + /// thrown by checkStop() + struct Stop: public LLContinueError + { + Stop(const std::string& what): LLContinueError(what) {} + }; + + /// early stages + struct Stopping: public Stop + { + Stopping(const std::string& what): Stop(what) {} + }; + + /// cleaning up + struct Stopped: public Stop + { + Stopped(const std::string& what): Stop(what) {} + }; + + /// cleaned up -- not much survives! + struct Shutdown: public Stop + { + Shutdown(const std::string& what): Stop(what) {} + }; + + /// Call this intermittently if there's a chance your coroutine might + /// continue running into application shutdown. Throws Stop if LLCoros has + /// been cleaned up. + static void checkStop(); + /** * Aliases for promise and future. An older underlying future implementation * required us to wrap future; that's no longer needed. However -- if it's @@ -204,13 +259,12 @@ public: private: std::string generateDistinctName(const std::string& prefix) const; - void toplevel(const std::string& name, const callable_t& callable); + void toplevel(std::string name, callable_t callable); struct CoroData; #if LL_WINDOWS static void winlevel(const callable_t& callable); #endif - CoroData& get_CoroData(const std::string& caller); - const CoroData& get_CoroData(const std::string& caller) const; + static CoroData& get_CoroData(const std::string& caller); S32 mStackSize; @@ -223,6 +277,8 @@ private: const std::string mName; // set_consuming() state bool mConsuming; + // setStatus() state + std::string mStatus; F64 mCreationTime; // since epoch }; typedef boost::ptr_map<std::string, CoroData> CoroMap; diff --git a/indra/llcommon/tests/lleventcoro_test.cpp b/indra/llcommon/tests/lleventcoro_test.cpp index c13920eefd..032923a108 100644 --- a/indra/llcommon/tests/lleventcoro_test.cpp +++ b/indra/llcommon/tests/lleventcoro_test.cpp @@ -40,6 +40,7 @@ #include <typeinfo> #include "../test/lltut.h" +#include "../test/lltestapp.h" #include "llsd.h" #include "llsdutil.h" #include "llevents.h" @@ -98,6 +99,7 @@ namespace tut std::string replyName, errorName, threw, stringdata; LLSD result, errordata; int which; + LLTestApp testApp; void explicit_wait(boost::shared_ptr<LLCoros::Promise<std::string>>& cbp); void waitForEventOn1(); |