summaryrefslogtreecommitdiff
path: root/indra/llcommon
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2019-10-22 16:49:29 -0400
committerNat Goodspeed <nat@lindenlab.com>2020-03-25 19:02:24 -0400
commit28a54c2f7b25b9fda763c51746701f0cd21c8018 (patch)
tree906139d3074d1bc129dde12f8dc2af79599d1dff /indra/llcommon
parent253e360062a74cbaf87c7f9dc30ef26bca1697eb (diff)
DRTVWR-476: Infrastructure to help manage long-lived coroutines.
Introduce LLCoros::Stop exception, with subclasses Stopping, Stopped and Shutdown. Add LLCoros::checkStop(), intended to be called periodically by any coroutine with nontrivial lifespan. It checks the LLApp status and, unless isRunning(), throws one of these new exceptions. Make LLCoros::toplevel() catch Stop specially and log forcible coroutine termination. Now that LLApp status matters even in a test program, introduce a trivial LLTestApp subclass whose sole function is to make isRunning() true. (LLApp::setStatus() is protected: only a subclass can call it.) Add LLTestApp instances to lleventcoro_test.cpp and lllogin_test.cpp. Make LLCoros::toplevel() accept parameters by value rather than by const reference so we can continue using them even after context switches. Make private LLCoros::get_CoroData() static. Given that we've observed some coroutines living past LLCoros destruction, making the caller call LLCoros::instance() is more dangerous than encapsulating it within a static method -- since the encapsulated call can check LLCoros::wasDeleted() first and do something reasonable instead. This also eliminates the need for both a const and non-const overload. Defend LLCoros::delete_CoroData() (cleanup function for fiber_specific_ptr for CoroData, implicitly called after coroutine termination) against calls after ~LLCoros(). Add a status string to coroutine-local data, with LLCoro::setStatus(), getStatus() and RAII class TempStatus. Add an optional 'when' string argument to LLCoros::printActiveCoroutines(). Make ~LLCoros() print the coroutines still active at destruction.
Diffstat (limited to 'indra/llcommon')
-rw-r--r--indra/llcommon/llcoros.cpp105
-rw-r--r--indra/llcommon/llcoros.h66
-rw-r--r--indra/llcommon/tests/lleventcoro_test.cpp2
3 files changed, 145 insertions, 28 deletions
diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp
index 37bcfc3242..78a0c5d225 100644
--- a/indra/llcommon/llcoros.cpp
+++ b/indra/llcommon/llcoros.cpp
@@ -48,6 +48,7 @@
#undef BOOST_DISABLE_ASSERTS
#endif
// other Linden headers
+#include "llapp.h"
#include "lltimer.h"
#include "llevents.h"
#include "llerror.h"
@@ -58,10 +59,15 @@
#include <excpt.h>
#endif
-
-const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const
+// static
+LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller)
{
- CoroData* current = mCurrent.get();
+ CoroData* current{ nullptr };
+ // be careful about attempted accesses in the final throes of app shutdown
+ if (! wasDeleted())
+ {
+ current = instance().mCurrent.get();
+ }
// For the main() coroutine, the one NOT explicitly launched by launch(),
// we never explicitly set mCurrent. Use a static CoroData instance with
// canonical values.
@@ -78,12 +84,6 @@ const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const
return *current;
}
-LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller)
-{
- // reuse const implementation, just cast away const-ness of result
- return const_cast<CoroData&>(const_cast<const LLCoros*>(this)->get_CoroData(caller));
-}
-
//static
LLCoros::coro::id LLCoros::get_self()
{
@@ -93,7 +93,7 @@ LLCoros::coro::id LLCoros::get_self()
//static
void LLCoros::set_consuming(bool consuming)
{
- CoroData& data(LLCoros::instance().get_CoroData("set_consuming()"));
+ CoroData& data(get_CoroData("set_consuming()"));
// DO NOT call this on the main() coroutine.
llassert_always(! data.mName.empty());
data.mConsuming = consuming;
@@ -102,7 +102,19 @@ void LLCoros::set_consuming(bool consuming)
//static
bool LLCoros::get_consuming()
{
- return LLCoros::instance().get_CoroData("get_consuming()").mConsuming;
+ return get_CoroData("get_consuming()").mConsuming;
+}
+
+// static
+void LLCoros::setStatus(const std::string& status)
+{
+ get_CoroData("setStatus()").mStatus = status;
+}
+
+// static
+std::string LLCoros::getStatus()
+{
+ return get_CoroData("getStatus()").mStatus;
}
LLCoros::LLCoros():
@@ -118,6 +130,11 @@ LLCoros::LLCoros():
{
}
+LLCoros::~LLCoros()
+{
+ printActiveCoroutines("at LLCoros destruction");
+}
+
std::string LLCoros::generateDistinctName(const std::string& prefix) const
{
static int unique = 0;
@@ -166,19 +183,19 @@ void LLCoros::setStackSize(S32 stacksize)
mStackSize = stacksize;
}
-void LLCoros::printActiveCoroutines()
+void LLCoros::printActiveCoroutines(const std::string& when)
{
- LL_INFOS("LLCoros") << "Number of active coroutines: " << (S32)mCoros.size() << LL_ENDL;
+ LL_INFOS("LLCoros") << "Number of active coroutines " << when
+ << ": " << (S32)mCoros.size() << LL_ENDL;
if (mCoros.size() > 0)
{
LL_INFOS("LLCoros") << "-------------- List of active coroutines ------------";
- CoroMap::iterator iter;
- CoroMap::iterator end = mCoros.end();
F64 time = LLTimer::getTotalSeconds();
- for (iter = mCoros.begin(); iter != end; iter++)
+ for (const auto& pair : mCoros)
{
- F64 life_time = time - iter->second->mCreationTime;
- LL_CONT << LL_NEWLINE << "Name: " << iter->first << " life: " << life_time;
+ F64 life_time = time - pair.second->mCreationTime;
+ LL_CONT << LL_NEWLINE << pair.first << ' ' << pair.second->mStatus
+ << " life: " << life_time;
}
LL_CONT << LL_ENDL;
LL_INFOS("LLCoros") << "-----------------------------------------------------" << LL_ENDL;
@@ -244,11 +261,19 @@ void LLCoros::winlevel(const callable_t& callable)
#endif
// Top-level wrapper around caller's coroutine callable.
-void LLCoros::toplevel(const std::string& name, const callable_t& callable)
+// Normally we like to pass strings and such by const reference -- but in this
+// case, we WANT to copy both the name and the callable to our local stack!
+void LLCoros::toplevel(std::string name, callable_t callable)
{
CoroData* corodata = new CoroData(name);
- // Store it in our pointer map. Oddly, must cast away const-ness of key.
- mCoros.insert(const_cast<std::string&>(name), corodata);
+ if (corodata == NULL)
+ {
+ // Out of memory?
+ printActiveCoroutines();
+ LL_ERRS("LLCoros") << "Failed to start coroutine: " << name << " Stacksize: " << mStackSize << " Total coroutines: " << mCoros.size() << LL_ENDL;
+ }
+ // Store it in our pointer map.
+ mCoros.insert(name, corodata);
// also set it as current
mCurrent.reset(corodata);
@@ -261,18 +286,39 @@ void LLCoros::toplevel(const std::string& name, const callable_t& callable)
callable();
#endif
}
+ catch (const Stop& exc)
+ {
+ LL_INFOS("LLCoros") << "coroutine " << name << " terminating because "
+ << exc.what() << LL_ENDL;
+ }
catch (const LLContinueError&)
{
// Any uncaught exception derived from LLContinueError will be caught
// here and logged. This coroutine will terminate but the rest of the
// viewer will carry on.
- LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName));
+ LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name));
}
catch (...)
{
// Any OTHER kind of uncaught exception will cause the viewer to
// crash, hopefully informatively.
- CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName));
+ CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name));
+ }
+}
+
+void LLCoros::checkStop()
+{
+ if (wasDeleted())
+ {
+ LLTHROW(Shutdown("LLCoros was deleted"));
+ }
+ if (LLApp::isStopped())
+ {
+ LLTHROW(Stopped("viewer is stopped"));
+ }
+ if (! LLApp::isRunning())
+ {
+ LLTHROW(Stopping("viewer is stopping"));
}
}
@@ -288,6 +334,19 @@ void LLCoros::delete_CoroData(CoroData* cdptr)
{
// This custom cleanup function is necessarily static. Find and bind the
// LLCoros instance.
+ // In case the LLCoros instance has already been deleted, just warn and
+ // scoot. We do NOT want to reinstantiate LLCoros during shutdown!
+ if (wasDeleted())
+ {
+ // The LLSingletons involved in logging may have been deleted too.
+ // This warning may help developers track down coroutines that have
+ // not yet been cleaned up.
+ // But cdptr is very likely a dangling pointer by this time, so don't
+ // try to dereference mName.
+ logwarns("Coroutine terminating after LLCoros instance deleted");
+ return;
+ }
+
LLCoros& self(LLCoros::instance());
// We set mCurrent on entry to a new fiber, expecting that the
// corresponding entry has already been stored in mCoros. It is an
diff --git a/indra/llcommon/llcoros.h b/indra/llcommon/llcoros.h
index dedb6c8eca..de7b691284 100644
--- a/indra/llcommon/llcoros.h
+++ b/indra/llcommon/llcoros.h
@@ -29,6 +29,7 @@
#if ! defined(LL_LLCOROS_H)
#define LL_LLCOROS_H
+#include "llexception.h"
#include <boost/fiber/fss.hpp>
#include <boost/fiber/future/promise.hpp>
#include <boost/fiber/future/future.hpp>
@@ -74,6 +75,7 @@
class LL_COMMON_API LLCoros: public LLSingleton<LLCoros>
{
LLSINGLETON(LLCoros);
+ ~LLCoros();
public:
/// The viewer's use of the term "coroutine" became deeply embedded before
/// the industry term "fiber" emerged to distinguish userland threads from
@@ -147,8 +149,8 @@ public:
*/
void setStackSize(S32 stacksize);
- /// for delayed initialization
- void printActiveCoroutines();
+ /// diagnostic
+ void printActiveCoroutines(const std::string& when=std::string());
/// get the current coro::id for those who really really care
static coro::id get_self();
@@ -176,6 +178,7 @@ public:
{
set_consuming(consuming);
}
+ OverrideConsuming(const OverrideConsuming&) = delete;
~OverrideConsuming()
{
set_consuming(mPrevConsuming);
@@ -185,6 +188,58 @@ public:
bool mPrevConsuming;
};
+ /// set string coroutine status for diagnostic purposes
+ static void setStatus(const std::string& status);
+ static std::string getStatus();
+
+ /// RAII control of status
+ class TempStatus
+ {
+ public:
+ TempStatus(const std::string& status):
+ mOldStatus(getStatus())
+ {
+ setStatus(status);
+ }
+ TempStatus(const TempStatus&) = delete;
+ ~TempStatus()
+ {
+ setStatus(mOldStatus);
+ }
+
+ private:
+ std::string mOldStatus;
+ };
+
+ /// thrown by checkStop()
+ struct Stop: public LLContinueError
+ {
+ Stop(const std::string& what): LLContinueError(what) {}
+ };
+
+ /// early stages
+ struct Stopping: public Stop
+ {
+ Stopping(const std::string& what): Stop(what) {}
+ };
+
+ /// cleaning up
+ struct Stopped: public Stop
+ {
+ Stopped(const std::string& what): Stop(what) {}
+ };
+
+ /// cleaned up -- not much survives!
+ struct Shutdown: public Stop
+ {
+ Shutdown(const std::string& what): Stop(what) {}
+ };
+
+ /// Call this intermittently if there's a chance your coroutine might
+ /// continue running into application shutdown. Throws Stop if LLCoros has
+ /// been cleaned up.
+ static void checkStop();
+
/**
* Aliases for promise and future. An older underlying future implementation
* required us to wrap future; that's no longer needed. However -- if it's
@@ -204,13 +259,12 @@ public:
private:
std::string generateDistinctName(const std::string& prefix) const;
- void toplevel(const std::string& name, const callable_t& callable);
+ void toplevel(std::string name, callable_t callable);
struct CoroData;
#if LL_WINDOWS
static void winlevel(const callable_t& callable);
#endif
- CoroData& get_CoroData(const std::string& caller);
- const CoroData& get_CoroData(const std::string& caller) const;
+ static CoroData& get_CoroData(const std::string& caller);
S32 mStackSize;
@@ -223,6 +277,8 @@ private:
const std::string mName;
// set_consuming() state
bool mConsuming;
+ // setStatus() state
+ std::string mStatus;
F64 mCreationTime; // since epoch
};
typedef boost::ptr_map<std::string, CoroData> CoroMap;
diff --git a/indra/llcommon/tests/lleventcoro_test.cpp b/indra/llcommon/tests/lleventcoro_test.cpp
index c13920eefd..032923a108 100644
--- a/indra/llcommon/tests/lleventcoro_test.cpp
+++ b/indra/llcommon/tests/lleventcoro_test.cpp
@@ -40,6 +40,7 @@
#include <typeinfo>
#include "../test/lltut.h"
+#include "../test/lltestapp.h"
#include "llsd.h"
#include "llsdutil.h"
#include "llevents.h"
@@ -98,6 +99,7 @@ namespace tut
std::string replyName, errorName, threw, stringdata;
LLSD result, errordata;
int which;
+ LLTestApp testApp;
void explicit_wait(boost::shared_ptr<LLCoros::Promise<std::string>>& cbp);
void waitForEventOn1();