From 66981fab0b3c8dcc3a031d50710a2b24ec6b0603 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Thu, 10 May 2018 21:46:07 -0400 Subject: SL-793: Use Boost.Fiber instead of the "dcoroutine" library. Longtime fans will remember that the "dcoroutine" library is a Google Summer of Code project by Giovanni P. Deretta. He originally called it "Boost.Coroutine," and we originally added it to our 3p-boost autobuild package as such. But when the official Boost.Coroutine library came along (with a very different API), and we still needed the API of the GSoC project, we renamed the unofficial one "dcoroutine" to allow coexistence. The "dcoroutine" library had an internal low-level API more or less analogous to Boost.Context. We later introduced an implementation of that internal API based on Boost.Context, a step towards eliminating the GSoC code in favor of official, supported Boost code. However, recent versions of Boost.Context no longer support the API on which we built the shim for "dcoroutine." We started down the path of reimplementing that shim using the current Boost.Context API -- then realized that it's time to bite the bullet and replace the "dcoroutine" API with the Boost.Fiber API, which we've been itching to do for literally years now. Naturally, most of the heavy lifting is in llcoros.{h,cpp} and lleventcoro.{h,cpp} -- which is good: the LLCoros layer abstracts away most of the differences between "dcoroutine" and Boost.Fiber. The one feature Boost.Fiber does not provide is the ability to forcibly terminate some other fiber. Accordingly, disable LLCoros::kill() and LLCoprocedureManager::shutdown(). The only known shutdown() call was in LLCoprocedurePool's destructor. We also took the opportunity to remove postAndSuspend2() and its associated machinery: FutureListener2, LLErrorEvent, errorException(), errorLog(), LLCoroEventPumps. All that dual-LLEventPump stuff was introduced at a time when the Responder pattern was king, and we assumed we'd want to listen on one LLEventPump with the success handler and on another with the error handler. We have never actually used that in practice. Remove associated tests, of course. There is one other semantic difference that necessitates patching a number of tests: with "dcoroutine," fulfilling a future IMMEDIATELY resumes the waiting coroutine. With Boost.Fiber, fulfilling a future merely marks the fiber as ready to resume next time the scheduler gets around to it. To observe the test side effects, we've inserted a number of llcoro::suspend() calls -- also in the main loop. For a long time we retained a single unit test exercising the raw "dcoroutine" API. Remove that. Eliminate llcoro_get_id.{h,cpp}, which provided llcoro::get_id(), which was a hack to emulate fiber-local variables. Since Boost.Fiber has an actual API for that, remove the hack. In fact, use (new alias) LLCoros::local_ptr for LLSingleton's dependency tracking in place of llcoro::get_id(). In CMake land, replace BOOST_COROUTINE_LIBRARY with BOOST_FIBER_LIBRARY. We don't actually use the Boost.Coroutine for anything (though there exist plausible use cases). --- indra/llcommon/llcoros.cpp | 297 ++++++++++++++------------------------------- 1 file changed, 88 insertions(+), 209 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index cc775775bf..f5ffd96cec 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -34,6 +34,17 @@ // std headers // external library headers #include +#include +#ifndef BOOST_DISABLE_ASSERTS +#define UNDO_BOOST_DISABLE_ASSERTS +// with Boost 1.65.1, needed for Mac with this specific header +#define BOOST_DISABLE_ASSERTS +#endif +#include +#ifdef UNDO_BOOST_DISABLE_ASSERTS +#undef UNDO_BOOST_DISABLE_ASSERTS +#undef BOOST_DISABLE_ASSERTS +#endif // other Linden headers #include "lltimer.h" #include "llevents.h" @@ -45,176 +56,69 @@ #include #endif -namespace { -void no_op() {} -} // anonymous namespace -// Do nothing, when we need nothing done. This is a static member of LLCoros -// because CoroData is a private nested class. -void LLCoros::no_cleanup(CoroData*) {} - -// CoroData for the currently-running coroutine. Use a thread_specific_ptr -// because each thread potentially has its own distinct pool of coroutines. -LLCoros::Current::Current() +const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const { - // Use a function-static instance so this thread_specific_ptr is - // instantiated on demand. Since we happen to know it's consumed by - // LLSingleton, this is likely to happen before the runtime has finished - // initializing module-static data. For the same reason, we can't package - // this pointer in an LLSingleton. - - // This thread_specific_ptr does NOT own the CoroData object! That's owned - // by LLCoros::mCoros. It merely identifies it. For this reason we - // instantiate it with a no-op cleanup function. - static boost::thread_specific_ptr sCurrent(LLCoros::no_cleanup); - - // If this is the first time we're accessing sCurrent for the running - // thread, its get() will be NULL. This could be a problem, in that - // llcoro::get_id() would return the same (NULL) token value for the "main - // coroutine" in every thread, whereas what we really want is a distinct - // value for every distinct stack in the process. So if get() is NULL, - // give it a heap CoroData: this ensures that llcoro::get_id() will return - // distinct values. - // This tactic is "leaky": sCurrent explicitly does not destroy any - // CoroData to which it points, and we do NOT enter these "main coroutine" - // CoroData instances in the LLCoros::mCoros map. They are dummy entries, - // and they will leak at process shutdown: one CoroData per thread. - if (! sCurrent.get()) + CoroData* current = mCurrent.get(); + // For the main() coroutine, the one NOT explicitly launched by launch(), + // we never explicitly set mCurrent. Use a static CoroData instance with + // canonical values. + if (! current) { // It's tempting to provide a distinct name for each thread's "main // coroutine." But as getName() has always returned the empty string - // to mean "not in a coroutine," empty string should suffice here -- - // and truthfully the additional (thread-safe!) machinery to ensure - // uniqueness just doesn't feel worth the trouble. - // We use a no-op callable and a minimal stack size because, although - // CoroData's constructor in fact initializes its mCoro with a - // coroutine with that stack size, no one ever actually enters it by - // calling mCoro(). - sCurrent.reset(new CoroData(0, // no prev - "", // not a named coroutine - no_op, // no-op callable - 1024)); // stacksize moot + // to mean "not in a coroutine," empty string should suffice here. + static CoroData sMain(""); + // We need not reset() the local_ptr to this read-only data: reuse the + // same instance for every thread's main coroutine. + current = &sMain; } - - mCurrent = &sCurrent; + return *current; } -//static LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) { - CoroData* current = Current(); - // With the dummy CoroData set in LLCoros::Current::Current(), this - // pointer should never be NULL. - llassert_always(current); - return *current; + // reuse const implementation, just cast away const-ness of result + return const_cast(const_cast(this)->get_CoroData(caller)); } //static -LLCoros::coro::self& LLCoros::get_self() +LLCoros::coro::id LLCoros::get_self() { - CoroData& current = get_CoroData("get_self()"); - if (! current.mSelf) - { - LL_ERRS("LLCoros") << "Calling get_self() from non-coroutine context!" << LL_ENDL; - } - return *current.mSelf; + return boost::this_fiber::get_id(); } //static void LLCoros::set_consuming(bool consuming) { - get_CoroData("set_consuming()").mConsuming = consuming; + CoroData& data(LLCoros::instance().get_CoroData("set_consuming()")); + // DO NOT call this on the main() coroutine. + llassert_always(! data.mName.empty()); + data.mConsuming = consuming; } //static bool LLCoros::get_consuming() { - return get_CoroData("get_consuming()").mConsuming; -} - -llcoro::Suspending::Suspending() -{ - LLCoros::Current current; - // Remember currently-running coroutine: we're about to suspend it. - mSuspended = current; - // Revert Current to the value it had at the moment we last switched - // into this coroutine. - current.reset(mSuspended->mPrev); -} - -llcoro::Suspending::~Suspending() -{ - LLCoros::Current current; - // Okay, we're back, update our mPrev - mSuspended->mPrev = current; - // and reinstate our Current. - current.reset(mSuspended); + return LLCoros::instance().get_CoroData("get_consuming()").mConsuming; } LLCoros::LLCoros(): // MAINT-2724: default coroutine stack size too small on Windows. // Previously we used // boost::context::guarded_stack_allocator::default_stacksize(); - // empirically this is 64KB on Windows and Linux. Try quadrupling. + // empirically this is insufficient. #if ADDRESS_SIZE == 64 mStackSize(512*1024) #else mStackSize(256*1024) #endif { - // Register our cleanup() method for "mainloop" ticks - LLEventPumps::instance().obtain("mainloop").listen( - "LLCoros", boost::bind(&LLCoros::cleanup, this, _1)); -} - -bool LLCoros::cleanup(const LLSD&) -{ - static std::string previousName; - static int previousCount = 0; - // Walk the mCoros map, checking and removing completed coroutines. - for (CoroMap::iterator mi(mCoros.begin()), mend(mCoros.end()); mi != mend; ) - { - // Has this coroutine exited (normal return, exception, exit() call) - // since last tick? - if (mi->second->mCoro.exited()) - { - if (previousName != mi->first) - { - previousName = mi->first; - previousCount = 1; - } - else - { - ++previousCount; - } - - if ((previousCount < 5) || !(previousCount % 50)) - { - if (previousCount < 5) - LL_DEBUGS("LLCoros") << "LLCoros: cleaning up coroutine " << mi->first << LL_ENDL; - else - LL_DEBUGS("LLCoros") << "LLCoros: cleaning up coroutine " << mi->first << "("<< previousCount << ")" << LL_ENDL; - - } - // The erase() call will invalidate its passed iterator value -- - // so increment mi FIRST -- but pass its original value to - // erase(). This is what postincrement is all about. - mCoros.erase(mi++); - } - else - { - // Still live, just skip this entry as if incrementing at the top - // of the loop as usual. - ++mi; - } - } - return false; } std::string LLCoros::generateDistinctName(const std::string& prefix) const { - static std::string previousName; - static int previousCount = 0; + static int unique = 0; // Allowing empty name would make getName()'s not-found return ambiguous. if (prefix.empty()) @@ -225,37 +129,15 @@ std::string LLCoros::generateDistinctName(const std::string& prefix) const // If the specified name isn't already in the map, just use that. std::string name(prefix); - // Find the lowest numeric suffix that doesn't collide with an existing - // entry. Start with 2 just to make it more intuitive for any interested - // parties: e.g. "joe", "joe2", "joe3"... - for (int i = 2; ; name = STRINGIZE(prefix << i++)) + // Until we find an unused name, append a numeric suffix for uniqueness. + while (mCoros.find(name) != mCoros.end()) { - if (mCoros.find(name) == mCoros.end()) - { - if (previousName != name) - { - previousName = name; - previousCount = 1; - } - else - { - ++previousCount; - } - - if ((previousCount < 5) || !(previousCount % 50)) - { - if (previousCount < 5) - LL_DEBUGS("LLCoros") << "LLCoros: launching coroutine " << name << LL_ENDL; - else - LL_DEBUGS("LLCoros") << "LLCoros: launching coroutine " << name << "(" << previousCount << ")" << LL_ENDL; - - } - - return name; - } + name = STRINGIZE(prefix << unique++); } + return name; } +/*==========================================================================*| bool LLCoros::kill(const std::string& name) { CoroMap::iterator found = mCoros.find(name); @@ -269,10 +151,11 @@ bool LLCoros::kill(const std::string& name) mCoros.erase(found); return true; } +|*==========================================================================*/ std::string LLCoros::getName() const { - return Current()->mName; + return get_CoroData("getName()").mName; } void LLCoros::setStackSize(S32 stacksize) @@ -300,6 +183,27 @@ void LLCoros::printActiveCoroutines() } } +std::string LLCoros::launch(const std::string& prefix, const callable_t& callable) +{ + std::string name(generateDistinctName(prefix)); + // 'dispatch' means: enter the new fiber immediately, returning here only + // when the fiber yields for whatever reason. + // std::allocator_arg is a flag to indicate that the following argument is + // a StackAllocator. + // protected_fixedsize_stack sets a guard page past the end of the new + // stack so that stack underflow will result in an access violation + // instead of weird, subtle, possibly undiagnosed memory stomps. + boost::fibers::fiber newCoro(boost::fibers::launch::dispatch, + std::allocator_arg, + boost::fibers::protected_fixedsize_stack(mStackSize), + [this, &name, &callable](){ toplevel(name, callable); }); + // You have two choices with a fiber instance: you can join() it or you + // can detach() it. If you try to destroy the instance before doing + // either, the program silently terminates. We don't need this handle. + newCoro.detach(); + return name; +} + #if LL_WINDOWS static const U32 STATUS_MSC_EXCEPTION = 0xE06D7363; // compiler specific @@ -340,10 +244,14 @@ void LLCoros::winlevel(const callable_t& callable) // Top-level wrapper around caller's coroutine callable. This function accepts // the coroutine library's implicit coro::self& parameter and saves it, but // does not pass it down to the caller's callable. -void LLCoros::toplevel(coro::self& self, CoroData* data, const callable_t& callable) +void LLCoros::toplevel(const std::string& name, const callable_t& callable) { - // capture the 'self' param in CoroData - data->mSelf = &self; + CoroData* corodata = new CoroData(name); + // Store it in our pointer map. Oddly, must cast away const-ness of key. + mCoros.insert(const_cast(name), corodata); + // also set it as current + mCurrent.reset(corodata); + // run the code the caller actually wants in the coroutine try { @@ -358,70 +266,41 @@ void LLCoros::toplevel(coro::self& self, CoroData* data, const callable_t& calla // Any uncaught exception derived from LLContinueError will be caught // here and logged. This coroutine will terminate but the rest of the // viewer will carry on. - LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << data->mName)); + LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName)); } catch (...) { // Any OTHER kind of uncaught exception will cause the viewer to // crash, hopefully informatively. - CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << data->mName)); + CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName)); } - // This cleanup isn't perfectly symmetrical with the way we initially set - // data->mPrev, but this is our last chance to reset Current. - Current().reset(data->mPrev); } -/***************************************************************************** -* MUST BE LAST -*****************************************************************************/ -// Turn off MSVC optimizations for just LLCoros::launch() -- see -// DEV-32777. But MSVC doesn't support push/pop for optimization flags as it -// does for warning suppression, and we really don't want to force -// optimization ON for other code even in Debug or RelWithDebInfo builds. - -#if LL_MSVC -// work around broken optimizations -#pragma warning(disable: 4748) -#pragma warning(disable: 4355) // 'this' used in initializer list: yes, intentionally -#pragma optimize("", off) -#endif // LL_MSVC - -LLCoros::CoroData::CoroData(CoroData* prev, const std::string& name, - const callable_t& callable, S32 stacksize): - mPrev(prev), +LLCoros::CoroData::CoroData(const std::string& name): mName(name), - // Wrap the caller's callable in our toplevel() function so we can manage - // Current appropriately at startup and shutdown of each coroutine. - mCoro(boost::bind(toplevel, _1, this, callable), stacksize), // don't consume events unless specifically directed mConsuming(false), - mSelf(0), mCreationTime(LLTimer::getTotalSeconds()) { } -std::string LLCoros::launch(const std::string& prefix, const callable_t& callable) +void LLCoros::delete_CoroData(CoroData* cdptr) { - std::string name(generateDistinctName(prefix)); - Current current; - // pass the current value of Current as previous context - CoroData* newCoro = new(std::nothrow) CoroData(current, name, callable, mStackSize); - if (newCoro == NULL) + // This custom cleanup function is necessarily static. Find and bind the + // LLCoros instance. + LLCoros& self(LLCoros::instance()); + // We set mCurrent on entry to a new fiber, expecting that the + // corresponding entry has already been stored in mCoros. It is an + // error if we do not find that entry. + CoroMap::iterator found = self.mCoros.find(cdptr->mName); + if (found == self.mCoros.end()) { - // Out of memory? - printActiveCoroutines(); - LL_ERRS("LLCoros") << "Failed to start coroutine: " << name << " Stacksize: " << mStackSize << " Total coroutines: " << mCoros.size() << LL_ENDL; + LL_ERRS("LLCoros") << "Coroutine '" << cdptr->mName << "' terminated " + << "without being stored in LLCoros::mCoros" + << LL_ENDL; } - // Store it in our pointer map - mCoros.insert(name, newCoro); - // also set it as current - current.reset(newCoro); - /* Run the coroutine until its first wait, then return here */ - (newCoro->mCoro)(std::nothrow); - return name; -} -#if LL_MSVC -// reenable optimizations -#pragma optimize("", on) -#endif // LL_MSVC + // Oh good, we found the mCoros entry. Erase it. Because it's a ptr_map, + // that will implicitly delete this CoroData. + self.mCoros.erase(found); +} -- cgit v1.2.3 From 101ab28f0c317e8c8489f0189f81b7b4e2381e9a Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Sat, 29 Dec 2018 10:27:16 -0500 Subject: SL-793: Fix lllogin_test.cpp for new LLCoros implementation. Delete the test for SRV timeout: lllogin no longer issues an SRV query. That test only confuses the test program without exercising any useful paths in production code. As with other tests dating from the previous LLCoros implementation, we need a few llcoro::suspend() calls sprinkled in so that a fiber marked ready -- by fulfilling the future for which it is waiting -- gets a chance to run. Clear LLEventPumps between test functions. --- indra/llcommon/llcoros.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index f5ffd96cec..939c70b7ea 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -241,9 +241,7 @@ void LLCoros::winlevel(const callable_t& callable) #endif -// Top-level wrapper around caller's coroutine callable. This function accepts -// the coroutine library's implicit coro::self& parameter and saves it, but -// does not pass it down to the caller's callable. +// Top-level wrapper around caller's coroutine callable. void LLCoros::toplevel(const std::string& name, const callable_t& callable) { CoroData* corodata = new CoroData(name); -- cgit v1.2.3 From 243b1115654346778fc6bb9b8ce5275033347a56 Mon Sep 17 00:00:00 2001 From: Anchor Date: Tue, 30 Apr 2019 16:30:36 -0600 Subject: [DRTVWR-476] - compile error fix --- indra/llcommon/llcoros.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index 939c70b7ea..37bcfc3242 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -26,6 +26,8 @@ * $/LicenseInfo$ */ +#include "llwin32headers.h" + // Precompiled header #include "linden_common.h" // associated header -- cgit v1.2.3 From 28a54c2f7b25b9fda763c51746701f0cd21c8018 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 22 Oct 2019 16:49:29 -0400 Subject: DRTVWR-476: Infrastructure to help manage long-lived coroutines. Introduce LLCoros::Stop exception, with subclasses Stopping, Stopped and Shutdown. Add LLCoros::checkStop(), intended to be called periodically by any coroutine with nontrivial lifespan. It checks the LLApp status and, unless isRunning(), throws one of these new exceptions. Make LLCoros::toplevel() catch Stop specially and log forcible coroutine termination. Now that LLApp status matters even in a test program, introduce a trivial LLTestApp subclass whose sole function is to make isRunning() true. (LLApp::setStatus() is protected: only a subclass can call it.) Add LLTestApp instances to lleventcoro_test.cpp and lllogin_test.cpp. Make LLCoros::toplevel() accept parameters by value rather than by const reference so we can continue using them even after context switches. Make private LLCoros::get_CoroData() static. Given that we've observed some coroutines living past LLCoros destruction, making the caller call LLCoros::instance() is more dangerous than encapsulating it within a static method -- since the encapsulated call can check LLCoros::wasDeleted() first and do something reasonable instead. This also eliminates the need for both a const and non-const overload. Defend LLCoros::delete_CoroData() (cleanup function for fiber_specific_ptr for CoroData, implicitly called after coroutine termination) against calls after ~LLCoros(). Add a status string to coroutine-local data, with LLCoro::setStatus(), getStatus() and RAII class TempStatus. Add an optional 'when' string argument to LLCoros::printActiveCoroutines(). Make ~LLCoros() print the coroutines still active at destruction. --- indra/llcommon/llcoros.cpp | 105 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 23 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index 37bcfc3242..78a0c5d225 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -48,6 +48,7 @@ #undef BOOST_DISABLE_ASSERTS #endif // other Linden headers +#include "llapp.h" #include "lltimer.h" #include "llevents.h" #include "llerror.h" @@ -58,10 +59,15 @@ #include #endif - -const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const +// static +LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) { - CoroData* current = mCurrent.get(); + CoroData* current{ nullptr }; + // be careful about attempted accesses in the final throes of app shutdown + if (! wasDeleted()) + { + current = instance().mCurrent.get(); + } // For the main() coroutine, the one NOT explicitly launched by launch(), // we never explicitly set mCurrent. Use a static CoroData instance with // canonical values. @@ -78,12 +84,6 @@ const LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) const return *current; } -LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) -{ - // reuse const implementation, just cast away const-ness of result - return const_cast(const_cast(this)->get_CoroData(caller)); -} - //static LLCoros::coro::id LLCoros::get_self() { @@ -93,7 +93,7 @@ LLCoros::coro::id LLCoros::get_self() //static void LLCoros::set_consuming(bool consuming) { - CoroData& data(LLCoros::instance().get_CoroData("set_consuming()")); + CoroData& data(get_CoroData("set_consuming()")); // DO NOT call this on the main() coroutine. llassert_always(! data.mName.empty()); data.mConsuming = consuming; @@ -102,7 +102,19 @@ void LLCoros::set_consuming(bool consuming) //static bool LLCoros::get_consuming() { - return LLCoros::instance().get_CoroData("get_consuming()").mConsuming; + return get_CoroData("get_consuming()").mConsuming; +} + +// static +void LLCoros::setStatus(const std::string& status) +{ + get_CoroData("setStatus()").mStatus = status; +} + +// static +std::string LLCoros::getStatus() +{ + return get_CoroData("getStatus()").mStatus; } LLCoros::LLCoros(): @@ -118,6 +130,11 @@ LLCoros::LLCoros(): { } +LLCoros::~LLCoros() +{ + printActiveCoroutines("at LLCoros destruction"); +} + std::string LLCoros::generateDistinctName(const std::string& prefix) const { static int unique = 0; @@ -166,19 +183,19 @@ void LLCoros::setStackSize(S32 stacksize) mStackSize = stacksize; } -void LLCoros::printActiveCoroutines() +void LLCoros::printActiveCoroutines(const std::string& when) { - LL_INFOS("LLCoros") << "Number of active coroutines: " << (S32)mCoros.size() << LL_ENDL; + LL_INFOS("LLCoros") << "Number of active coroutines " << when + << ": " << (S32)mCoros.size() << LL_ENDL; if (mCoros.size() > 0) { LL_INFOS("LLCoros") << "-------------- List of active coroutines ------------"; - CoroMap::iterator iter; - CoroMap::iterator end = mCoros.end(); F64 time = LLTimer::getTotalSeconds(); - for (iter = mCoros.begin(); iter != end; iter++) + for (const auto& pair : mCoros) { - F64 life_time = time - iter->second->mCreationTime; - LL_CONT << LL_NEWLINE << "Name: " << iter->first << " life: " << life_time; + F64 life_time = time - pair.second->mCreationTime; + LL_CONT << LL_NEWLINE << pair.first << ' ' << pair.second->mStatus + << " life: " << life_time; } LL_CONT << LL_ENDL; LL_INFOS("LLCoros") << "-----------------------------------------------------" << LL_ENDL; @@ -244,11 +261,19 @@ void LLCoros::winlevel(const callable_t& callable) #endif // Top-level wrapper around caller's coroutine callable. -void LLCoros::toplevel(const std::string& name, const callable_t& callable) +// Normally we like to pass strings and such by const reference -- but in this +// case, we WANT to copy both the name and the callable to our local stack! +void LLCoros::toplevel(std::string name, callable_t callable) { CoroData* corodata = new CoroData(name); - // Store it in our pointer map. Oddly, must cast away const-ness of key. - mCoros.insert(const_cast(name), corodata); + if (corodata == NULL) + { + // Out of memory? + printActiveCoroutines(); + LL_ERRS("LLCoros") << "Failed to start coroutine: " << name << " Stacksize: " << mStackSize << " Total coroutines: " << mCoros.size() << LL_ENDL; + } + // Store it in our pointer map. + mCoros.insert(name, corodata); // also set it as current mCurrent.reset(corodata); @@ -261,18 +286,39 @@ void LLCoros::toplevel(const std::string& name, const callable_t& callable) callable(); #endif } + catch (const Stop& exc) + { + LL_INFOS("LLCoros") << "coroutine " << name << " terminating because " + << exc.what() << LL_ENDL; + } catch (const LLContinueError&) { // Any uncaught exception derived from LLContinueError will be caught // here and logged. This coroutine will terminate but the rest of the // viewer will carry on. - LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName)); + LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name)); } catch (...) { // Any OTHER kind of uncaught exception will cause the viewer to // crash, hopefully informatively. - CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << corodata->mName)); + CRASH_ON_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name)); + } +} + +void LLCoros::checkStop() +{ + if (wasDeleted()) + { + LLTHROW(Shutdown("LLCoros was deleted")); + } + if (LLApp::isStopped()) + { + LLTHROW(Stopped("viewer is stopped")); + } + if (! LLApp::isRunning()) + { + LLTHROW(Stopping("viewer is stopping")); } } @@ -288,6 +334,19 @@ void LLCoros::delete_CoroData(CoroData* cdptr) { // This custom cleanup function is necessarily static. Find and bind the // LLCoros instance. + // In case the LLCoros instance has already been deleted, just warn and + // scoot. We do NOT want to reinstantiate LLCoros during shutdown! + if (wasDeleted()) + { + // The LLSingletons involved in logging may have been deleted too. + // This warning may help developers track down coroutines that have + // not yet been cleaned up. + // But cdptr is very likely a dangling pointer by this time, so don't + // try to dereference mName. + logwarns("Coroutine terminating after LLCoros instance deleted"); + return; + } + LLCoros& self(LLCoros::instance()); // We set mCurrent on entry to a new fiber, expecting that the // corresponding entry has already been stored in mCoros. It is an -- cgit v1.2.3 From cbf146f2b3fc255bc83f2b01101dc29658bea6ea Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Thu, 24 Oct 2019 12:54:38 -0400 Subject: DRTVWR-476: Pump coroutines a few more times when we start quitting. By the time "LLApp" listeners are notified that the app is quitting, the mainloop is no longer running. Even though those listeners do things like close work queues and inject exceptions into pending promises, any coroutines waiting on those resources must regain control before they can notice and shut down properly. Add a final "LLApp" listener that resumes ready coroutines a few more times. Make sure every other "LLApp" listener is positioned before that new one. --- indra/llcommon/llcoros.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index 78a0c5d225..ea54f1aa92 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -128,6 +128,38 @@ LLCoros::LLCoros(): mStackSize(256*1024) #endif { + // Set up a listener to notice when the viewer is starting to shut down. + // Store the connection in an LLTempBoundListener so it will automatically + // disconnect. + mAppListener = LLEventPumps::instance().obtain("LLApp").listen( + "final", // must be the LAST listener on this LLEventPump + [this](const LLSD& status) + { + if (status["status"].asString() == "quitting") + { + // Other LLApp status-change listeners do things like close + // work queues and inject the Stop exception into pending + // promises, to force coroutines waiting on those things to + // notice and terminate. The only problem is that by the time + // LLApp sets "quitting" status, the main loop has stopped + // pumping the fiber scheduler with yield() calls. A waiting + // coroutine still might not wake up until after resources on + // which it depends have been freed. Pump it a few times + // ourselves. Of course, stop pumping as soon as the last of + // the coroutines has terminated. + for (size_t count = 0; count < 10 && ! mCoros.empty(); ++count) + { + // don't use llcoro::suspend() because that module depends + // on this one + boost::this_fiber::yield(); + } + } + // If we're really the last listener, it shouldn't matter whether + // we consume this event -- but our being last depends on every + // other listen() call specifying before "final", which would be + // all too easy to forget. So do not consume the event. + return false; + }); } LLCoros::~LLCoros() -- cgit v1.2.3 From 26c8ccfc06bc9334c9a4d0d027e83ad0b1b92a86 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Thu, 24 Oct 2019 16:05:37 -0400 Subject: DRTVWR-476: Back out changeset 40c0c6a8407d ("final" LLApp listener) --- indra/llcommon/llcoros.cpp | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index ea54f1aa92..78a0c5d225 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -128,38 +128,6 @@ LLCoros::LLCoros(): mStackSize(256*1024) #endif { - // Set up a listener to notice when the viewer is starting to shut down. - // Store the connection in an LLTempBoundListener so it will automatically - // disconnect. - mAppListener = LLEventPumps::instance().obtain("LLApp").listen( - "final", // must be the LAST listener on this LLEventPump - [this](const LLSD& status) - { - if (status["status"].asString() == "quitting") - { - // Other LLApp status-change listeners do things like close - // work queues and inject the Stop exception into pending - // promises, to force coroutines waiting on those things to - // notice and terminate. The only problem is that by the time - // LLApp sets "quitting" status, the main loop has stopped - // pumping the fiber scheduler with yield() calls. A waiting - // coroutine still might not wake up until after resources on - // which it depends have been freed. Pump it a few times - // ourselves. Of course, stop pumping as soon as the last of - // the coroutines has terminated. - for (size_t count = 0; count < 10 && ! mCoros.empty(); ++count) - { - // don't use llcoro::suspend() because that module depends - // on this one - boost::this_fiber::yield(); - } - } - // If we're really the last listener, it shouldn't matter whether - // we consume this event -- but our being last depends on every - // other listen() call specifying before "final", which would be - // all too easy to forget. So do not consume the event. - return false; - }); } LLCoros::~LLCoros() -- cgit v1.2.3 From 9008124d352a195616bc8e7b88b048f479cdc4c2 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 25 Oct 2019 06:55:45 -0400 Subject: DRTVWR-476: Keep coroutine-local data on toplevel()'s stack frame. Instead of heap-allocating a CoroData instance per coroutine, storing the pointer in a ptr_map and deleting it from the ptr_map once the fiber_specific_ptr for that coroutine is cleaned up -- just declare a stack instance on the top-level stack frame, the simplest C++ lifespan management. Derive CoroData from LLInstanceTracker to detect potential name collisions and to enumerate instances. Continue registering each coroutine's CoroData instance in our fiber_specific_ptr, but use a no-op deleter function. Make ~LLCoros() directly pump the fiber scheduler a few times, instead of having a special "LLApp" listener. --- indra/llcommon/llcoros.cpp | 97 +++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 58 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index 78a0c5d225..febe74b559 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -76,9 +76,9 @@ LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) // It's tempting to provide a distinct name for each thread's "main // coroutine." But as getName() has always returned the empty string // to mean "not in a coroutine," empty string should suffice here. - static CoroData sMain(""); - // We need not reset() the local_ptr to this read-only data: reuse the - // same instance for every thread's main coroutine. + static thread_local CoroData sMain(""); + // We need not reset() the local_ptr to this instance; we'll simply + // find it again every time we discover that current is null. current = &sMain; } return *current; @@ -123,16 +123,36 @@ LLCoros::LLCoros(): // boost::context::guarded_stack_allocator::default_stacksize(); // empirically this is insufficient. #if ADDRESS_SIZE == 64 - mStackSize(512*1024) + mStackSize(512*1024), #else - mStackSize(256*1024) + mStackSize(256*1024), #endif + // mCurrent does NOT own the current CoroData instance -- it simply + // points to it. So initialize it with a no-op deleter. + mCurrent{ [](CoroData*){} } { } LLCoros::~LLCoros() { - printActiveCoroutines("at LLCoros destruction"); + printActiveCoroutines("at entry to ~LLCoros()"); + // Other LLApp status-change listeners do things like close + // work queues and inject the Stop exception into pending + // promises, to force coroutines waiting on those things to + // notice and terminate. The only problem is that by the time + // LLApp sets "quitting" status, the main loop has stopped + // pumping the fiber scheduler with yield() calls. A waiting + // coroutine still might not wake up until after resources on + // which it depends have been freed. Pump it a few times + // ourselves. Of course, stop pumping as soon as the last of + // the coroutines has terminated. + for (size_t count = 0; count < 10 && CoroData::instanceCount() > 0; ++count) + { + // don't use llcoro::suspend() because that module depends + // on this one + boost::this_fiber::yield(); + } + printActiveCoroutines("after pumping"); } std::string LLCoros::generateDistinctName(const std::string& prefix) const @@ -149,7 +169,7 @@ std::string LLCoros::generateDistinctName(const std::string& prefix) const std::string name(prefix); // Until we find an unused name, append a numeric suffix for uniqueness. - while (mCoros.find(name) != mCoros.end()) + while (CoroData::getInstance(name)) { name = STRINGIZE(prefix << unique++); } @@ -186,16 +206,17 @@ void LLCoros::setStackSize(S32 stacksize) void LLCoros::printActiveCoroutines(const std::string& when) { LL_INFOS("LLCoros") << "Number of active coroutines " << when - << ": " << (S32)mCoros.size() << LL_ENDL; - if (mCoros.size() > 0) + << ": " << CoroData::instanceCount() << LL_ENDL; + if (CoroData::instanceCount() > 0) { LL_INFOS("LLCoros") << "-------------- List of active coroutines ------------"; F64 time = LLTimer::getTotalSeconds(); - for (const auto& pair : mCoros) + for (auto it(CoroData::beginInstances()), end(CoroData::endInstances()); + it != end; ++it) { - F64 life_time = time - pair.second->mCreationTime; - LL_CONT << LL_NEWLINE << pair.first << ' ' << pair.second->mStatus - << " life: " << life_time; + F64 life_time = time - it->mCreationTime; + LL_CONT << LL_NEWLINE + << it->mName << ' ' << it->mStatus << " life: " << life_time; } LL_CONT << LL_ENDL; LL_INFOS("LLCoros") << "-----------------------------------------------------" << LL_ENDL; @@ -265,17 +286,10 @@ void LLCoros::winlevel(const callable_t& callable) // case, we WANT to copy both the name and the callable to our local stack! void LLCoros::toplevel(std::string name, callable_t callable) { - CoroData* corodata = new CoroData(name); - if (corodata == NULL) - { - // Out of memory? - printActiveCoroutines(); - LL_ERRS("LLCoros") << "Failed to start coroutine: " << name << " Stacksize: " << mStackSize << " Total coroutines: " << mCoros.size() << LL_ENDL; - } - // Store it in our pointer map. - mCoros.insert(name, corodata); - // also set it as current - mCurrent.reset(corodata); + // keep the CoroData on this top-level function's stack frame + CoroData corodata(name); + // set it as current + mCurrent.reset(&corodata); // run the code the caller actually wants in the coroutine try @@ -323,43 +337,10 @@ void LLCoros::checkStop() } LLCoros::CoroData::CoroData(const std::string& name): + LLInstanceTracker(name), mName(name), // don't consume events unless specifically directed mConsuming(false), mCreationTime(LLTimer::getTotalSeconds()) { } - -void LLCoros::delete_CoroData(CoroData* cdptr) -{ - // This custom cleanup function is necessarily static. Find and bind the - // LLCoros instance. - // In case the LLCoros instance has already been deleted, just warn and - // scoot. We do NOT want to reinstantiate LLCoros during shutdown! - if (wasDeleted()) - { - // The LLSingletons involved in logging may have been deleted too. - // This warning may help developers track down coroutines that have - // not yet been cleaned up. - // But cdptr is very likely a dangling pointer by this time, so don't - // try to dereference mName. - logwarns("Coroutine terminating after LLCoros instance deleted"); - return; - } - - LLCoros& self(LLCoros::instance()); - // We set mCurrent on entry to a new fiber, expecting that the - // corresponding entry has already been stored in mCoros. It is an - // error if we do not find that entry. - CoroMap::iterator found = self.mCoros.find(cdptr->mName); - if (found == self.mCoros.end()) - { - LL_ERRS("LLCoros") << "Coroutine '" << cdptr->mName << "' terminated " - << "without being stored in LLCoros::mCoros" - << LL_ENDL; - } - - // Oh good, we found the mCoros entry. Erase it. Because it's a ptr_map, - // that will implicitly delete this CoroData. - self.mCoros.erase(found); -} -- cgit v1.2.3 From 2a56ab44360aa49bd0df9281efc8a8a97a510013 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 22 Nov 2019 11:58:27 -0500 Subject: DRTVWR-476, SL-12197: Don't throw Stopping from main coroutine. The new LLCoros::Stop exception is intended to terminate long-lived coroutines -- not interrupt mainstream shutdown processing. Only throw it on an explicitly-launched coroutine. Make LLCoros::getName() (used by the above test) static. As with other LLCoros methods, it might be called after the LLCoros LLSingleton instance has been deleted. Requiring the caller to call instance() implies a possible need to also call wasDeleted(). Encapsulate that nuance into a static method instead. --- indra/llcommon/llcoros.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index febe74b559..5f940de52b 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -192,7 +192,8 @@ bool LLCoros::kill(const std::string& name) } |*==========================================================================*/ -std::string LLCoros::getName() const +//static +std::string LLCoros::getName() { return get_CoroData("getName()").mName; } @@ -320,12 +321,21 @@ void LLCoros::toplevel(std::string name, callable_t callable) } } +//static void LLCoros::checkStop() { if (wasDeleted()) { LLTHROW(Shutdown("LLCoros was deleted")); } + // do this AFTER the check above, because getName() depends on + // get_CoroData(), which depends on the local_ptr in our instance(). + if (getName().empty()) + { + // Our Stop exception and its subclasses are intended to stop loitering + // coroutines. Don't throw it from the main coroutine. + return; + } if (LLApp::isStopped()) { LLTHROW(Stopped("viewer is stopped")); -- cgit v1.2.3 From 80f913fadb1a03b50fd449c3416b331eb2512bea Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Thu, 5 Dec 2019 11:54:52 -0500 Subject: DRTVWR-476: Adjust LLCoros to new LLInstanceTracker API. --- indra/llcommon/llcoros.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index 5f940de52b..9e0cceda2b 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -212,12 +212,11 @@ void LLCoros::printActiveCoroutines(const std::string& when) { LL_INFOS("LLCoros") << "-------------- List of active coroutines ------------"; F64 time = LLTimer::getTotalSeconds(); - for (auto it(CoroData::beginInstances()), end(CoroData::endInstances()); - it != end; ++it) + for (auto& cd : CoroData::instance_snapshot()) { - F64 life_time = time - it->mCreationTime; + F64 life_time = time - cd.mCreationTime; LL_CONT << LL_NEWLINE - << it->mName << ' ' << it->mStatus << " life: " << life_time; + << cd.mName << ' ' << cd.mStatus << " life: " << life_time; } LL_CONT << LL_ENDL; LL_INFOS("LLCoros") << "-----------------------------------------------------" << LL_ENDL; -- cgit v1.2.3 From 39e7b48317e3f0fe37d7398099ab1e38b97963bf Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 10 Dec 2019 10:56:24 -0500 Subject: DRTVWR-476: Make llcoro::logname() distinguish different threads. Actually, introduce static LLCoros::logname() and make the namespaced free function an alias for that. Because CoroData is a subclass of LLInstanceTracker with a key, every instance requires a distinct key. That conflicts with our "getName() returns empty string for default coroutine on thread" convention. Introduce a new CoroData constructor, specifically for the default coroutine on each thread, that initializes the getName() name to empty string while providing a distinct "mainN" key. Make get_CoroData() use that new constructor for its thread_local instance, passing an atomic incremented each time we initialize one for a new thread. Then LLCoros::logname() returns either the getName() name or the key. --- indra/llcommon/llcoros.cpp | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index 9e0cceda2b..adb86c4e0b 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -34,6 +34,7 @@ #include "llcoros.h" // STL headers // std headers +#include // external library headers #include #include @@ -73,10 +74,9 @@ LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) // canonical values. if (! current) { - // It's tempting to provide a distinct name for each thread's "main - // coroutine." But as getName() has always returned the empty string - // to mean "not in a coroutine," empty string should suffice here. - static thread_local CoroData sMain(""); + static std::atomic which_thread(0); + // Use alternate CoroData constructor. + static thread_local CoroData sMain(which_thread++); // We need not reset() the local_ptr to this instance; we'll simply // find it again every time we discover that current is null. current = &sMain; @@ -198,6 +198,13 @@ std::string LLCoros::getName() return get_CoroData("getName()").mName; } +//static +std::string LLCoros::logname() +{ + LLCoros::CoroData& data(get_CoroData("logname()")); + return data.mName.empty()? data.getKey() : data.mName; +} + void LLCoros::setStackSize(S32 stacksize) { LL_DEBUGS("LLCoros") << "Setting coroutine stack size to " << stacksize << LL_ENDL; @@ -353,3 +360,16 @@ LLCoros::CoroData::CoroData(const std::string& name): mCreationTime(LLTimer::getTotalSeconds()) { } + +LLCoros::CoroData::CoroData(int n): + // This constructor is used for the thread_local instance belonging to the + // default coroutine on each thread. We must give each one a different + // LLInstanceTracker key because LLInstanceTracker's map spans all + // threads, but we want the default coroutine on each thread to have the + // empty string as its visible name because some consumers test for that. + LLInstanceTracker("main" + stringize(n)), + mName(), + mConsuming(false), + mCreationTime(LLTimer::getTotalSeconds()) +{ +} -- cgit v1.2.3 From 0756885eadafbda9763769186f14d728f0887ead Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Wed, 18 Dec 2019 15:54:00 -0500 Subject: DRTVWR-476: make printActiveCoroutines() output slightly clearer. For the main coroutine on each thread, show the 'main0' (or whatever) name instead of the empty-string name. --- indra/llcommon/llcoros.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index adb86c4e0b..262929006d 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -223,7 +223,7 @@ void LLCoros::printActiveCoroutines(const std::string& when) { F64 life_time = time - cd.mCreationTime; LL_CONT << LL_NEWLINE - << cd.mName << ' ' << cd.mStatus << " life: " << life_time; + << cd.getKey() << ' ' << cd.mStatus << " life: " << life_time; } LL_CONT << LL_ENDL; LL_INFOS("LLCoros") << "-----------------------------------------------------" << LL_ENDL; -- cgit v1.2.3