From c78be38a6a4211f06876bc80b3f19f89a5f936e0 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Wed, 17 Apr 2024 16:39:37 -0400 Subject: Reintroduce LLCoros::killreq() to request killing a named coroutine. Make LLCoros constructor echo "LLApp" status-change events on new "LLCoros" event pump. Rename LLCoros::kill() to killreq() because this operation only registers a request for the named coroutine to terminate next time it calls checkStop(). Add a new CoroData member to record the name of the coroutine requesting termination. killreq() sets that and also posts "killreq" to "LLCoros". Add an optional final-cleanup callback to LLCoros::checkStop(). Make checkStop() check for a pending killreq() request as well as viewer termination. Introduce new LLCoros::Killed exception for that case. Introduce LLCoros::getStopListener(), with two overloads, to encapsulate some of the messy logic to listen (perhaps temporarily) for viewer shutdown. Both overloads are for use by code at the source end of a queue or promise or other resource for which coroutines might still be waiting at viewer shutdown time. One overload is specifically for when the caller knows the name of the one and only coroutine that will wait on the resource (e.g. because the caller IS that coroutine). That overload honors killreq(). Use getStopListener() to simplify the four existing places where we set up such a listener. Add a fifth: also make WorkQueue listen for viewer shutdown (resolving a TODO comment). Remove LLLUAmanager::terminateScript(), getTerminationList() and the static sTerminationList. In the Lua interrupt callback, instead of checking sTerminationList, call LLCoros::checkStop(). Change LLFloaterLUAScripts terminate-script logic to call LLCoros::killreq() instead of posting on "LLLua" and calling LLLUAmanager::terminateScript(). Drop LLApp::setStatus() posting to "LLLua" LLEventPump: the above makes that moot. --- indra/llcommon/llcoros.cpp | 127 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 24 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index c13900f74a..1926941d1f 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -51,18 +51,19 @@ #endif // other Linden headers #include "llapp.h" -#include "lltimer.h" -#include "llevents.h" #include "llerror.h" -#include "stringize.h" +#include "llevents.h" #include "llexception.h" +#include "llsdutil.h" +#include "lltimer.h" +#include "stringize.h" #if LL_WINDOWS #include #endif // static -LLCoros::CoroData& LLCoros::get_CoroData(const std::string& caller) +LLCoros::CoroData& LLCoros::get_CoroData(const std::string&) { CoroData* current{ nullptr }; // be careful about attempted accesses in the final throes of app shutdown @@ -94,7 +95,7 @@ LLCoros::coro::id LLCoros::get_self() //static void LLCoros::set_consuming(bool consuming) { - CoroData& data(get_CoroData("set_consuming()")); + auto& data(get_CoroData("set_consuming()")); // DO NOT call this on the main() coroutine. llassert_always(! data.mName.empty()); data.mConsuming = consuming; @@ -128,6 +129,15 @@ LLCoros::LLCoros(): // points to it. So initialize it with a no-op deleter. mCurrent{ [](CoroData*){} } { + auto& llapp{ LLEventPumps::instance().obtain("LLApp") }; + if (llapp.getListener("LLCoros") == LLBoundListener()) + { + // chain our "LLCoros" pump onto "LLApp" pump: echo events posted to "LLApp" + mConn = llapp.listen( + "LLCoros", + [](const LLSD& event) + { return LLEventPumps::instance().obtain("LLCoros").post(event); }); + } } LLCoros::~LLCoros() @@ -177,26 +187,26 @@ std::string LLCoros::generateDistinctName(const std::string& prefix) const // Until we find an unused name, append a numeric suffix for uniqueness. while (CoroData::getInstance(name)) { - name = STRINGIZE(prefix << unique++); + name = stringize(prefix, unique++); } return name; } -/*==========================================================================*| -bool LLCoros::kill(const std::string& name) +bool LLCoros::killreq(const std::string& name) { - CoroMap::iterator found = mCoros.find(name); - if (found == mCoros.end()) + auto found = CoroData::getInstance(name); + if (! found) { return false; } - // Because this is a boost::ptr_map, erasing the map entry also destroys - // the referenced heap object, in this case the boost::coroutine object, - // which will terminate the coroutine. - mCoros.erase(found); + // Next time the subject coroutine calls checkStop(), make it terminate. + found->mKilledBy = getName(); + // But if it's waiting for something, notify anyone in a position to poke + // it. + LLEventPumps::instance().obtain("LLCoros").post( + llsd::map("status", "killreq", "coro", name)); return true; } -|*==========================================================================*/ //static std::string LLCoros::getName() @@ -207,7 +217,7 @@ std::string LLCoros::getName() //static std::string LLCoros::logname() { - LLCoros::CoroData& data(get_CoroData("logname()")); + auto& data(get_CoroData("logname()")); return data.mName.empty()? data.getKey() : data.mName; } @@ -360,7 +370,7 @@ void LLCoros::toplevel(std::string name, callable_t callable) // Any uncaught exception derived from LLContinueError will be caught // here and logged. This coroutine will terminate but the rest of the // viewer will carry on. - LOG_UNHANDLED_EXCEPTION(STRINGIZE("coroutine " << name)); + LOG_UNHANDLED_EXCEPTION(stringize("coroutine ", name)); } catch (...) { @@ -373,15 +383,24 @@ void LLCoros::toplevel(std::string name, callable_t callable) } //static -void LLCoros::checkStop() +void LLCoros::checkStop(callable_t cleanup) { + // don't replicate this 'if' test throughout the code below + if (! cleanup) + { + cleanup = {[](){}}; // hey, look, I'm coding in Haskell! + } + if (wasDeleted()) { + cleanup(); LLTHROW(Shutdown("LLCoros was deleted")); } - // do this AFTER the check above, because getName() depends on - // get_CoroData(), which depends on the local_ptr in our instance(). - if (getName().empty()) + + // do this AFTER the check above, because get_CoroData() depends on the + // local_ptr in our instance(). + auto& data(get_CoroData("checkStop()")); + if (data.mName.empty()) { // Our Stop exception and its subclasses are intended to stop loitering // coroutines. Don't throw it from the main coroutine. @@ -389,19 +408,80 @@ void LLCoros::checkStop() } if (LLApp::isStopped()) { + cleanup(); LLTHROW(Stopped("viewer is stopped")); } if (! LLApp::isRunning()) { + cleanup(); LLTHROW(Stopping("viewer is stopping")); } + if (! data.mKilledBy.empty()) + { + // Someone wants to kill this coroutine + cleanup(); + LLTHROW(Killed(stringize("coroutine ", data.mName, " killed by ", data.mKilledBy))); + } +} + +LLBoundListener LLCoros::getStopListener(const std::string& caller, LLVoidListener cleanup) +{ + if (! cleanup) + return {}; + + // This overload only responds to viewer shutdown. + return LLEventPumps::instance().obtain("LLCoros") + .listen( + LLEventPump::inventName(caller), + [cleanup](const LLSD& event) + { + auto status{ event["status"].asString() }; + if (status != "running" && status != "killreq") + { + cleanup(event); + } + return false; + }); +} + +LLBoundListener LLCoros::getStopListener(const std::string& caller, + const std::string& cnsmr, + LLVoidListener cleanup) +{ + if (! cleanup) + return {}; + + std::string consumer{cnsmr}; + if (consumer.empty()) + { + consumer = getName(); + } + + // This overload responds to viewer shutdown and to killreq(consumer). + return LLEventPumps::instance().obtain("LLCoros") + .listen( + LLEventPump::inventName(caller), + [consumer, cleanup](const LLSD& event) + { + auto status{ event["status"].asString() }; + if (status == "killreq") + { + if (event["coro"].asString() == consumer) + { + cleanup(event); + } + } + else if (status != "running") + { + cleanup(event); + } + return false; + }); } LLCoros::CoroData::CoroData(const std::string& name): LLInstanceTracker(name), mName(name), - // don't consume events unless specifically directed - mConsuming(false), mCreationTime(LLTimer::getTotalSeconds()) { } @@ -414,7 +494,6 @@ LLCoros::CoroData::CoroData(int n): // empty string as its visible name because some consumers test for that. LLInstanceTracker("main" + stringize(n)), mName(), - mConsuming(false), mCreationTime(LLTimer::getTotalSeconds()) { } -- cgit v1.2.3 From 1b6e2ef62cec9608d160ea25d99080f0e2964ee5 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Thu, 16 May 2024 13:30:14 -0400 Subject: On Windows, defend test.cpp against structured exceptions too. Since August 2023, we've seen occasional GitHub Windows build test runs terminate with 0xC00000FD: stack overflow. We've usually responded by bumping up the default coroutine stack size. On closer examination, it's always llleap_test.cpp that blows up that way -- and llleap_test.cpp doesn't appear to use coroutines at all. So apparently we've been consuming more address space for ALL viewer coroutines without actually addressing the problem. Reset the default coroutine stack size to where it was before we started bumping it up in response to these llleap_test.cpp stack overflow failures. Note that LLCoros already catches and reports Windows structured exceptions, underscoring that the observed stack overflow is not from within a coroutine. While at it, restore the Windows llleap_test.cpp data volume to match Posix. We think the problem that led to reducing that data volume was an APR bug, which we hope has been fixed. Equip test.cpp, the test driver program for all our TUT unit and integration tests, with a Windows structured exception handler. Try to treat a Windows structured exception as a test failure -- instead of silently terminating with 0xC00000FD. Moreover, when a structured exception occurs, output a stack trace so we can try to track it down. --- indra/llcommon/llcoros.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index aa8eca7d90..a70e3d9ae7 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -123,7 +123,7 @@ LLCoros::LLCoros(): // Previously we used // boost::context::guarded_stack_allocator::default_stacksize(); // empirically this is insufficient. - mStackSize(1024*1024), + mStackSize(512*1024), // mCurrent does NOT own the current CoroData instance -- it simply // points to it. So initialize it with a no-op deleter. mCurrent{ [](CoroData*){} } @@ -155,7 +155,7 @@ void LLCoros::cleanupSingleton() // don't use llcoro::suspend() because that module depends // on this one // This will yield current(main) thread and will let active - // corutines run once + // coroutines run once boost::this_fiber::yield(); } printActiveCoroutines("after pumping"); -- cgit v1.2.3 From 71d777ea126e7f02cb46c11bdb606094ca06f75c Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Fri, 24 May 2024 17:34:04 -0400 Subject: Promote seh_catcher() et al. to llexception.{h,cpp} for general use. --- indra/llcommon/llcoros.cpp | 51 +--------------------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index a70e3d9ae7..f3943b7138 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -286,55 +286,6 @@ std::string LLCoros::launch(const std::string& prefix, const callable_t& callabl return name; } -namespace -{ - -#if LL_WINDOWS - -static const U32 STATUS_MSC_EXCEPTION = 0xE06D7363; // compiler specific - -U32 exception_filter(U32 code, struct _EXCEPTION_POINTERS *exception_infop) -{ - if (code == STATUS_MSC_EXCEPTION) - { - // C++ exception, go on - return EXCEPTION_CONTINUE_SEARCH; - } - else - { - // handle it - return EXCEPTION_EXECUTE_HANDLER; - } -} - -void sehandle(const LLCoros::callable_t& callable) -{ - __try - { - callable(); - } - __except (exception_filter(GetExceptionCode(), GetExceptionInformation())) - { - // convert to C++ styled exception - // Note: it might be better to use _se_set_translator - // if you want exception to inherit full callstack - char integer_string[512]; - sprintf(integer_string, "SEH, code: %lu\n", GetExceptionCode()); - throw std::exception(integer_string); - } -} - -#else // ! LL_WINDOWS - -inline void sehandle(const LLCoros::callable_t& callable) -{ - callable(); -} - -#endif // ! LL_WINDOWS - -} // anonymous namespace - // Top-level wrapper around caller's coroutine callable. // Normally we like to pass strings and such by const reference -- but in this // case, we WANT to copy both the name and the callable to our local stack! @@ -348,7 +299,7 @@ void LLCoros::toplevel(std::string name, callable_t callable) // run the code the caller actually wants in the coroutine try { - sehandle(callable); + seh_catcher(callable); } catch (const Stop& exc) { -- cgit v1.2.3 From 5ed8df22cd59680a685c4ada7daa5555bf59d4fe Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Tue, 28 May 2024 13:22:05 -0400 Subject: Fix up llexception.h's cross-platform SEH wrapper. Introduce AlwaysReturn specialization, which always discards any result of calling the specified callable with specified args. Derive new Windows_SEH_exception from LLException, not std::runtime_error. Put the various SEH functions in LL::seh nested namespace, e.g. LL::seh::catcher() as the primary API. Break out more levels of Windows SEH handler to work around the restrictions on functions containing __try/__except. The triadic catcher() overload now does little save declare a std::string stacktrace before forwarding the call to catcher_inner(), passing a reference to stacktrace along with the trycode, filter and handler functions. catcher_inner() accepts the stacktrace and the three function template arguments. It contains the __try/__except logic. It calls a new filter_() wrapper template, which calls fill_stacktrace() before forwarding the call to the caller's filter function. fill_stacktrace(), in the .cpp file, contains the logic to populate the stacktrace string -- unless the Structured Exception is stack overflow, in which case it puts an explanatory string instead. catcher_inner()'s __except clause passes not only the code, but also the stacktrace string, to the caller's handler function. It wraps the caller's handler function in always_return(), where rtype is the type returned by the trycode function. This allows a handler to return a value, while also supporting the void handler case, e.g. one that throws a C++ exception. (This is why we need AlwaysReturn: some trycode() functions are themselves void.) For the dyadic catcher() overload, introduce common_filter() containing the logic to distinguish a C++ exception from any other kind of Structured Exception. The fact that the stacktrace is captured before the filter function is called should permit capturing a stacktrace for a C++ exception as well as for most other Structured Exceptions. As before, the monadic catcher() overload supplies the rethrow() handler, in the .cpp file. Change existing calls from seh_catcher() to LL::seh::catcher(). --- indra/llcommon/llcoros.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index f3943b7138..7512bac88d 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -299,7 +299,7 @@ void LLCoros::toplevel(std::string name, callable_t callable) // run the code the caller actually wants in the coroutine try { - seh_catcher(callable); + LL::seh::catcher(callable); } catch (const Stop& exc) { -- cgit v1.2.3 From d3b4b77a95baf66dcdb90c4312332bc2ac2c7663 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed Date: Wed, 12 Jun 2024 17:32:18 -0400 Subject: Add LL_DEBUGS("LLCoros") start/end messages. We have log messages when a coroutine terminates abnormally, but we don't report either when it starts or when it terminates normally. Address that. --- indra/llcommon/llcoros.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'indra/llcommon/llcoros.cpp') diff --git a/indra/llcommon/llcoros.cpp b/indra/llcommon/llcoros.cpp index a6d7988256..c28baa5747 100644 --- a/indra/llcommon/llcoros.cpp +++ b/indra/llcommon/llcoros.cpp @@ -270,7 +270,7 @@ std::string LLCoros::launch(const std::string& prefix, const callable_t& callabl // std::allocator_arg is a flag to indicate that the following argument is // a StackAllocator. // protected_fixedsize_stack sets a guard page past the end of the new - // stack so that stack underflow will result in an access violation + // stack so that stack overflow will result in an access violation // instead of weird, subtle, possibly undiagnosed memory stomps. try @@ -355,10 +355,12 @@ void LLCoros::toplevel(std::string name, callable_t callable) // set it as current mCurrent.reset(&corodata); + LL_DEBUGS("LLCoros") << "entering " << name << LL_ENDL; // run the code the caller actually wants in the coroutine try { sehandle(callable); + LL_DEBUGS("LLCoros") << "done " << name << LL_ENDL; } catch (const Stop& exc) { @@ -370,7 +372,7 @@ void LLCoros::toplevel(std::string name, callable_t callable) // Any uncaught exception derived from LLContinueError will be caught // here and logged. This coroutine will terminate but the rest of the // viewer will carry on. - LOG_UNHANDLED_EXCEPTION(stringize("coroutine ", name)); + LOG_UNHANDLED_EXCEPTION("coroutine " + name); } catch (...) { -- cgit v1.2.3