diff options
-rw-r--r-- | indra/llcommon/CMakeLists.txt | 2 | ||||
-rw-r--r-- | indra/llcommon/coro_scheduler.cpp | 164 | ||||
-rw-r--r-- | indra/llcommon/coro_scheduler.h | 73 | ||||
-rw-r--r-- | indra/newview/llstartup.cpp | 5 |
4 files changed, 243 insertions, 1 deletions
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 20670d7ebe..221fc2bb3f 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -18,6 +18,7 @@ include(Tracy) set(llcommon_SOURCE_FILES apply.cpp commoncontrol.cpp + coro_scheduler.cpp hbxxh.cpp indra_constants.cpp lazyeventapi.cpp @@ -125,6 +126,7 @@ set(llcommon_HEADER_FILES chrono.h classic_callback.h commoncontrol.h + coro_scheduler.h ctype_workaround.h fix_macros.h fsyspath.h diff --git a/indra/llcommon/coro_scheduler.cpp b/indra/llcommon/coro_scheduler.cpp new file mode 100644 index 0000000000..337162cbd5 --- /dev/null +++ b/indra/llcommon/coro_scheduler.cpp @@ -0,0 +1,164 @@ +/** + * @file coro_scheduler.cpp + * @author Nat Goodspeed + * @date 2024-08-05 + * @brief Implementation for llcoro::scheduler. + * + * $LicenseInfo:firstyear=2024&license=viewerlgpl$ + * Copyright (c) 2024, Linden Research, Inc. + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "coro_scheduler.h" +// STL headers +// std headers +#include <iomanip> +// external library headers +#include <boost/fiber/operations.hpp> +// other Linden headers +#include "llcallbacklist.h" +#include "lldate.h" +#include "llerror.h" + +namespace llcoro +{ + +const F32 scheduler::DEFAULT_TIMESLICE{ LL::Timers::DEFAULT_TIMESLICE }; + +const std::string qname("General"); + +scheduler::scheduler(): + // Since use_scheduling_algorithm() must be called before any other + // Boost.Fibers operations, we can assume that the calling fiber is in + // fact the main fiber. + mMainID(boost::this_fiber::get_id()), + mStart(LLDate::now().secondsSinceEpoch()), + mQueue(LL::WorkQueue::getInstance(qname)) +{} + +void scheduler::awakened( boost::fibers::context* ctx) noexcept +{ + if (ctx->get_id() == mMainID) + { + // If the fiber that just came ready is the main fiber, record its + // pointer. + llassert(! mMainCtx); + mMainCtx = ctx; + } + // Delegate to round_robin::awakened() as usual, even for the main fiber. + // This way, as long as other fibers don't take too long, we can just let + // normal round_robin processing pass control to the main fiber. + super::awakened(ctx); +} + +boost::fibers::context* scheduler::pick_next() noexcept +{ + // count calls to pick_next() + ++mSwitches; + // pick_next() is called when the previous fiber has suspended, and we + // need to pick another. Did the previous pick_next() call pick the main + // fiber? If so, it's the main fiber that just suspended. + auto now = LLDate::now().secondsSinceEpoch(); + if (mMainRunning) + { + mMainRunning = false; + mMainLast = now; + } + + boost::fibers::context* next; + + // When the main fiber is ready, and it's been more than mTimeslice since + // the main fiber last ran, it's time to intervene. + F32 elapsed(now - mMainLast); + if (mMainCtx && elapsed > mTimeslice) + { + // We claim that the main fiber is not only stored in mMainCtx, but is + // also queued (somewhere) in our ready list. + llassert(mMainCtx->ready_is_linked()); + // The usefulness of a doubly-linked list is that, given only a + // pointer to an item, we can unlink it. + mMainCtx->ready_unlink(); + // Instead of delegating to round_robin::pick_next() to pop the head + // of the queue, override by returning mMainCtx. + next = mMainCtx; + + /*------------------------- logging stuff --------------------------*/ + // Unless this log tag is enabled, don't even bother posting. + LL_DEBUGS("LLCoros.scheduler"); + // This feature is inherently hard to verify. The logging in the + // lambda below seems useful, but also seems like a lot of overhead + // for a coroutine context switch. Try posting the logging lambda to a + // ThreadPool to offload that overhead. However, if this is still + // taking an unreasonable amount of context-switch time, this whole + // passage could be skipped. + + // Record this event for logging, but push it off to a thread pool to + // perform that work. Presumably std::weak_ptr::lock() is cheaper than + // WorkQueue::getInstance(). + LL::WorkQueue::ptr_t queue{ mQueue.lock() }; + // We probably started before the relevant WorkQueue was created. + if (! queue) + { + // Try again to locate the specified WorkQueue. + queue = LL::WorkQueue::getInstance(qname); + mQueue = queue; + } + // Both the lock() call and the getInstance() call might have failed. + if (queue) + { + // Bind values. Do NOT bind 'this' to avoid cross-thread access! + // It would be interesting to know from what queue position we + // unlinked the main fiber, out of how many in the ready list. + // Unfortunately round_robin::rqueue_ is private, not protected, + // so we have no access. + queue->post( + [switches=mSwitches, start=mStart, elapsed, now] + () + { + U32 runtime(U32(now) - U32(start)); + U32 minutes(runtime / 60u); + U32 seconds(runtime % 60u); + // use stringize to avoid lasting side effects to the + // logging ostream + LL_DEBUGS("LLCoros.scheduler") + << "At time " + << stringize(minutes, ":", std::setw(2), std::setfill('0'), seconds) + << " (" << switches << " switches), coroutines took " + << stringize(std::setprecision(4), elapsed) + << " sec, main coroutine jumped queue" + << LL_ENDL; + }); + } + LL_ENDL; + /*----------------------- end logging stuff ------------------------*/ + } + else + { + // Either the main fiber isn't yet ready, or it hasn't yet been + // mTimeslice seconds since the last time the main fiber ran. Business + // as usual. + next = super::pick_next(); + } + + // super::pick_next() could also have returned the main fiber, which is + // why this is a separate test instead of being folded into the override + // case above. + if (next && next->get_id() == mMainID) + { + // we're about to resume the main fiber: it's no longer "ready" + mMainCtx = nullptr; + // instead, it's "running" + mMainRunning = true; + } + return next; +} + +void scheduler::use() +{ + boost::fibers::use_scheduling_algorithm<scheduler>(); +} + +} // namespace llcoro diff --git a/indra/llcommon/coro_scheduler.h b/indra/llcommon/coro_scheduler.h new file mode 100644 index 0000000000..a7572ccf4d --- /dev/null +++ b/indra/llcommon/coro_scheduler.h @@ -0,0 +1,73 @@ +/** + * @file coro_scheduler.h + * @author Nat Goodspeed + * @date 2024-08-05 + * @brief Custom scheduler for viewer's Boost.Fibers (aka coroutines) + * + * $LicenseInfo:firstyear=2024&license=viewerlgpl$ + * Copyright (c) 2024, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_CORO_SCHEDULER_H) +#define LL_CORO_SCHEDULER_H + +#include "workqueue.h" +#include <boost/fiber/fiber.hpp> +#include <boost/fiber/algo/round_robin.hpp> + +/** + * llcoro::scheduler is specifically intended for the viewer's main thread. + * Its role is to ensure that the main coroutine, responsible for UI + * operations and coordinating everything else, doesn't get starved by + * secondary coroutines -- however many of those there might be. + * + * The simple boost::fibers::algo::round_robin scheduler could result in + * arbitrary time lag between resumptions of the main coroutine. Of course + * every well-behaved viewer coroutine must be coded to yield before too much + * real time has elapsed, but sheer volume of secondary coroutines could still + * consume unreasonable real time before cycling back to the main coroutine. + */ + +namespace llcoro +{ + +class scheduler: public boost::fibers::algo::round_robin +{ + using super = boost::fibers::algo::round_robin; +public: + // If the main fiber is ready, and it's been at least this long since the + // main fiber last ran, jump the main fiber to the head of the queue. + static const F32 DEFAULT_TIMESLICE; + + scheduler(); + void awakened( boost::fibers::context*) noexcept override; + boost::fibers::context* pick_next() noexcept override; + + static void use(); + +private: + // This is the fiber::id of the main fiber. We use this to discover + // whether the fiber passed to awakened() is in fact the main fiber. + boost::fibers::fiber::id mMainID; + // This context* is nullptr until awakened() notices that the main fiber + // has become ready, at which point it contains the main fiber's context*. + boost::fibers::context* mMainCtx{}; + // Set when pick_next() returns the main fiber. + bool mMainRunning{ false }; + // If it's been at least this long since the last time the main fiber got + // control, jump it to the head of the queue. + F32 mTimeslice{ DEFAULT_TIMESLICE }; + // Timestamp as of the last time we suspended the main fiber. + F32 mMainLast{ 0 }; + // Timestamp of start time + F32 mStart{ 0 }; + // count context switches + U64 mSwitches{ 0 }; + // WorkQueue for deferred logging + LL::WorkQueue::weak_t mQueue; +}; + +} // namespace llcoro + +#endif /* ! defined(LL_CORO_SCHEDULER_H) */ diff --git a/indra/newview/llstartup.cpp b/indra/newview/llstartup.cpp index 3cf0def66e..c7fb734440 100644 --- a/indra/newview/llstartup.cpp +++ b/indra/newview/llstartup.cpp @@ -48,6 +48,7 @@ #include "llaudioengine_openal.h" #endif +#include "coro_scheduler.h" #include "llavatarnamecache.h" #include "llexperiencecache.h" #include "lllandmark.h" @@ -400,10 +401,12 @@ bool idle_startup() static bool first_call = true; if (first_call) { + first_call = false; // Other phases get handled when startup state changes, // need to capture the initial state as well. LLStartUp::getPhases().startPhase(LLStartUp::getStartupStateString()); - first_call = false; + // Use our custom scheduler for coroutine scheduling. + llcoro::scheduler::use(); } gViewerWindow->showCursor(); |