From d3b1859ca3d6c2c2bcc92edba994b522cf9d4e99 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed <nat@lindenlab.com> Date: Tue, 6 Aug 2024 15:55:58 -0400 Subject: Introduce a custom coroutine/fiber scheduler to prioritize UI. The viewer's main thread's main fiber is responsible for coordinating just about everything. With the default round_robin fiber scheduling algorithm, launching too many additional fibers could starve the main fiber, resulting in visible lag. This custom scheduler tracks when it switches to and from the main fiber, and at each context switch, how long it's been since the last time the main fiber ran. If that exceeds a certain timeslice, it jumps the main fiber to the head of the queue and resumes that instead of any other ready fiber. --- indra/llcommon/CMakeLists.txt | 2 + indra/llcommon/coro_scheduler.cpp | 164 ++++++++++++++++++++++++++++++++++++++ indra/llcommon/coro_scheduler.h | 73 +++++++++++++++++ indra/newview/llstartup.cpp | 5 +- 4 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 indra/llcommon/coro_scheduler.cpp create mode 100644 indra/llcommon/coro_scheduler.h (limited to 'indra') diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 20670d7ebe..221fc2bb3f 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -18,6 +18,7 @@ include(Tracy) set(llcommon_SOURCE_FILES apply.cpp commoncontrol.cpp + coro_scheduler.cpp hbxxh.cpp indra_constants.cpp lazyeventapi.cpp @@ -125,6 +126,7 @@ set(llcommon_HEADER_FILES chrono.h classic_callback.h commoncontrol.h + coro_scheduler.h ctype_workaround.h fix_macros.h fsyspath.h diff --git a/indra/llcommon/coro_scheduler.cpp b/indra/llcommon/coro_scheduler.cpp new file mode 100644 index 0000000000..337162cbd5 --- /dev/null +++ b/indra/llcommon/coro_scheduler.cpp @@ -0,0 +1,164 @@ +/** + * @file coro_scheduler.cpp + * @author Nat Goodspeed + * @date 2024-08-05 + * @brief Implementation for llcoro::scheduler. + * + * $LicenseInfo:firstyear=2024&license=viewerlgpl$ + * Copyright (c) 2024, Linden Research, Inc. + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" +// associated header +#include "coro_scheduler.h" +// STL headers +// std headers +#include <iomanip> +// external library headers +#include <boost/fiber/operations.hpp> +// other Linden headers +#include "llcallbacklist.h" +#include "lldate.h" +#include "llerror.h" + +namespace llcoro +{ + +const F32 scheduler::DEFAULT_TIMESLICE{ LL::Timers::DEFAULT_TIMESLICE }; + +const std::string qname("General"); + +scheduler::scheduler(): + // Since use_scheduling_algorithm() must be called before any other + // Boost.Fibers operations, we can assume that the calling fiber is in + // fact the main fiber. + mMainID(boost::this_fiber::get_id()), + mStart(LLDate::now().secondsSinceEpoch()), + mQueue(LL::WorkQueue::getInstance(qname)) +{} + +void scheduler::awakened( boost::fibers::context* ctx) noexcept +{ + if (ctx->get_id() == mMainID) + { + // If the fiber that just came ready is the main fiber, record its + // pointer. + llassert(! mMainCtx); + mMainCtx = ctx; + } + // Delegate to round_robin::awakened() as usual, even for the main fiber. + // This way, as long as other fibers don't take too long, we can just let + // normal round_robin processing pass control to the main fiber. + super::awakened(ctx); +} + +boost::fibers::context* scheduler::pick_next() noexcept +{ + // count calls to pick_next() + ++mSwitches; + // pick_next() is called when the previous fiber has suspended, and we + // need to pick another. Did the previous pick_next() call pick the main + // fiber? If so, it's the main fiber that just suspended. + auto now = LLDate::now().secondsSinceEpoch(); + if (mMainRunning) + { + mMainRunning = false; + mMainLast = now; + } + + boost::fibers::context* next; + + // When the main fiber is ready, and it's been more than mTimeslice since + // the main fiber last ran, it's time to intervene. + F32 elapsed(now - mMainLast); + if (mMainCtx && elapsed > mTimeslice) + { + // We claim that the main fiber is not only stored in mMainCtx, but is + // also queued (somewhere) in our ready list. + llassert(mMainCtx->ready_is_linked()); + // The usefulness of a doubly-linked list is that, given only a + // pointer to an item, we can unlink it. + mMainCtx->ready_unlink(); + // Instead of delegating to round_robin::pick_next() to pop the head + // of the queue, override by returning mMainCtx. + next = mMainCtx; + + /*------------------------- logging stuff --------------------------*/ + // Unless this log tag is enabled, don't even bother posting. + LL_DEBUGS("LLCoros.scheduler"); + // This feature is inherently hard to verify. The logging in the + // lambda below seems useful, but also seems like a lot of overhead + // for a coroutine context switch. Try posting the logging lambda to a + // ThreadPool to offload that overhead. However, if this is still + // taking an unreasonable amount of context-switch time, this whole + // passage could be skipped. + + // Record this event for logging, but push it off to a thread pool to + // perform that work. Presumably std::weak_ptr::lock() is cheaper than + // WorkQueue::getInstance(). + LL::WorkQueue::ptr_t queue{ mQueue.lock() }; + // We probably started before the relevant WorkQueue was created. + if (! queue) + { + // Try again to locate the specified WorkQueue. + queue = LL::WorkQueue::getInstance(qname); + mQueue = queue; + } + // Both the lock() call and the getInstance() call might have failed. + if (queue) + { + // Bind values. Do NOT bind 'this' to avoid cross-thread access! + // It would be interesting to know from what queue position we + // unlinked the main fiber, out of how many in the ready list. + // Unfortunately round_robin::rqueue_ is private, not protected, + // so we have no access. + queue->post( + [switches=mSwitches, start=mStart, elapsed, now] + () + { + U32 runtime(U32(now) - U32(start)); + U32 minutes(runtime / 60u); + U32 seconds(runtime % 60u); + // use stringize to avoid lasting side effects to the + // logging ostream + LL_DEBUGS("LLCoros.scheduler") + << "At time " + << stringize(minutes, ":", std::setw(2), std::setfill('0'), seconds) + << " (" << switches << " switches), coroutines took " + << stringize(std::setprecision(4), elapsed) + << " sec, main coroutine jumped queue" + << LL_ENDL; + }); + } + LL_ENDL; + /*----------------------- end logging stuff ------------------------*/ + } + else + { + // Either the main fiber isn't yet ready, or it hasn't yet been + // mTimeslice seconds since the last time the main fiber ran. Business + // as usual. + next = super::pick_next(); + } + + // super::pick_next() could also have returned the main fiber, which is + // why this is a separate test instead of being folded into the override + // case above. + if (next && next->get_id() == mMainID) + { + // we're about to resume the main fiber: it's no longer "ready" + mMainCtx = nullptr; + // instead, it's "running" + mMainRunning = true; + } + return next; +} + +void scheduler::use() +{ + boost::fibers::use_scheduling_algorithm<scheduler>(); +} + +} // namespace llcoro diff --git a/indra/llcommon/coro_scheduler.h b/indra/llcommon/coro_scheduler.h new file mode 100644 index 0000000000..a7572ccf4d --- /dev/null +++ b/indra/llcommon/coro_scheduler.h @@ -0,0 +1,73 @@ +/** + * @file coro_scheduler.h + * @author Nat Goodspeed + * @date 2024-08-05 + * @brief Custom scheduler for viewer's Boost.Fibers (aka coroutines) + * + * $LicenseInfo:firstyear=2024&license=viewerlgpl$ + * Copyright (c) 2024, Linden Research, Inc. + * $/LicenseInfo$ + */ + +#if ! defined(LL_CORO_SCHEDULER_H) +#define LL_CORO_SCHEDULER_H + +#include "workqueue.h" +#include <boost/fiber/fiber.hpp> +#include <boost/fiber/algo/round_robin.hpp> + +/** + * llcoro::scheduler is specifically intended for the viewer's main thread. + * Its role is to ensure that the main coroutine, responsible for UI + * operations and coordinating everything else, doesn't get starved by + * secondary coroutines -- however many of those there might be. + * + * The simple boost::fibers::algo::round_robin scheduler could result in + * arbitrary time lag between resumptions of the main coroutine. Of course + * every well-behaved viewer coroutine must be coded to yield before too much + * real time has elapsed, but sheer volume of secondary coroutines could still + * consume unreasonable real time before cycling back to the main coroutine. + */ + +namespace llcoro +{ + +class scheduler: public boost::fibers::algo::round_robin +{ + using super = boost::fibers::algo::round_robin; +public: + // If the main fiber is ready, and it's been at least this long since the + // main fiber last ran, jump the main fiber to the head of the queue. + static const F32 DEFAULT_TIMESLICE; + + scheduler(); + void awakened( boost::fibers::context*) noexcept override; + boost::fibers::context* pick_next() noexcept override; + + static void use(); + +private: + // This is the fiber::id of the main fiber. We use this to discover + // whether the fiber passed to awakened() is in fact the main fiber. + boost::fibers::fiber::id mMainID; + // This context* is nullptr until awakened() notices that the main fiber + // has become ready, at which point it contains the main fiber's context*. + boost::fibers::context* mMainCtx{}; + // Set when pick_next() returns the main fiber. + bool mMainRunning{ false }; + // If it's been at least this long since the last time the main fiber got + // control, jump it to the head of the queue. + F32 mTimeslice{ DEFAULT_TIMESLICE }; + // Timestamp as of the last time we suspended the main fiber. + F32 mMainLast{ 0 }; + // Timestamp of start time + F32 mStart{ 0 }; + // count context switches + U64 mSwitches{ 0 }; + // WorkQueue for deferred logging + LL::WorkQueue::weak_t mQueue; +}; + +} // namespace llcoro + +#endif /* ! defined(LL_CORO_SCHEDULER_H) */ diff --git a/indra/newview/llstartup.cpp b/indra/newview/llstartup.cpp index 3cf0def66e..c7fb734440 100644 --- a/indra/newview/llstartup.cpp +++ b/indra/newview/llstartup.cpp @@ -48,6 +48,7 @@ #include "llaudioengine_openal.h" #endif +#include "coro_scheduler.h" #include "llavatarnamecache.h" #include "llexperiencecache.h" #include "lllandmark.h" @@ -400,10 +401,12 @@ bool idle_startup() static bool first_call = true; if (first_call) { + first_call = false; // Other phases get handled when startup state changes, // need to capture the initial state as well. LLStartUp::getPhases().startPhase(LLStartUp::getStartupStateString()); - first_call = false; + // Use our custom scheduler for coroutine scheduling. + llcoro::scheduler::use(); } gViewerWindow->showCursor(); -- cgit v1.2.3 From 08def53d8bf07beaa56db80e95aa76f3682c557c Mon Sep 17 00:00:00 2001 From: Nat Goodspeed <nat@lindenlab.com> Date: Wed, 7 Aug 2024 16:01:45 -0400 Subject: Move llcoro::scheduler::use() call from llstartup to llappviewer. Thanks, Maxim. --- indra/newview/llappviewer.cpp | 3 ++- indra/newview/llstartup.cpp | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'indra') diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index ff29f64aeb..3d53200eda 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -765,7 +765,8 @@ bool LLAppViewer::init() //set the max heap size. initMaxHeapSize() ; LLCoros::instance().setStackSize(gSavedSettings.getS32("CoroutineStackSize")); - + // Use our custom scheduler for coroutine scheduling. + llcoro::scheduler::use(); // Although initLoggingAndGetLastDuration() is the right place to mess with // setFatalFunction(), we can't query gSavedSettings until after diff --git a/indra/newview/llstartup.cpp b/indra/newview/llstartup.cpp index c7fb734440..82eb59c002 100644 --- a/indra/newview/llstartup.cpp +++ b/indra/newview/llstartup.cpp @@ -405,8 +405,6 @@ bool idle_startup() // Other phases get handled when startup state changes, // need to capture the initial state as well. LLStartUp::getPhases().startPhase(LLStartUp::getStartupStateString()); - // Use our custom scheduler for coroutine scheduling. - llcoro::scheduler::use(); } gViewerWindow->showCursor(); -- cgit v1.2.3 From 3102dc0db472acac7c4007e7423e405a889d665a Mon Sep 17 00:00:00 2001 From: Nat Goodspeed <nat@lindenlab.com> Date: Wed, 7 Aug 2024 16:12:23 -0400 Subject: Allow smaller minimum timer intervals. Add test_flycam.lua to exercise the smaller intervals. --- indra/llcommon/llcallbacklist.cpp | 2 +- indra/newview/scripts/lua/test_flycam.lua | 38 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 indra/newview/scripts/lua/test_flycam.lua (limited to 'indra') diff --git a/indra/llcommon/llcallbacklist.cpp b/indra/llcommon/llcallbacklist.cpp index 555c793333..647b268b8b 100644 --- a/indra/llcommon/llcallbacklist.cpp +++ b/indra/llcommon/llcallbacklist.cpp @@ -391,7 +391,7 @@ public: TimersListener(const LazyEventAPIParams& params): LLEventAPI(params) {} // Forbid a script from requesting callbacks too quickly. - static constexpr LLSD::Real MINTIMER{ 1.0 }; + static constexpr LLSD::Real MINTIMER{ 0.010 }; void scheduleAfter(const LLSD& params); void scheduleEvery(const LLSD& params); diff --git a/indra/newview/scripts/lua/test_flycam.lua b/indra/newview/scripts/lua/test_flycam.lua new file mode 100644 index 0000000000..4fb5608b33 --- /dev/null +++ b/indra/newview/scripts/lua/test_flycam.lua @@ -0,0 +1,38 @@ +-- Make camera fly around the subject avatar for a few seconds. + +local LLAgent = require 'LLAgent' +local startup = require 'startup' +local timers = require 'timers' + +local initial = LLAgent.getRegionPosition() +local height = 2.0 -- meters +local radius = 4.0 -- meters +local speed = 1.0 -- meters/second along circle +local start = os.clock() +local stop = os.clock() + 30 -- seconds + +local function cameraPos(t) + local radians = speed * t + return { + initial[1] + radius * math.cos(radians), + initial[2] + radius * math.sin(radians), + initial[3] + height + } +end + +local function moveCamera() + if os.clock() < stop then + -- usual case + LLAgent.setCamera{ camera_pos=cameraPos(os.clock() - start), camera_locked=true } + return nil + else + -- last time + LLAgent.removeCamParams() + LLAgent.setFollowCamActive(false) + return true + end +end + +startup.wait('STATE_STARTED') +-- call moveCamera() repeatedly until it returns true +local timer = timers.Timer(0.1, moveCamera, true) -- cgit v1.2.3 From cdc4f5dbd69526906724dc298594906826f6b8b0 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed <nat@lindenlab.com> Date: Wed, 7 Aug 2024 16:34:14 -0400 Subject: Move #include "coro_scheduler.h" from llstartup to llappviewer. --- indra/newview/llappviewer.cpp | 1 + indra/newview/llstartup.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'indra') diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 3d53200eda..c259275d8f 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -29,6 +29,7 @@ #include "llappviewer.h" // Viewer includes +#include "coro_scheduler.h" #include "llversioninfo.h" #include "llfeaturemanager.h" #include "lluictrlfactory.h" diff --git a/indra/newview/llstartup.cpp b/indra/newview/llstartup.cpp index 82eb59c002..d49e0d9ba2 100644 --- a/indra/newview/llstartup.cpp +++ b/indra/newview/llstartup.cpp @@ -48,7 +48,6 @@ #include "llaudioengine_openal.h" #endif -#include "coro_scheduler.h" #include "llavatarnamecache.h" #include "llexperiencecache.h" #include "lllandmark.h" -- cgit v1.2.3 From b4fe47a5c0abac02d161640e04a9a78afb1c5987 Mon Sep 17 00:00:00 2001 From: Nat Goodspeed <nat@lindenlab.com> Date: Thu, 8 Aug 2024 09:07:56 -0400 Subject: Ensure that the flycam stays near moving avatar. --- indra/newview/scripts/lua/test_flycam.lua | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'indra') diff --git a/indra/newview/scripts/lua/test_flycam.lua b/indra/newview/scripts/lua/test_flycam.lua index 4fb5608b33..05c3c37b93 100644 --- a/indra/newview/scripts/lua/test_flycam.lua +++ b/indra/newview/scripts/lua/test_flycam.lua @@ -4,7 +4,6 @@ local LLAgent = require 'LLAgent' local startup = require 'startup' local timers = require 'timers' -local initial = LLAgent.getRegionPosition() local height = 2.0 -- meters local radius = 4.0 -- meters local speed = 1.0 -- meters/second along circle @@ -12,11 +11,12 @@ local start = os.clock() local stop = os.clock() + 30 -- seconds local function cameraPos(t) + local agent = LLAgent.getRegionPosition() local radians = speed * t return { - initial[1] + radius * math.cos(radians), - initial[2] + radius * math.sin(radians), - initial[3] + height + agent[1] + radius * math.cos(radians), + agent[2] + radius * math.sin(radians), + agent[3] + height } end -- cgit v1.2.3