diff options
author | Nat Goodspeed <nat@lindenlab.com> | 2024-10-22 14:46:15 -0400 |
---|---|---|
committer | Nat Goodspeed <nat@lindenlab.com> | 2024-10-22 14:46:15 -0400 |
commit | bfe759584f63c0587a2dc6a0086ad9d5b6c63a56 (patch) | |
tree | 84653a1211908c7dd2ce5bc4877e79f8e6515099 /indra/llcommon/coro_scheduler.cpp | |
parent | 62fc3ceaf5251458239f91192a05edc64bedf33b (diff) | |
parent | 394f7b37f2ec05c7cfb32c350432886f1c493c85 (diff) |
Merge branch 'develop' into marchcat/xcode-16
Diffstat (limited to 'indra/llcommon/coro_scheduler.cpp')
-rw-r--r-- | indra/llcommon/coro_scheduler.cpp | 83 |
1 file changed, 65 insertions, 18 deletions
diff --git a/indra/llcommon/coro_scheduler.cpp b/indra/llcommon/coro_scheduler.cpp index 2d8b6e1a97..b6117fa6a1 100644 --- a/indra/llcommon/coro_scheduler.cpp +++ b/indra/llcommon/coro_scheduler.cpp @@ -20,6 +20,7 @@ #include <boost/fiber/operations.hpp> // other Linden headers #include "llcallbacklist.h" +#include "llcoros.h" #include "lldate.h" #include "llerror.h" @@ -56,17 +57,55 @@ void scheduler::awakened( boost::fibers::context* ctx) noexcept boost::fibers::context* scheduler::pick_next() noexcept { + auto now = LLDate::now().secondsSinceEpoch(); // count calls to pick_next() ++mSwitches; // pick_next() is called when the previous fiber has suspended, and we // need to pick another. Did the previous pick_next() call pick the main - // fiber? If so, it's the main fiber that just suspended. - auto now = LLDate::now().secondsSinceEpoch(); - if (mMainRunning) + // fiber? (Or is this the first pick_next() call?) If so, it's the main + // fiber that just suspended. + if ((! mPrevCtx) || mPrevCtx->get_id() == mMainID) { - mMainRunning = false; mMainLast = now; } + else + { + // How long did we spend in the fiber that just suspended? + // Don't bother with long runs of the main fiber, since (a) it happens + // pretty often and (b) it's moderately likely that we've reached here + // from the canonical yield at the top of mainloop, and what we'd want + // to know about is whatever the main fiber was doing in the + // *previous* iteration of mainloop. + F64 elapsed{ now - mResumeTime }; + LLCoros::CoroData& data{ LLCoros::get_CoroData(mPrevCtx->get_id()) }; + // Find iterator to the first mHistogram key greater than elapsed. + auto past = data.mHistogram.upper_bound(elapsed); + // If the smallest key (mHistogram.begin()->first) is greater than + // elapsed, then we need not bother with this timeslice. + if (past != data.mHistogram.begin()) + { + // Here elapsed was greater than at least one key. Back off to the + // previous entry and increment that count. 
If it's end(), backing + // off gets us the last entry -- assuming mHistogram isn't empty. + llassert(! data.mHistogram.empty()); + ++(--past)->second; + LL::WorkQueue::ptr_t queue{ getWorkQueue() }; + // make sure the queue exists + if (queue) + { + // If it proves difficult to track down *why* the fiber spent so + // much time, consider also binding and reporting + // boost::stacktrace::stacktrace(). + queue->post( + [name=data.getName(), elapsed] + { + LL_WARNS_ONCE("LLCoros.scheduler") + << "Coroutine " << name << " ran for " + << elapsed << " seconds" << LL_ENDL; + }); + } + } + } boost::fibers::context* next; @@ -96,17 +135,9 @@ boost::fibers::context* scheduler::pick_next() noexcept // passage could be skipped. // Record this event for logging, but push it off to a thread pool to - // perform that work. Presumably std::weak_ptr::lock() is cheaper than - // WorkQueue::getInstance(). - LL::WorkQueue::ptr_t queue{ mQueue.lock() }; - // We probably started before the relevant WorkQueue was created. - if (! queue) - { - // Try again to locate the specified WorkQueue. - queue = LL::WorkQueue::getInstance(qname); - mQueue = queue; - } - // Both the lock() call and the getInstance() call might have failed. + // perform that work. + LL::WorkQueue::ptr_t queue{ getWorkQueue() }; + // The work queue we're looking for might not exist right now. if (queue) { // Bind values. Do NOT bind 'this' to avoid cross-thread access! @@ -116,7 +147,6 @@ boost::fibers::context* scheduler::pick_next() noexcept // so we have no access. 
queue->post( [switches=mSwitches, start=mStart, elapsed, now] - () { U32 runtime(U32(now) - U32(start)); U32 minutes(runtime / 60u); @@ -150,12 +180,29 @@ boost::fibers::context* scheduler::pick_next() noexcept { // we're about to resume the main fiber: it's no longer "ready" mMainCtx = nullptr; - // instead, it's "running" - mMainRunning = true; } + mPrevCtx = next; + // remember when we resumed this fiber so our next call can measure how + // long the previous resumption was + mResumeTime = LLDate::now().secondsSinceEpoch(); return next; } +LL::WorkQueue::ptr_t scheduler::getWorkQueue() +{ + // Cache a weak_ptr to our target work queue, presuming that + // std::weak_ptr::lock() is cheaper than WorkQueue::getInstance(). + LL::WorkQueue::ptr_t queue{ mQueue.lock() }; + // We probably started before the relevant WorkQueue was created. + if (! queue) + { + // Try again to locate the specified WorkQueue. + queue = LL::WorkQueue::getInstance(qname); + mQueue = queue; + } + return queue; +} + void scheduler::use() { boost::fibers::use_scheduling_algorithm<scheduler>(); |