summaryrefslogtreecommitdiff
path: root/indra/llcommon/coro_scheduler.cpp
diff options
context:
space:
mode:
authorNat Goodspeed <nat@lindenlab.com>2024-10-22 14:46:15 -0400
committerNat Goodspeed <nat@lindenlab.com>2024-10-22 14:46:15 -0400
commitbfe759584f63c0587a2dc6a0086ad9d5b6c63a56 (patch)
tree84653a1211908c7dd2ce5bc4877e79f8e6515099 /indra/llcommon/coro_scheduler.cpp
parent62fc3ceaf5251458239f91192a05edc64bedf33b (diff)
parent394f7b37f2ec05c7cfb32c350432886f1c493c85 (diff)
Merge branch 'develop' into marchcat/xcode-16
Diffstat (limited to 'indra/llcommon/coro_scheduler.cpp')
-rw-r--r--indra/llcommon/coro_scheduler.cpp83
1 files changed, 65 insertions, 18 deletions
diff --git a/indra/llcommon/coro_scheduler.cpp b/indra/llcommon/coro_scheduler.cpp
index 2d8b6e1a97..b6117fa6a1 100644
--- a/indra/llcommon/coro_scheduler.cpp
+++ b/indra/llcommon/coro_scheduler.cpp
@@ -20,6 +20,7 @@
#include <boost/fiber/operations.hpp>
// other Linden headers
#include "llcallbacklist.h"
+#include "llcoros.h"
#include "lldate.h"
#include "llerror.h"
@@ -56,17 +57,55 @@ void scheduler::awakened( boost::fibers::context* ctx) noexcept
boost::fibers::context* scheduler::pick_next() noexcept
{
+ auto now = LLDate::now().secondsSinceEpoch();
// count calls to pick_next()
++mSwitches;
// pick_next() is called when the previous fiber has suspended, and we
// need to pick another. Did the previous pick_next() call pick the main
- // fiber? If so, it's the main fiber that just suspended.
- auto now = LLDate::now().secondsSinceEpoch();
- if (mMainRunning)
+ // fiber? (Or is this the first pick_next() call?) If so, it's the main
+ // fiber that just suspended.
+ if ((! mPrevCtx) || mPrevCtx->get_id() == mMainID)
{
- mMainRunning = false;
mMainLast = now;
}
+ else
+ {
+ // How long did we spend in the fiber that just suspended?
+ // Don't bother with long runs of the main fiber, since (a) it happens
+ // pretty often and (b) it's moderately likely that we've reached here
+ // from the canonical yield at the top of mainloop, and what we'd want
+ // to know about is whatever the main fiber was doing in the
+ // *previous* iteration of mainloop.
+ F64 elapsed{ now - mResumeTime };
+ LLCoros::CoroData& data{ LLCoros::get_CoroData(mPrevCtx->get_id()) };
+ // Find iterator to the first mHistogram key greater than elapsed.
+ auto past = data.mHistogram.upper_bound(elapsed);
+ // If the smallest key (mHistogram.begin()->first) is greater than
+ // elapsed, then we need not bother with this timeslice.
+ if (past != data.mHistogram.begin())
+ {
+ // Here elapsed was greater than at least one key. Back off to the
+ // previous entry and increment that count. If it's end(), backing
+ // off gets us the last entry -- assuming mHistogram isn't empty.
+ llassert(! data.mHistogram.empty());
+ ++(--past)->second;
+ LL::WorkQueue::ptr_t queue{ getWorkQueue() };
+ // make sure the queue exists
+ if (queue)
+ {
+ // If it proves difficult to track down *why* the fiber spent so
+ // much time, consider also binding and reporting
+ // boost::stacktrace::stacktrace().
+ queue->post(
+ [name=data.getName(), elapsed]
+ {
+ LL_WARNS_ONCE("LLCoros.scheduler")
+ << "Coroutine " << name << " ran for "
+ << elapsed << " seconds" << LL_ENDL;
+ });
+ }
+ }
+ }
boost::fibers::context* next;
@@ -96,17 +135,9 @@ boost::fibers::context* scheduler::pick_next() noexcept
// passage could be skipped.
// Record this event for logging, but push it off to a thread pool to
- // perform that work. Presumably std::weak_ptr::lock() is cheaper than
- // WorkQueue::getInstance().
- LL::WorkQueue::ptr_t queue{ mQueue.lock() };
- // We probably started before the relevant WorkQueue was created.
- if (! queue)
- {
- // Try again to locate the specified WorkQueue.
- queue = LL::WorkQueue::getInstance(qname);
- mQueue = queue;
- }
- // Both the lock() call and the getInstance() call might have failed.
+ // perform that work.
+ LL::WorkQueue::ptr_t queue{ getWorkQueue() };
+ // The work queue we're looking for might not exist right now.
if (queue)
{
// Bind values. Do NOT bind 'this' to avoid cross-thread access!
@@ -116,7 +147,6 @@ boost::fibers::context* scheduler::pick_next() noexcept
// so we have no access.
queue->post(
[switches=mSwitches, start=mStart, elapsed, now]
- ()
{
U32 runtime(U32(now) - U32(start));
U32 minutes(runtime / 60u);
@@ -150,12 +180,29 @@ boost::fibers::context* scheduler::pick_next() noexcept
{
// we're about to resume the main fiber: it's no longer "ready"
mMainCtx = nullptr;
- // instead, it's "running"
- mMainRunning = true;
}
+ mPrevCtx = next;
+ // remember when we resumed this fiber so our next call can measure how
+ // long the previous resumption was
+ mResumeTime = LLDate::now().secondsSinceEpoch();
return next;
}
+LL::WorkQueue::ptr_t scheduler::getWorkQueue()
+{
+ // Cache a weak_ptr to our target work queue, presuming that
+ // std::weak_ptr::lock() is cheaper than WorkQueue::getInstance().
+ LL::WorkQueue::ptr_t queue{ mQueue.lock() };
+ // We probably started before the relevant WorkQueue was created.
+ if (! queue)
+ {
+ // Try again to locate the specified WorkQueue.
+ queue = LL::WorkQueue::getInstance(qname);
+ mQueue = queue;
+ }
+ return queue;
+}
+
void scheduler::use()
{
boost::fibers::use_scheduling_algorithm<scheduler>();