/**
 * @file   coro_scheduler.cpp
 * @author Nat Goodspeed
 * @date   2024-08-05
 * @brief  Implementation for llcoro::scheduler.
 *
 * $LicenseInfo:firstyear=2024&license=viewerlgpl$
 * Copyright (c) 2024, Linden Research, Inc.
 * $/LicenseInfo$
 */

// Precompiled header
#include "linden_common.h"
// associated header
#include "coro_scheduler.h"
// STL headers
// std headers
#include <iomanip>
// external library headers
#include <boost/fiber/operations.hpp>
// other Linden headers
#include "llcallbacklist.h"
#include "llcoros.h"
#include "lldate.h"
#include "llerror.h"

namespace llcoro
{

const F64 scheduler::DEFAULT_TIMESLICE{ LL::Timers::DEFAULT_TIMESLICE };

const std::string qname("General");

scheduler::scheduler():
    // Since use_scheduling_algorithm() must be called before any other
    // Boost.Fibers operations, we can assume that the calling fiber is in
    // fact the main fiber.
    mMainID(boost::this_fiber::get_id()),
    mStart(LLDate::now().secondsSinceEpoch()),
    mQueue(LL::WorkQueue::getInstance(qname))
{}

void scheduler::awakened( boost::fibers::context* ctx) noexcept
{
    if (ctx->get_id() == mMainID)
    {
        // If the fiber that just came ready is the main fiber, record its
        // pointer.
        llassert(! mMainCtx);
        mMainCtx = ctx;
    }
    // Delegate to round_robin::awakened() as usual, even for the main fiber.
    // This way, as long as other fibers don't take too long, we can just let
    // normal round_robin processing pass control to the main fiber.
    super::awakened(ctx);
}

boost::fibers::context* scheduler::pick_next() noexcept
{
    auto now = LLDate::now().secondsSinceEpoch();
    // count calls to pick_next()
    ++mSwitches;
    // pick_next() is called when the previous fiber has suspended, and we
    // need to pick another. Did the previous pick_next() call pick the main
    // fiber? (Or is this the first pick_next() call?) If so, it's the main
    // fiber that just suspended.
    if ((! mPrevCtx) || mPrevCtx->get_id() == mMainID)
    {
        mMainLast = now;
    }
    else
    {
        // How long did we spend in the fiber that just suspended?
        // Don't bother with long runs of the main fiber, since (a) it happens
        // pretty often and (b) it's moderately likely that we've reached here
        // from the canonical yield at the top of mainloop, and what we'd want
        // to know about is whatever the main fiber was doing in the
        // *previous* iteration of mainloop.
        F64 elapsed{ now - mResumeTime };
        LLCoros::CoroData& data{ LLCoros::get_CoroData(mPrevCtx->get_id()) };
        // Find iterator to the first mHistogram key greater than elapsed.
        auto past = data.mHistogram.upper_bound(elapsed);
        // If the smallest key (mHistogram.begin()->first) is greater than
        // elapsed, then we need not bother with this timeslice.
        if (past != data.mHistogram.begin())
        {
            // Here elapsed was greater than at least one key. Back off to the
            // previous entry and increment that count. If it's end(), backing
            // off gets us the last entry -- assuming mHistogram isn't empty.
            llassert(! data.mHistogram.empty());
            ++(--past)->second;
            LL::WorkQueue::ptr_t queue{ getWorkQueue() };
            // make sure the queue exists
            if (queue)
            {
                // If it proves difficult to track down *why* the fiber spent so
                // much time, consider also binding and reporting
                // boost::stacktrace::stacktrace().
                queue->post(
                    [name=data.getName(), elapsed]
                    {
                        LL_WARNS_ONCE("LLCoros.scheduler")
                            << "Coroutine " << name << " ran for "
                            << elapsed << " seconds" << LL_ENDL;
                    });
            }
        }
    }

    boost::fibers::context* next;

    // When the main fiber is ready, and it's been more than mTimeslice since
    // the main fiber last ran, it's time to intervene.
    F64 elapsed(now - mMainLast);
    if (mMainCtx && elapsed > mTimeslice)
    {
        // We claim that the main fiber is not only stored in mMainCtx, but is
        // also queued (somewhere) in our ready list.
        llassert(mMainCtx->ready_is_linked());
        // The usefulness of a doubly-linked list is that, given only a
        // pointer to an item, we can unlink it.
        mMainCtx->ready_unlink();
        // Instead of delegating to round_robin::pick_next() to pop the head
        // of the queue, override by returning mMainCtx.
        next = mMainCtx;

        /*------------------------- logging stuff --------------------------*/
        // Unless this log tag is enabled, don't even bother posting.
        LL_DEBUGS("LLCoros.scheduler") << " ";
        // This feature is inherently hard to verify. The logging in the
        // lambda below seems useful, but also seems like a lot of overhead
        // for a coroutine context switch. Try posting the logging lambda to a
        // ThreadPool to offload that overhead. However, if this is still
        // taking an unreasonable amount of context-switch time, this whole
        // passage could be skipped.

        // Record this event for logging, but push it off to a thread pool to
        // perform that work.
        LL::WorkQueue::ptr_t queue{ getWorkQueue() };
        // The work queue we're looking for might not exist right now.
        if (queue)
        {
            // Bind values. Do NOT bind 'this' to avoid cross-thread access!
            // It would be interesting to know from what queue position we
            // unlinked the main fiber, out of how many in the ready list.
            // Unfortunately round_robin::rqueue_ is private, not protected,
            // so we have no access.
            queue->post(
                [switches=mSwitches, start=mStart, elapsed, now]
                {
                    U32 runtime(U32(now) - U32(start));
                    U32 minutes(runtime / 60u);
                    U32 seconds(runtime % 60u);
                    // use stringize to avoid lasting side effects to the
                    // logging ostream
                    LL_DEBUGS("LLCoros.scheduler")
                        << "At time "
                        << stringize(minutes, ":", std::setw(2), std::setfill('0'), seconds)
                        << " (" << switches << " switches), coroutines took "
                        << stringize(std::setprecision(4), elapsed)
                        << " sec, main coroutine jumped queue"
                        << LL_ENDL;
                });
        }
        LL_ENDL;
        /*----------------------- end logging stuff ------------------------*/
    }
    else
    {
        // Either the main fiber isn't yet ready, or it hasn't yet been
        // mTimeslice seconds since the last time the main fiber ran. Business
        // as usual.
        next = super::pick_next();
    }

    // super::pick_next() could also have returned the main fiber, which is
    // why this is a separate test instead of being folded into the override
    // case above.
    if (next && next->get_id() == mMainID)
    {
        // we're about to resume the main fiber: it's no longer "ready"
        mMainCtx = nullptr;
    }
    mPrevCtx = next;
    // remember when we resumed this fiber so our next call can measure how
    // long the previous resumption was
    mResumeTime = LLDate::now().secondsSinceEpoch();
    return next;
}

LL::WorkQueue::ptr_t scheduler::getWorkQueue()
{
    // Cache a weak_ptr to our target work queue, presuming that
    // std::weak_ptr::lock() is cheaper than WorkQueue::getInstance().
    LL::WorkQueue::ptr_t queue{ mQueue.lock() };
    // We probably started before the relevant WorkQueue was created.
    if (! queue)
    {
        // Try again to locate the specified WorkQueue.
        queue = LL::WorkQueue::getInstance(qname);
        mQueue = queue;
    }
    return queue;
}

void scheduler::use()
{
    boost::fibers::use_scheduling_algorithm<scheduler>();
}

} // namespace llcoro