summaryrefslogtreecommitdiff
path: root/indra/llcommon/workqueue.h
blob: 141d2f93c1261e63877f8ad8f59235423efd48db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
/**
 * @file   workqueue.h
 * @author Nat Goodspeed
 * @date   2021-09-30
 * @brief  Queue used for inter-thread work passing.
 *
 * $LicenseInfo:firstyear=2021&license=viewerlgpl$
 * Copyright (c) 2021, Linden Research, Inc.
 * $/LicenseInfo$
 */

#if ! defined(LL_WORKQUEUE_H)
#define LL_WORKQUEUE_H

#include "llcoros.h"
#include "llevents.h"
#include "llexception.h"
#include "llinstancetracker.h"
#include "llinstancetrackersubclass.h"
#include "threadsafeschedule.h"
#include <chrono>
#include <exception>                // std::current_exception
#include <functional>               // std::function
#include <string>
#include <type_traits>              // std::decay_t
#include <utility>                  // std::forward, std::move

namespace LL
{

/*****************************************************************************
*   WorkQueueBase: API for WorkQueue and WorkSchedule
*****************************************************************************/
    /**
     * Abstract base shared by WorkQueue and WorkSchedule. Each instance is
     * registered with LLInstanceTracker under a std::string key, so a
     * typical queue has a string name that can be used to find it.
     */
    class WorkQueueBase: public LLInstanceTracker<WorkQueueBase, std::string>
    {
    private:
        using super = LLInstanceTracker<WorkQueueBase, std::string>;

    public:
        /// unit of work: any nullary callable whose result is discarded
        using Work = std::function<void()>;
        /// exception thrown by waitForResult() when the work cannot be posted
        using Closed = LLThreadSafeQueueInterrupt;
        // for runFor()
        using TimePoint = std::chrono::steady_clock::time_point;

        /// exception type carrying a WorkQueueBase-specific message
        struct Error: public LLException
        {
            Error(const std::string& what): LLException(what) {}
        };

        /**
         * Both arguments are required here; it is the WorkQueue and
         * WorkSchedule constructors that default @a name to an empty string.
         * Per the original author's note, when the name is omitted a unique
         * name is synthesized; for practical purposes that makes the queue
         * anonymous.
         *
         * NOTE(review): @a auto_shutdown is not used anywhere in this header
         * -- presumably it controls whether mStopListener is connected;
         * confirm in the implementation file.
         */
        WorkQueueBase(const std::string& name, bool auto_shutdown);

        /**
         * Since the point of WorkQueue is to pass work to some other worker
         * thread(s) asynchronously, it's important that it continue to exist
         * until the worker thread(s) have drained it. To communicate that
         * it's time for them to quit, close() the queue.
         */
        virtual void close() = 0;

        /**
         * WorkQueue supports multiple producers and multiple consumers. In
         * the general case it's misleading to test size(), since any other
         * thread might change it the nanosecond the lock is released. On that
         * basis, some might argue against publishing a size() method at all.
         *
         * But there are two specific cases in which a test based on size()
         * might be reasonable:
         *
         * * If you're the only producer, noticing that size() == 0 is
         *   meaningful.
         * * If you're the only consumer, noticing that size() > 0 is
         *   meaningful.
         */
        virtual size_t size() = 0;
        /// producer end: are we prevented from pushing any additional items?
        virtual bool isClosed() = 0;
        /// consumer end: are we done, is the queue entirely drained?
        virtual bool done() = 0;

        /*---------------------- fire and forget API -----------------------*/

        /**
         * Post work, unless the queue is closed before we can post.
         * Returns true if the work was accepted.
         */
        virtual bool post(const Work&) = 0;

        /**
         * Post work, unless the queue is full.
         * Returns true if the work was accepted.
         */
        virtual bool tryPost(const Work&) = 0;

        /**
         * Post work to another WorkQueue, which may or may not still exist
         * and be open. All trailing arguments are forwarded to the target's
         * post(). Return true if we were able to post, false if @a target
         * has expired or its post() declined the work.
         */
        template <typename... ARGS>
        static bool postMaybe(weak_t target, ARGS&&... args);

        /*------------------------- handshake API --------------------------*/

        /**
         * Post work to another WorkQueue, requesting a specific callback to
         * be run on this WorkQueue on completion. Optional final argument is
         * TimePoint for WorkSchedule.
         *
         * If @a callable returns a value, that value is passed to
         * @a callback; if it returns void, @a callback is invoked with no
         * arguments. If @a callable throws, the exception is transported
         * back and rethrown on this WorkQueue instead of running
         * @a callback.
         *
         * Returns true if able to post, false if the other WorkQueue is
         * inaccessible.
         */
        template <typename CALLABLE, typename FOLLOWUP, typename... ARGS>
        bool postTo(weak_t target, CALLABLE&& callable, FOLLOWUP&& callback,
                    ARGS&&... args);

        /**
         * Post work, blocking the calling coroutine, returning the result to
         * caller on completion. Optional final argument is TimePoint for
         * WorkSchedule.
         *
         * In general, we assume that each thread's default coroutine is busy
         * servicing its WorkQueue or whatever. To try to prevent mistakes, we
         * forbid calling waitForResult() from a thread's default coroutine.
         */
        template <typename CALLABLE, typename... ARGS>
        auto waitForResult(CALLABLE&& callable, ARGS&&... args)
        {
            // enforce the default-coroutine restriction before delegating
            checkCoroutine("waitForResult()");
            return waitForResult_(std::forward<CALLABLE>(callable),
                                  std::forward<ARGS>(args)...);
        }

        /**
         * Post work, blocking the calling coroutine, returning the result to
         * caller on completion. Optional final argument is TimePoint for
         * WorkSchedule.
         *
         * Unlike waitForResult(), this variant does not call
         * checkCoroutine() first. Throws Closed if the work could not be
         * posted; an exception thrown by @a callable is rethrown to the
         * caller.
         */
        template <typename CALLABLE, typename... ARGS>
        auto waitForResult_(CALLABLE&& callable, ARGS&&... args);

        /*--------------------------- worker API ---------------------------*/

        /**
         * runUntilClose() pulls Work items off this WorkQueue until the
         * queue is closed, at which point it returns. This would be the
         * typical entry point for a simple worker thread.
         */
        void runUntilClose();

        /**
         * runPending() runs all Work items that are ready to run. It
         * returns true if the queue remains open, false if the queue has been
         * closed. This could be used by a thread whose primary purpose is to
         * serve the queue, but also wants to do other things with its idle time.
         */
        bool runPending();

        /**
         * runOne() runs at most one ready Work item -- zero if none are
         * ready. It returns true if the queue remains open, false if the
         * queue has been closed.
         */
        bool runOne();

        /**
         * runFor() runs a subset of ready Work items, until the
         * timeslice has been exceeded. It returns true if the queue remains
         * open, false if the queue has been closed. This could be used by a
         * busy main thread to lend a bounded few CPU cycles to this WorkQueue
         * without risking the WorkQueue blowing out the length of any one
         * frame.
         */
        template <typename Rep, typename Period>
        bool runFor(const std::chrono::duration<Rep, Period>& timeslice)
        {
            LL_PROFILE_ZONE_SCOPED;
            // convert the relative timeslice to an absolute deadline
            return runUntil(TimePoint::clock::now() + timeslice);
        }

        /**
         * runUntil() is just like runFor(), only with a specific end time
         * instead of a timeslice duration.
         */
        bool runUntil(const TimePoint& until);

    protected:
        /// Run @a callable and package its outcome with @a callback as the
        /// reply work item for postTo(); dispatches to MakeReplyLambda.
        template <typename CALLABLE, typename FOLLOWUP>
        static auto makeReplyLambda(CALLABLE&& callable, FOLLOWUP&& callback);
        /// general case: arbitrary C++ return type
        template <typename CALLABLE, typename FOLLOWUP, typename RETURNTYPE>
        struct MakeReplyLambda;
        /// specialize for CALLABLE returning void
        template <typename CALLABLE, typename FOLLOWUP>
        struct MakeReplyLambda<CALLABLE, FOLLOWUP, void>;

        /// general case: arbitrary C++ return type
        template <typename CALLABLE, typename RETURNTYPE>
        struct WaitForResult;
        /// specialize for CALLABLE returning void
        template <typename CALLABLE>
        struct WaitForResult<CALLABLE, void>;

        /// Called by waitForResult() with its own name; presumably rejects
        /// calls made from a thread's default coroutine, per the
        /// waitForResult() comment -- confirm in the implementation file.
        static void checkCoroutine(const std::string& method);
        /// Report a usage error, e.g. postEvery() with a non-positive
        /// interval. How the error is surfaced is defined in the
        /// implementation file.
        static void error(const std::string& msg);
        /// Presumably produces the instance-tracker key, synthesizing a
        /// unique one when @a name is empty -- confirm in the implementation.
        static std::string makeName(const std::string& name);
        /// Invoke a single Work item on behalf of the worker API.
        void callWork(const Work& work);

        /// NOTE(review): not referenced in this header; presumably the
        /// connection used to react to application shutdown -- confirm.
        LLTempBoundListener mStopListener;

    private:
        /// subclass hook: remove and return the next Work item
        virtual Work pop_() = 0;
        /// subclass hook: try to remove the next Work item into the
        /// argument, returning whether one was obtained
        virtual bool tryPop_(Work&) = 0;
    };

/*****************************************************************************
*   WorkQueue: no timestamped task support
*****************************************************************************/
    // WorkQueue is the plain variant of WorkQueueBase: work items are held
    // in an LLThreadSafeQueue<Work> and carry no timestamp.
    class WorkQueue: public LLInstanceTrackerSubclass<WorkQueue, WorkQueueBase>
    {
    private:
        using super = LLInstanceTrackerSubclass<WorkQueue, WorkQueueBase>;

    public:
        /**
         * You may omit the WorkQueue name, in which case a unique name is
         * synthesized; for practical purposes that makes it anonymous.
         *
         * @a capacity defaults to 1024 and @a auto_shutdown to true.
         * NOTE(review): capacity is presumably the maximum number of queued
         * items (tryPost() documents a "queue is full" failure) -- confirm
         * against LLThreadSafeQueue.
         */
        WorkQueue(const std::string& name = std::string(), size_t capacity=1024, bool auto_shutdown = true);

        /**
         * Since the point of WorkQueue is to pass work to some other worker
         * thread(s) asynchronously, it's important that it continue to exist
         * until the worker thread(s) have drained it. To communicate that
         * it's time for them to quit, close() the queue.
         */
        void close() override;

        /**
         * WorkQueue supports multiple producers and multiple consumers. In
         * the general case it's misleading to test size(), since any other
         * thread might change it the nanosecond the lock is released. On that
         * basis, some might argue against publishing a size() method at all.
         *
         * But there are two specific cases in which a test based on size()
         * might be reasonable:
         *
         * * If you're the only producer, noticing that size() == 0 is
         *   meaningful.
         * * If you're the only consumer, noticing that size() > 0 is
         *   meaningful.
         */
        size_t size() override;
        /// producer end: are we prevented from pushing any additional items?
        bool isClosed() override;
        /// consumer end: are we done, is the queue entirely drained?
        bool done() override;

        /*---------------------- fire and forget API -----------------------*/

        /**
         * Post work, unless the queue is closed before we can post.
         * Returns true if the work was accepted.
         */
        bool post(const Work&) override;

        /**
         * Post work, unless the queue is full.
         * Returns true if the work was accepted.
         */
        bool tryPost(const Work&) override;

    private:
        /// underlying container of pending Work items
        using Queue = LLThreadSafeQueue<Work>;
        Queue mQueue;

        /// WorkQueueBase hooks used by the worker API to fetch work
        Work pop_() override;
        bool tryPop_(Work&) override;
    };

/*****************************************************************************
*   WorkSchedule: add support for timestamped tasks
*****************************************************************************/
    // WorkSchedule is the timestamp-aware variant of WorkQueueBase: work
    // items are held in a ThreadSafeSchedule<Work>, and post()/tryPost()
    // gain overloads accepting a TimePoint.
    class WorkSchedule: public LLInstanceTrackerSubclass<WorkSchedule, WorkQueueBase>
    {
    private:
        using super = LLInstanceTrackerSubclass<WorkSchedule, WorkQueueBase>;
        /// underlying container of pending, timestamped Work items
        using Queue = ThreadSafeSchedule<Work>;
        // helper for postEvery()
        template <typename Rep, typename Period, typename CALLABLE>
        class BackJack;

    public:
        /// time type used by the schedule; hides WorkQueueBase::TimePoint
        /// within this class
        using TimePoint = Queue::TimePoint;
        /// the schedule's own (time, work) tuple type
        using TimedWork = Queue::TimeTuple;

        /**
         * You may omit the WorkSchedule name, in which case a unique name is
         * synthesized; for practical purposes that makes it anonymous.
         *
         * @a capacity defaults to 1024 and @a auto_shutdown to true.
         * NOTE(review): capacity is presumably the maximum number of queued
         * items (tryPost() documents a "queue is full" failure) -- confirm
         * against ThreadSafeSchedule.
         */
        WorkSchedule(const std::string& name = std::string(), size_t capacity=1024, bool auto_shutdown = true);

        /**
         * Since the point of WorkSchedule is to pass work to some other worker
         * thread(s) asynchronously, it's important that the WorkSchedule continue
         * to exist until the worker thread(s) have drained it. To communicate
         * that it's time for them to quit, close() the queue.
         */
        void close() override;

        /**
         * WorkSchedule supports multiple producers and multiple consumers. In
         * the general case it's misleading to test size(), since any other
         * thread might change it the nanosecond the lock is released. On that
         * basis, some might argue against publishing a size() method at all.
         *
         * But there are two specific cases in which a test based on size()
         * might be reasonable:
         *
         * * If you're the only producer, noticing that size() == 0 is
         *   meaningful.
         * * If you're the only consumer, noticing that size() > 0 is
         *   meaningful.
         */
        size_t size() override;
        /// producer end: are we prevented from pushing any additional items?
        bool isClosed() override;
        /// consumer end: are we done, is the queue entirely drained?
        bool done() override;

        /*---------------------- fire and forget API -----------------------*/

        /**
         * Post work with no explicit time, unless the queue is closed before
         * we can post. Returns true if the work was accepted.
         */
        bool post(const Work& callable) override;

        /**
         * Post work for a particular time, unless the queue is closed before
         * we can post. Returns true if the work was accepted.
         */
        bool post(const Work& callable, const TimePoint& time);

        /**
         * Post work with no explicit time, unless the queue is full.
         * Returns true if the work was accepted.
         */
        bool tryPost(const Work& callable) override;

        /**
         * Post work for a particular time, unless the queue is full.
         * Returns true if the work was accepted.
         */
        bool tryPost(const Work& callable, const TimePoint& time);

        /**
         * Launch a callable returning bool that will trigger repeatedly at
         * specified interval, until the callable returns false. The interval
         * must be positive; a zero or negative interval is reported through
         * error(). Returns the result of posting the first run.
         *
         * If you need to signal that callable from outside, DO NOT bind a
         * reference to a simple bool! That's not thread-safe. Instead, bind
         * an LLCond variant, e.g. LLOneShotCond or LLBoolCond.
         */
        template <typename Rep, typename Period, typename CALLABLE>
        bool postEvery(const std::chrono::duration<Rep, Period>& interval,
                       CALLABLE&& callable);

    private:
        Queue mQueue;

        /// WorkQueueBase hooks used by the worker API to fetch work
        Work pop_() override;
        bool tryPop_(Work&) override;
    };

    /**
     * BackJack is, in effect, a hand-rolled lambda, binding a WorkSchedule, a
     * CALLABLE that returns bool, a TimePoint and an interval at which to
     * relaunch it. As long as the callable continues returning true, BackJack
     * keeps resubmitting it to the target WorkQueue.
     *
     * "You go back, Jack, and do it again -- wheel turnin' round and round..."
     * --Steely Dan, from "Can't Buy a Thrill" (1972)
     * https://www.youtube.com/watch?v=yCgHTmv4YU8
     */
    // Why is BackJack a class and not a lambda? Because, unlike a lambda, a
    // class method gets its own 'this' pointer -- which we need to resubmit
    // the whole BackJack callable.
    template <typename Rep, typename Period, typename CALLABLE>
    class WorkSchedule::BackJack
    {
    public:
        /**
         * Bind the desired data.
         *
         * @param target   weak reference to the WorkSchedule on which to
         *                 keep resubmitting this object
         * @param start    nominal time of the first run; every later run is
         *                 scheduled at start + k * interval
         * @param interval spacing between scheduled runs
         * @param callable nullary callable returning bool; it is moved into
         *                 this object
         */
        BackJack(weak_t target,
                 const TimePoint& start,
                 const std::chrono::duration<Rep, Period>& interval,
                 CALLABLE&& callable):
            mTarget(target),
            mStart(start),
            mInterval(interval),
            mCallable(std::move(callable))
        {}

        // This operator() method, called by target WorkSchedule, is what
        // makes this object a Work item. Although WE require a callable
        // returning bool, WorkSchedule wants a void callable. We consume the
        // bool.
        void operator()()
        {
            // If mCallable() throws an exception, don't catch it here: if it
            // throws once, it's likely to throw every time, so it's a waste
            // of time to arrange to call it again.
            if (! mCallable())
            {
                // callable asked to stop: simply don't resubmit
                return;
            }

            // Modify mStart to the new start time we desire. If we simply
            // added mInterval to now, we'd get actual timings of
            // (mInterval + slop), where 'slop' is the latency between the
            // previous mStart and the WorkQueue actually calling us.
            // Instead, add mInterval to mStart so that at least we
            // register our intent to fire at exact mIntervals.
            mStart += mInterval;

            // We're normally being called at this moment by the target
            // WorkSchedule, so it should still exist. But lock() yields an
            // empty pointer if the target has expired, and the downcast
            // yields one if the target isn't a WorkSchedule: in either case
            // there is nothing to resubmit to, so just stop rather than
            // dereference a null pointer.
            auto target{ std::dynamic_pointer_cast<WorkSchedule>(mTarget.lock()) };
            if (! target)
            {
                return;
            }

            // Copy the next start time BEFORE moving *this, so the post()
            // call below never reads a member of a moved-from object.
            const TimePoint next{ mStart };

            // Resubmit the whole *this callable: that's why we're a class
            // rather than a lambda. Moving *this avoids copying the bound
            // callable; naturally this statement must be the last time we
            // reference this instance, which may become moved-from.
            // Discard bool return: once this queue is closed, oh well,
            // just stop
            target->post(std::move(*this), next);
        }

    private:
        weak_t mTarget;                                 // where to resubmit
        TimePoint mStart;                               // next scheduled run
        std::chrono::duration<Rep, Period> mInterval;   // spacing of runs
        CALLABLE mCallable;                             // the recurring work
    };

    /**
     * Schedule @a callable to run on this WorkSchedule every @a interval,
     * starting now, for as long as it keeps returning true.
     *
     * @param interval positive spacing between runs; a zero or negative
     *                 value is reported through error()
     * @param callable nullary callable returning bool. An rvalue is moved
     *                 from; an lvalue is copied, leaving the caller's object
     *                 intact.
     * @return the result of post() for the first run
     */
    template <typename Rep, typename Period, typename CALLABLE>
    bool WorkSchedule::postEvery(const std::chrono::duration<Rep, Period>& interval,
                                 CALLABLE&& callable)
    {
        if (interval.count() <= 0)
        {
            // It's essential that postEvery() be called with a positive
            // interval, since each call to BackJack posts another instance of
            // itself at (start + interval) and we order by target time. A
            // zero or negative interval would result in that BackJack
            // instance going to the head of the queue every time, immediately
            // ready to run. Effectively that would produce an infinite loop,
            // a denial of service on this WorkQueue.
            error("postEvery(interval) may not be 0");
        }
        // CALLABLE is deduced from a forwarding reference, so for an lvalue
        // argument it is an lvalue reference type. Instantiating BackJack on
        // that would give it a reference member that may dangle by the time
        // the work runs on another thread. Always store a decayed, owned
        // copy instead: copy-constructed from an lvalue, move-constructed
        // from an rvalue.
        using Callable = std::decay_t<CALLABLE>;
        Callable bound(std::forward<CALLABLE>(callable));

        // Instantiate and post a suitable BackJack, binding a weak_ptr to
        // self, the current time, the desired interval and the desired
        // callable.
        return post(
            BackJack<Rep, Period, Callable>(
                 getWeak(), TimePoint::clock::now(), interval, std::move(bound)));
    }

    /// general case: arbitrary C++ return type
    ///
    /// Invoking an instance runs @a callable immediately and returns a
    /// nullary lambda that, when later run, passes the stored result to
    /// @a callback.
    template <typename CALLABLE, typename FOLLOWUP, typename RETURNTYPE>
    struct WorkQueueBase::MakeReplyLambda
    {
        auto operator()(CALLABLE&& callable, FOLLOWUP&& callback)
        {
            // Call the callable in any case -- but to minimize
            // copying the result, immediately bind it into the reply
            // lambda. The reply lambda also binds the original
            // callback, so that when we, the originating WorkQueue,
            // finally receive and process the reply lambda, we'll
            // call the bound callback with the bound result -- on the
            // same thread that originally called postTo().
            //
            // FOLLOWUP&& collapses to an lvalue reference when FOLLOWUP is
            // itself a reference type, so forward rather than move: an
            // lvalue callback is copied instead of being gutted.
            return
                [result = std::forward<CALLABLE>(callable)(),
                 callback = std::forward<FOLLOWUP>(callback)]
                ()
                mutable { callback(std::move(result)); };
        }
    };

    /// specialize for CALLABLE returning void
    ///
    /// Invoking an instance runs @a callable immediately; since there is no
    /// result to bind, the reply work item is just @a callback itself.
    template <typename CALLABLE, typename FOLLOWUP>
    struct WorkQueueBase::MakeReplyLambda<CALLABLE, FOLLOWUP, void>
    {
        auto operator()(CALLABLE&& callable, FOLLOWUP&& callback)
        {
            // Call the callable, which produces no result.
            std::forward<CALLABLE>(callable)();
            // Our completion callback is simply the caller's callback.
            // Forward rather than move: FOLLOWUP&& may collapse to an lvalue
            // reference, in which case the callback is copied, not stolen.
            // The 'auto' return type yields a decayed, owned object.
            return std::forward<FOLLOWUP>(callback);
        }
    };

    template <typename CALLABLE, typename FOLLOWUP>
    auto WorkQueueBase::makeReplyLambda(CALLABLE&& callable, FOLLOWUP&& callback)
    {
        return MakeReplyLambda<CALLABLE, FOLLOWUP,
                               decltype(std::forward<CALLABLE>(callable)())>()
            (std::move(callable), std::move(callback));
    }

    /**
     * Post @a callable to the WorkQueue at @a target; when it completes
     * there, post a reply to THIS WorkQueue that runs @a callback (with
     * callable's result, if it returns one). If @a callable throws, the
     * exception is posted back and rethrown on this WorkQueue instead.
     *
     * @a callable and @a callback are copied when passed as lvalues and
     * moved when passed as rvalues. Any trailing @a args are forwarded to
     * the target's post().
     *
     * @return false if @a target no longer exists, else the result of the
     *         target's post()
     */
    template <typename CALLABLE, typename FOLLOWUP, typename... ARGS>
    bool WorkQueueBase::postTo(weak_t target, CALLABLE&& callable, FOLLOWUP&& callback,
                               ARGS&&... args)
    {
        LL_PROFILE_ZONE_SCOPED;
        // We're being asked to post to the WorkQueue at target.
        // target is a weak_ptr: have to lock it to check it.
        auto tptr = target.lock();
        if (! tptr)
            // can't post() if the target WorkQueue has been destroyed
            return false;

        // Here we believe target WorkQueue still exists. Post to it a
        // lambda that packages our callable, our callback and a weak_ptr
        // to this originating WorkQueue.
        return tptr->post(
            // callable and callback are forwarding references: capture with
            // std::forward so an lvalue passed by the caller is copied
            // rather than silently moved-from.
            [reply = super::getWeak(),
             callable = std::forward<CALLABLE>(callable),
             callback = std::forward<FOLLOWUP>(callback)]
            () mutable
            {
                // Use postMaybe() below in case this originating WorkQueue
                // has been closed or destroyed. Remember, the outer lambda is
                // now running on a thread servicing the target WorkQueue, and
                // real time has elapsed since postTo()'s tptr->post() call.
                try
                {
                    // Make a reply lambda to repost to THIS WorkQueue.
                    // Delegate to makeReplyLambda() so we can partially
                    // specialize on void return. The captures are our own
                    // copies, so moving from them here is safe.
                    postMaybe(reply, makeReplyLambda(std::move(callable), std::move(callback)));
                }
                catch (...)
                {
                    // Either variant of makeReplyLambda() is responsible for
                    // calling the caller's callable. If that throws, return
                    // the exception to the originating thread.
                    postMaybe(
                        reply,
                        // Bind the current exception to transport back to the
                        // originating WorkQueue. Once there, rethrow it.
                        [exc = std::current_exception()]{ std::rethrow_exception(exc); });
                }
            },
            // if caller passed a TimePoint, pass it along to post()
            std::forward<ARGS>(args)...);
    }

    /**
     * Forward @a args to post() on the WorkQueue at @a target, provided
     * that queue still exists. Returns post()'s result, or false when
     * @a target has expired.
     */
    template <typename... ARGS>
    bool WorkQueueBase::postMaybe(weak_t target, ARGS&&... args)
    {
        LL_PROFILE_ZONE_SCOPED;
        // A weak_ptr can only be used through a locked (strong) pointer.
        const auto live = target.lock();
        if (! live)
        {
            // the target queue is gone: nothing to post to
            return false;
        }
        return live->post(std::forward<ARGS>(args)...);
    }

    /// general case: arbitrary C++ return type
    ///
    /// Invoking an instance posts @a callable to @a self, then blocks on a
    /// future until the posted work has produced a value or an exception,
    /// which is returned or rethrown to the caller. Throws Closed if the
    /// work could not be posted.
    template <typename CALLABLE, typename RETURNTYPE>
    struct WorkQueueBase::WaitForResult
    {
        template <typename... ARGS>
        auto operator()(WorkQueueBase* self, CALLABLE&& callable, ARGS&&... args)
        {
            LLCoros::Promise<RETURNTYPE> promise;
            bool posted = self->post(
                // We dare to bind a reference to Promise because it's
                // specifically designed for cross-thread communication.
                // CALLABLE&& collapses to an lvalue reference when the
                // caller passed an lvalue, so capture with std::forward: the
                // caller's object is then copied, not moved-from.
                [&promise, callable = std::forward<CALLABLE>(callable)]()
                mutable {
                    try
                    {
                        // call the caller's callable and trigger promise with result
                        promise.set_value(callable());
                    }
                    catch (...)
                    {
                        // transport the failure to the waiting caller
                        promise.set_exception(std::current_exception());
                    }
                },
                // if caller passed a TimePoint, pass it to post()
                std::forward<ARGS>(args)...);
            if (! posted)
            {
                // nobody will ever fulfill the promise: don't wait on it
                LLTHROW(WorkQueueBase::Closed());
            }
            auto future{ LLCoros::getFuture(promise) };
            // now, on the calling thread, wait for that result
            LLCoros::TempStatus st("waiting for WorkQueue::waitForResult()");
            return future.get();
        }
    };

    /// specialize for CALLABLE returning void
    ///
    /// Invoking an instance posts @a callable to @a self, then blocks on a
    /// future until the posted work has completed; an exception thrown by
    /// @a callable is rethrown to the caller. Throws Closed if the work
    /// could not be posted.
    template <typename CALLABLE>
    struct WorkQueueBase::WaitForResult<CALLABLE, void>
    {
        template <typename... ARGS>
        void operator()(WorkQueueBase* self, CALLABLE&& callable, ARGS&&... args)
        {
            LLCoros::Promise<void> promise;
            bool posted = self->post(
                // &promise is designed for cross-thread access
                // CALLABLE&& collapses to an lvalue reference when the
                // caller passed an lvalue, so capture with std::forward: the
                // caller's object is then copied, not moved-from.
                [&promise, callable = std::forward<CALLABLE>(callable)]()
                mutable {
                    try
                    {
                        callable();
                        // no result to deliver: just signal completion
                        promise.set_value();
                    }
                    catch (...)
                    {
                        // transport the failure to the waiting caller
                        promise.set_exception(std::current_exception());
                    }
                },
                // if caller passed a TimePoint, pass it to post()
                std::forward<ARGS>(args)...);
            if (! posted)
            {
                // nobody will ever fulfill the promise: don't wait on it
                LLTHROW(WorkQueueBase::Closed());
            }
            auto future{ LLCoros::getFuture(promise) };
            // block until set_value()
            LLCoros::TempStatus st("waiting for void WorkQueue::waitForResult()");
            future.get();
        }
    };

    template <typename CALLABLE, typename... ARGS>
    auto WorkQueueBase::waitForResult_(CALLABLE&& callable, ARGS&&... args)
    {
        // derive callable's return type so we can specialize for void
        return WaitForResult<CALLABLE, decltype(std::forward<CALLABLE>(callable)())>()
            (this, std::forward<CALLABLE>(callable), std::forward<ARGS>(args)...);
    }

} // namespace LL

#endif /* ! defined(LL_WORKQUEUE_H) */