diff options
| author | Erik Kundiman <erik@megapahit.org> | 2026-01-11 16:35:26 +0800 |
|---|---|---|
| committer | Erik Kundiman <erik@megapahit.org> | 2026-01-11 16:35:26 +0800 |
| commit | 2f765440f8f7017e5af219c6124705c76af477c8 (patch) | |
| tree | a5ac1cf749b0377ee3780737615bd75c1a5fe306 /indra/llcommon | |
| parent | bb0d3408d116174986765bcb2cd698a9d4a4f0d7 (diff) | |
| parent | b26f62eb0ce72b9cdd83296e87ba1954ee1b8b04 (diff) | |
Merge tag 'Second_Life_Release#b26f62eb-26.1' into 2026.01
Diffstat (limited to 'indra/llcommon')
| -rw-r--r-- | indra/llcommon/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | indra/llcommon/llthread.cpp | 6 | ||||
| -rw-r--r-- | indra/llcommon/llwatchdog.cpp | 290 | ||||
| -rw-r--r-- | indra/llcommon/llwatchdog.h | 118 | ||||
| -rw-r--r-- | indra/llcommon/workqueue.cpp | 7 | ||||
| -rw-r--r-- | indra/llcommon/workqueue.h | 6 |
6 files changed, 426 insertions, 3 deletions
diff --git a/indra/llcommon/CMakeLists.txt b/indra/llcommon/CMakeLists.txt index 55dde5dc42..2bafef5d1e 100644 --- a/indra/llcommon/CMakeLists.txt +++ b/indra/llcommon/CMakeLists.txt @@ -99,6 +99,7 @@ set(llcommon_SOURCE_FILES lluri.cpp lluriparser.cpp lluuid.cpp + llwatchdog.cpp llworkerthread.cpp hbxxh.cpp u64.cpp @@ -240,6 +241,7 @@ set(llcommon_HEADER_FILES lluri.h lluriparser.h lluuid.h + llwatchdog.h llwin32headers.h llworkerthread.h hbxxh.h diff --git a/indra/llcommon/llthread.cpp b/indra/llcommon/llthread.cpp index 692941a892..8c12ee7f12 100644 --- a/indra/llcommon/llthread.cpp +++ b/indra/llcommon/llthread.cpp @@ -240,7 +240,11 @@ void LLThread::tryRun() LL::WorkQueue::ptr_t main_queue = LL::WorkQueue::getInstance("mainloop"); main_queue->post( // Bind the current exception, rethrow it in main loop. - [exc = std::current_exception()]() { std::rethrow_exception(exc); }); + [exc = std::current_exception(), name = mName]() + { + LL_INFOS("THREAD") << "Rethrowing exception from thread " << name << LL_ENDL; + std::rethrow_exception(exc); + }); } #endif // else LL_WINDOWS } diff --git a/indra/llcommon/llwatchdog.cpp b/indra/llcommon/llwatchdog.cpp new file mode 100644 index 0000000000..fa240a9ed7 --- /dev/null +++ b/indra/llcommon/llwatchdog.cpp @@ -0,0 +1,290 @@ +/** + * @file llthreadwatchdog.cpp + * @brief The LLThreadWatchdog class definitions + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +// Precompiled header +#include "linden_common.h" + +#include "llwatchdog.h" +#include "llthread.h" + +constexpr U32 WATCHDOG_SLEEP_TIME_USEC = 1000000U; + +// This class runs the watchdog timing thread. +class LLWatchdogTimerThread : public LLThread +{ +public: + LLWatchdogTimerThread() : + LLThread("Watchdog"), + mSleepMsecs(0), + mStopping(false) + { + } + + ~LLWatchdogTimerThread() {} + + void setSleepTime(long ms) { mSleepMsecs = ms; } + void stop() + { + mStopping = true; + mSleepMsecs = 1; + } + + void run() override + { + while(!mStopping) + { + LLWatchdog::getInstance()->run(); + ms_sleep(mSleepMsecs); + } + } + +private: + long mSleepMsecs; + bool mStopping; +}; + +// LLWatchdogEntry +LLWatchdogEntry::LLWatchdogEntry(const std::string& thread_name) + : mThreadName(thread_name) + , mThreadID(LLThread::currentID()) +{ +} + +LLWatchdogEntry::~LLWatchdogEntry() +{ + stop(); +} + +void LLWatchdogEntry::start() +{ + LLWatchdog::getInstance()->add(this); +} + +void LLWatchdogEntry::stop() +{ + // this can happen very late in the shutdown sequence + if (!LLWatchdog::wasDeleted()) + { + LLWatchdog::getInstance()->remove(this); + } +} +std::string LLWatchdogEntry::getThreadName() const +{ + return mThreadName + llformat(": %d", mThreadID); +} + +// LLWatchdogTimeout +const std::string UNINIT_STRING = "uninitialized"; + +LLWatchdogTimeout::LLWatchdogTimeout(const std::string& thread_name) : + LLWatchdogEntry(thread_name), + mTimeout(0.0f), + mPingState(UNINIT_STRING) +{ +} + +LLWatchdogTimeout::~LLWatchdogTimeout() +{ +} + +bool LLWatchdogTimeout::isAlive() const +{ + return (mTimer.getStarted() && !mTimer.hasExpired()); +} + +void LLWatchdogTimeout::reset() +{ + mTimer.setTimerExpirySec(mTimeout); +} + +void LLWatchdogTimeout::setTimeout(F32 d) +{ + mTimeout = d; +} + +void LLWatchdogTimeout::start(std::string_view state) +{ + if (mTimeout == 0) + { + LL_WARNS() << "Cant' start watchdog entry - no timeout set" << LL_ENDL; + return; + } + // Order of operation is very important here. + // After LLWatchdogEntry::start() is called + // LLWatchdogTimeout::isAlive() will be called asynchronously. + ping(state); + mTimer.start(); + mTimer.setTimerExpirySec(mTimeout); // timer expiration set to 0 by start() + LLWatchdogEntry::start(); +} + +void LLWatchdogTimeout::stop() +{ + LLWatchdogEntry::stop(); + mTimer.stop(); +} + +void LLWatchdogTimeout::ping(std::string_view state) +{ + if (!state.empty()) + { + mPingState = state; + } + reset(); +} + +// LLWatchdog +LLWatchdog::LLWatchdog() + :mSuspectsAccessMutex() + ,mTimer(nullptr) + ,mLastClockCount(0) +{ +} + +LLWatchdog::~LLWatchdog() +{ +} + +void LLWatchdog::add(LLWatchdogEntry* e) +{ + lockThread(); + mSuspects.insert(e); + unlockThread(); +} + +void LLWatchdog::remove(LLWatchdogEntry* e) +{ + lockThread(); + mSuspects.erase(e); + unlockThread(); +} + +void LLWatchdog::init(func_t set_error_state_callback) +{ + if (!mSuspectsAccessMutex && !mTimer) + { + mSuspectsAccessMutex = new LLMutex(); + mTimer = new LLWatchdogTimerThread(); + mTimer->setSleepTime(WATCHDOG_SLEEP_TIME_USEC / 1000); + mLastClockCount = LLTimer::getTotalTime(); + + // mTimer->start() kicks off the thread, any code after + // start needs to use the mSuspectsAccessMutex + mTimer->start(); + } + mCreateMarkerFnc = set_error_state_callback; +} + +void LLWatchdog::cleanup() +{ + if (mTimer) + { + mTimer->stop(); + delete mTimer; + mTimer = nullptr; + } + + if (mSuspectsAccessMutex) + { + delete mSuspectsAccessMutex; + mSuspectsAccessMutex = nullptr; + } + + mLastClockCount = 0; +} + +void LLWatchdog::run() +{ + lockThread(); + + // Check the time since the last call to run... + // If the time elapsed is two times greater than the regualr sleep time + // reset the active timeouts. + constexpr U32 TIME_ELAPSED_MULTIPLIER = 2; + U64 current_time = LLTimer::getTotalTime(); + U64 current_run_delta = current_time - mLastClockCount; + mLastClockCount = current_time; + + if (current_run_delta > (WATCHDOG_SLEEP_TIME_USEC * TIME_ELAPSED_MULTIPLIER)) + { + LL_INFOS() << "Watchdog thread delayed: resetting entries." << LL_ENDL; + for (const auto& suspect : mSuspects) + { + suspect->reset(); + } + } + else + { + SuspectsRegistry::iterator result = + std::find_if(mSuspects.begin(), + mSuspects.end(), + [](const LLWatchdogEntry* suspect){ return ! suspect->isAlive(); }); + if (result != mSuspects.end()) + { + // error!!! + if(mTimer) + { + mTimer->stop(); + } + + // Sets error marker file + mCreateMarkerFnc(); + // Todo1: Warn user? + // Todo2: We probably want to report even if 5 seconds passed, just not error 'yet'. + std::string last_state = (*result)->getLastState(); + if (last_state.empty()) + { + LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() + << " expired; assuming viewer is hung and crashing" << LL_ENDL; + } + else + { + LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() + << " expired with state: " << last_state + << "; assuming viewer is hung and crashing" << LL_ENDL; + } + } + } + + + unlockThread(); +} + +void LLWatchdog::lockThread() +{ + if (mSuspectsAccessMutex) + { + mSuspectsAccessMutex->lock(); + } +} + +void LLWatchdog::unlockThread() +{ + if (mSuspectsAccessMutex) + { + mSuspectsAccessMutex->unlock(); + } +} diff --git a/indra/llcommon/llwatchdog.h b/indra/llcommon/llwatchdog.h new file mode 100644 index 0000000000..fded881bb8 --- /dev/null +++ b/indra/llcommon/llwatchdog.h @@ -0,0 +1,118 @@ +/** + * @file llthreadwatchdog.h + * @brief The LLThreadWatchdog class declaration + * + * $LicenseInfo:firstyear=2007&license=viewerlgpl$ + * Second Life Viewer Source Code + * Copyright (C) 2010, Linden Research, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License only. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA + * $/LicenseInfo$ + */ + +#ifndef LL_LLTHREADWATCHDOG_H +#define LL_LLTHREADWATCHDOG_H + +#ifndef LL_TIMER_H + #include "lltimer.h" +#endif +#include "llmutex.h" +#include "llsingleton.h" + +#include <functional> + +// LLWatchdogEntry is the interface used by the tasks that +// need to be watched. +class LLWatchdogEntry +{ +public: + LLWatchdogEntry(const std::string &thread_name); + virtual ~LLWatchdogEntry(); + + // isAlive is accessed by the watchdog thread. + // This may mean that resources used by + // isAlive and other method may need synchronization. + virtual bool isAlive() const = 0; + virtual void reset() = 0; + virtual void start(); + virtual void stop(); + virtual std::string getLastState() const { return std::string(); } + typedef std::thread::id id_t; + std::string getThreadName() const; + +private: + id_t mThreadID; // ID of the thread being watched + std::string mThreadName; +}; + +class LLWatchdogTimeout : public LLWatchdogEntry +{ +public: + LLWatchdogTimeout(const std::string& thread_name); + virtual ~LLWatchdogTimeout(); + + bool isAlive() const override; + void reset() override; + void start() override { start(""); } + void stop() override; + + void start(std::string_view state); + void setTimeout(F32 d); + void ping(std::string_view state); + const std::string& getState() {return mPingState; } + std::string getLastState() const override { return mPingState; } + +private: + LLTimer mTimer; + F32 mTimeout; + std::string mPingState; +}; + +class LLWatchdogTimerThread; // Defined in the cpp +class LLWatchdog : public LLSingleton<LLWatchdog> +{ + LLSINGLETON(LLWatchdog); + ~LLWatchdog(); + +public: + // Add an entry to the watchdog. + void add(LLWatchdogEntry* e); + void remove(LLWatchdogEntry* e); + + typedef std::function<void()> func_t; + void init(func_t set_error_state_callback); + void run(); + void cleanup(); + + +private: + void lockThread(); + void unlockThread(); + + typedef std::set<LLWatchdogEntry*> SuspectsRegistry; + SuspectsRegistry mSuspects; + LLMutex* mSuspectsAccessMutex; + LLWatchdogTimerThread* mTimer; + U64 mLastClockCount; + + // At the moment watchdog expects app to set markers in mCreateMarkerFnc, + // but technically can be used to set any error states or do some cleanup + // or show warnings. + func_t mCreateMarkerFnc; +}; + +#endif // LL_LLTHREADWATCHDOG_H diff --git a/indra/llcommon/workqueue.cpp b/indra/llcommon/workqueue.cpp index 7efaebd569..0407d6c3e9 100644 --- a/indra/llcommon/workqueue.cpp +++ b/indra/llcommon/workqueue.cpp @@ -216,10 +216,15 @@ void LL::WorkQueueBase::callWork(const Work& work) LL_WARNS("LLCoros") << "Capturing and rethrowing uncaught exception in WorkQueueBase " << getKey() << LL_ENDL; + std::string name = getKey(); LL::WorkQueue::ptr_t main_queue = LL::WorkQueue::getInstance("mainloop"); main_queue->post( // Bind the current exception, rethrow it in main loop. - [exc = std::current_exception()]() { std::rethrow_exception(exc); }); + [exc = std::current_exception(), name]() + { + LL_INFOS("LLCoros") << "Rethrowing exception from WorkQueueBase::callWork " << name << LL_ENDL; + std::rethrow_exception(exc); + }); } else { diff --git a/indra/llcommon/workqueue.h b/indra/llcommon/workqueue.h index 735ad38a26..573203a5b3 100644 --- a/indra/llcommon/workqueue.h +++ b/indra/llcommon/workqueue.h @@ -530,7 +530,11 @@ namespace LL reply, // Bind the current exception to transport back to the // originating WorkQueue. Once there, rethrow it. - [exc = std::current_exception()](){ std::rethrow_exception(exc); }); + [exc = std::current_exception()]() + { + LL_INFOS("LLCoros") << "Rethrowing exception from WorkQueueBase::postTo" << LL_ENDL; + std::rethrow_exception(exc); + }); } }, // if caller passed a TimePoint, pass it along to post() |
