diff options
| author | Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> | 2025-12-17 18:39:07 +0200 |
|---|---|---|
| committer | Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> | 2025-12-18 17:00:34 +0200 |
| commit | de9a9111fa3269a7a6a2d966cf52869d6a711333 (patch) | |
| tree | d1f81f3d797681aef48833198cdf17e9a798b55b | |
| parent | 05282cfd45b89abbc8efae6ee7bdfe7122bbec10 (diff) | |
#5084 Improve watchdog's feedback
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | indra/newview/llappviewer.cpp | 28 |
| -rw-r--r-- | indra/newview/llwatchdog.cpp | 27 |
| -rw-r--r-- | indra/newview/llwatchdog.h | 12 |
3 files changed, 56 insertions, 11 deletions
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp index 569fd30b21..8358583c35 100644 --- a/indra/newview/llappviewer.cpp +++ b/indra/newview/llappviewer.cpp @@ -1239,7 +1239,7 @@ bool LLAppViewer::init() /*----------------------------------------------------------------------*/ // nat 2016-06-29 moved the following here from the former mainLoop(). - mMainloopTimeout = new LLWatchdogTimeout(); + mMainloopTimeout = new LLWatchdogTimeout("mainloop"); // Create IO Pump to use for HTTP Requests. gServicePump = new LLPumpIO(gAPRPoolp); @@ -1429,12 +1429,14 @@ bool LLAppViewer::doFrame() { LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df mainloop"); + pingMainloopTimeout("df mainloop"); // canonical per-frame event mainloop.post(newFrame); } { LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df suspend"); + pingMainloopTimeout("df suspend"); // give listeners a chance to run llcoro::suspend(); // if one of our coroutines threw an uncaught exception, rethrow it now @@ -1470,6 +1472,7 @@ bool LLAppViewer::doFrame() { { LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df pauseMainloopTimeout"); + pingMainloopTimeout("df idle"); // So that it will be aware of last state. pauseMainloopTimeout(); // *TODO: Remove. Messages shouldn't be stalling for 20+ seconds! } @@ -1481,7 +1484,7 @@ bool LLAppViewer::doFrame() { LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df resumeMainloopTimeout"); - resumeMainloopTimeout(); + resumeMainloopTimeout("df idle"); } } @@ -1496,7 +1499,7 @@ bool LLAppViewer::doFrame() } disconnectViewer(); - resumeMainloopTimeout(); + resumeMainloopTimeout("df snapshot n disconnect"); } // Render scene. @@ -2301,7 +2304,22 @@ void errorHandler(const std::string& title_string, const std::string& message_st } if (!message_string.empty()) { - OSMessageBox(message_string, title_string.empty() ? LLTrans::getString("MBFatalError") : title_string, OSMB_OK); + if (on_main_thread()) + { + // Prevent watchdog from killing us while dialog is up. 
+ // Can't do pauseMainloopTimeout, since this may be called + // from threads and we are not going to need watchdog now. + LLAppViewer::instance()->pauseMainloopTimeout(); + + // todo: might want to have non-crashing timeout for OOM cases + // and needs a way to pause main loop. + OSMessageBox(message_string, title_string.empty() ? LLTrans::getString("MBFatalError") : title_string, OSMB_OK); + LLAppViewer::instance()->resumeMainloopTimeout(); + } + else + { + OSMessageBox(message_string, title_string.empty() ? LLTrans::getString("MBFatalError") : title_string, OSMB_OK); + } } } @@ -5825,7 +5843,7 @@ void LLAppViewer::initMainloopTimeout(std::string_view state) { if (!mMainloopTimeout) { - mMainloopTimeout = new LLWatchdogTimeout(); + mMainloopTimeout = new LLWatchdogTimeout("mainloop"); resumeMainloopTimeout(state); } } diff --git a/indra/newview/llwatchdog.cpp b/indra/newview/llwatchdog.cpp index 614d1afc2a..0984606456 100644 --- a/indra/newview/llwatchdog.cpp +++ b/indra/newview/llwatchdog.cpp @@ -67,7 +67,9 @@ private: }; // LLWatchdogEntry -LLWatchdogEntry::LLWatchdogEntry() +LLWatchdogEntry::LLWatchdogEntry(const std::string& thread_name) + : mThreadName(thread_name) + , mThreadID(LLThread::currentID()) { } @@ -89,11 +91,16 @@ void LLWatchdogEntry::stop() LLWatchdog::getInstance()->remove(this); } } +std::string LLWatchdogEntry::getThreadName() const +{ + return mThreadName + llformat(": %d", mThreadID); +} // LLWatchdogTimeout const std::string UNINIT_STRING = "uninitialized"; -LLWatchdogTimeout::LLWatchdogTimeout() : +LLWatchdogTimeout::LLWatchdogTimeout(const std::string& thread_name) : + LLWatchdogEntry(thread_name), mTimeout(0.0f), mPingState(UNINIT_STRING) { @@ -249,9 +256,21 @@ void LLWatchdog::run() { LLAppViewer::instance()->createErrorMarker(LAST_EXEC_FROZE); } - // Todo1: warn user? + // Todo1: Warn user? // Todo2: We probably want to report even if 5 seconds passed, just not error 'yet'. 
- LL_ERRS() << "Watchdog timer expired; assuming viewer is hung and crashing" << LL_ENDL; + // Todo3: This will report crash as 'llerror', consider adding 'watchdog' reason. + std::string last_state = (*result)->getLastState(); + if (last_state.empty()) + { + LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() + << " expired; assuming viewer is hung and crashing" << LL_ENDL; + } + else + { + LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName() + << " expired with state: " << last_state + << "; assuming viewer is hung and crashing" << LL_ENDL; + } } } diff --git a/indra/newview/llwatchdog.h b/indra/newview/llwatchdog.h index b7dd55577e..a8056f4337 100644 --- a/indra/newview/llwatchdog.h +++ b/indra/newview/llwatchdog.h @@ -36,7 +36,7 @@ class LLWatchdogEntry { public: - LLWatchdogEntry(); + LLWatchdogEntry(const std::string &thread_name); virtual ~LLWatchdogEntry(); // isAlive is accessed by the watchdog thread. @@ -46,12 +46,19 @@ public: virtual void reset() = 0; virtual void start(); virtual void stop(); + virtual std::string getLastState() const { return std::string(); } + typedef std::thread::id id_t; + std::string getThreadName() const; + +private: + id_t mThreadID; // ID of the thread being watched + std::string mThreadName; }; class LLWatchdogTimeout : public LLWatchdogEntry { public: - LLWatchdogTimeout(); + LLWatchdogTimeout(const std::string& thread_name); virtual ~LLWatchdogTimeout(); bool isAlive() const override; @@ -63,6 +70,7 @@ public: void setTimeout(F32 d); void ping(std::string_view state); const std::string& getState() {return mPingState; } + std::string getLastState() const override { return mPingState; } private: LLTimer mTimer; |
