summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> 2025-12-17 18:39:07 +0200
committer Andrey Kleshchev <117672381+akleshchev@users.noreply.github.com> 2025-12-18 17:00:34 +0200
commit de9a9111fa3269a7a6a2d966cf52869d6a711333 (patch)
tree d1f81f3d797681aef48833198cdf17e9a798b55b
parent 05282cfd45b89abbc8efae6ee7bdfe7122bbec10 (diff)
#5084 Improve watchdog's feedback
-rw-r--r-- indra/newview/llappviewer.cpp 28
-rw-r--r-- indra/newview/llwatchdog.cpp 27
-rw-r--r-- indra/newview/llwatchdog.h 12
3 files changed, 56 insertions, 11 deletions
diff --git a/indra/newview/llappviewer.cpp b/indra/newview/llappviewer.cpp
index 569fd30b21..8358583c35 100644
--- a/indra/newview/llappviewer.cpp
+++ b/indra/newview/llappviewer.cpp
@@ -1239,7 +1239,7 @@ bool LLAppViewer::init()
/*----------------------------------------------------------------------*/
// nat 2016-06-29 moved the following here from the former mainLoop().
- mMainloopTimeout = new LLWatchdogTimeout();
+ mMainloopTimeout = new LLWatchdogTimeout("mainloop");
// Create IO Pump to use for HTTP Requests.
gServicePump = new LLPumpIO(gAPRPoolp);
@@ -1429,12 +1429,14 @@ bool LLAppViewer::doFrame()
{
LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df mainloop");
+ pingMainloopTimeout("df mainloop");
// canonical per-frame event
mainloop.post(newFrame);
}
{
LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df suspend");
+ pingMainloopTimeout("df suspend");
// give listeners a chance to run
llcoro::suspend();
// if one of our coroutines threw an uncaught exception, rethrow it now
@@ -1470,6 +1472,7 @@ bool LLAppViewer::doFrame()
{
{
LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df pauseMainloopTimeout");
+ pingMainloopTimeout("df idle"); // So that it will be aware of last state.
pauseMainloopTimeout(); // *TODO: Remove. Messages shouldn't be stalling for 20+ seconds!
}
@@ -1481,7 +1484,7 @@ bool LLAppViewer::doFrame()
{
LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df resumeMainloopTimeout");
- resumeMainloopTimeout();
+ resumeMainloopTimeout("df idle");
}
}
@@ -1496,7 +1499,7 @@ bool LLAppViewer::doFrame()
}
disconnectViewer();
- resumeMainloopTimeout();
+ resumeMainloopTimeout("df snapshot n disconnect");
}
// Render scene.
@@ -2301,7 +2304,22 @@ void errorHandler(const std::string& title_string, const std::string& message_st
}
if (!message_string.empty())
{
- OSMessageBox(message_string, title_string.empty() ? LLTrans::getString("MBFatalError") : title_string, OSMB_OK);
+ if (on_main_thread())
+ {
+ // Prevent watchdog from killing us while dialog is up.
+ // Only pause the mainloop timeout on the main thread: this handler
+ // may also be called from other threads (handled in the else branch).
+ LLAppViewer::instance()->pauseMainloopTimeout();
+
+ // todo: might want to have non-crashing timeout for OOM cases
+ // and needs a way to pause main loop.
+ OSMessageBox(message_string, title_string.empty() ? LLTrans::getString("MBFatalError") : title_string, OSMB_OK);
+ LLAppViewer::instance()->resumeMainloopTimeout();
+ }
+ else
+ {
+ OSMessageBox(message_string, title_string.empty() ? LLTrans::getString("MBFatalError") : title_string, OSMB_OK);
+ }
}
}
@@ -5825,7 +5843,7 @@ void LLAppViewer::initMainloopTimeout(std::string_view state)
{
if (!mMainloopTimeout)
{
- mMainloopTimeout = new LLWatchdogTimeout();
+ mMainloopTimeout = new LLWatchdogTimeout("mainloop");
resumeMainloopTimeout(state);
}
}
diff --git a/indra/newview/llwatchdog.cpp b/indra/newview/llwatchdog.cpp
index 614d1afc2a..0984606456 100644
--- a/indra/newview/llwatchdog.cpp
+++ b/indra/newview/llwatchdog.cpp
@@ -67,7 +67,9 @@ private:
};
// LLWatchdogEntry
-LLWatchdogEntry::LLWatchdogEntry()
+// Stores the given label and LLThread::currentID() as the watched thread's id.
+// Initializers are listed in member declaration order (mThreadID, then
+// mThreadName), which is the order in which they actually run.
+LLWatchdogEntry::LLWatchdogEntry(const std::string& thread_name)
+ : mThreadID(LLThread::currentID())
+ , mThreadName(thread_name)
{
}
@@ -89,11 +91,16 @@ void LLWatchdogEntry::stop()
LLWatchdog::getInstance()->remove(this);
}
}
+// Returns the entry's label followed by ": " and the watched thread's id,
+// as used in the watchdog expiry log message.
+// The id is streamed instead of being passed to llformat("%d"): mThreadID is
+// a std::thread::id, which is not an int and must not be pushed through
+// printf-style varargs.
+// NOTE(review): uses std::ostringstream (<sstream>); presumably already in
+// scope via the logging headers behind LL_ERRS -- confirm.
+std::string LLWatchdogEntry::getThreadName() const
+{
+ std::ostringstream label;
+ label << mThreadName << ": " << mThreadID;
+ return label.str();
+}
// LLWatchdogTimeout
const std::string UNINIT_STRING = "uninitialized";
-LLWatchdogTimeout::LLWatchdogTimeout() :
+LLWatchdogTimeout::LLWatchdogTimeout(const std::string& thread_name) :
+ LLWatchdogEntry(thread_name),
mTimeout(0.0f),
mPingState(UNINIT_STRING)
{
@@ -249,9 +256,21 @@ void LLWatchdog::run()
{
LLAppViewer::instance()->createErrorMarker(LAST_EXEC_FROZE);
}
- // Todo1: warn user?
+ // Todo1: Warn user?
// Todo2: We probably want to report even if 5 seconds passed, just not error 'yet'.
- LL_ERRS() << "Watchdog timer expired; assuming viewer is hung and crashing" << LL_ENDL;
+ // Todo3: This will report crash as 'llerror', consider adding 'watchdog' reason.
+ std::string last_state = (*result)->getLastState();
+ if (last_state.empty())
+ {
+ LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName()
+ << " expired; assuming viewer is hung and crashing" << LL_ENDL;
+ }
+ else
+ {
+ LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName()
+ << " expired with state: " << last_state
+ << "; assuming viewer is hung and crashing" << LL_ENDL;
+ }
}
}
diff --git a/indra/newview/llwatchdog.h b/indra/newview/llwatchdog.h
index b7dd55577e..a8056f4337 100644
--- a/indra/newview/llwatchdog.h
+++ b/indra/newview/llwatchdog.h
@@ -36,7 +36,7 @@
class LLWatchdogEntry
{
public:
- LLWatchdogEntry();
+ LLWatchdogEntry(const std::string &thread_name);
virtual ~LLWatchdogEntry();
// isAlive is accessed by the watchdog thread.
@@ -46,12 +46,19 @@ public:
virtual void reset() = 0;
virtual void start();
virtual void stop();
+ virtual std::string getLastState() const { return std::string(); }
+ typedef std::thread::id id_t;
+ std::string getThreadName() const;
+
+private:
+ id_t mThreadID; // ID of the thread being watched
+ std::string mThreadName;
};
class LLWatchdogTimeout : public LLWatchdogEntry
{
public:
- LLWatchdogTimeout();
+ LLWatchdogTimeout(const std::string& thread_name);
virtual ~LLWatchdogTimeout();
bool isAlive() const override;
@@ -63,6 +70,7 @@ public:
void setTimeout(F32 d);
void ping(std::string_view state);
const std::string& getState() {return mPingState; }
+ std::string getLastState() const override { return mPingState; }
private:
LLTimer mTimer;