summaryrefslogtreecommitdiff
path: root/indra/llcommon/llwatchdog.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'indra/llcommon/llwatchdog.cpp')
-rw-r--r--indra/llcommon/llwatchdog.cpp290
1 files changed, 290 insertions, 0 deletions
diff --git a/indra/llcommon/llwatchdog.cpp b/indra/llcommon/llwatchdog.cpp
new file mode 100644
index 0000000000..fa240a9ed7
--- /dev/null
+++ b/indra/llcommon/llwatchdog.cpp
@@ -0,0 +1,290 @@
+/**
+ * @file llthreadwatchdog.cpp
+ * @brief The LLThreadWatchdog class definitions
+ *
+ * $LicenseInfo:firstyear=2007&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2010, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+
+#include "llwatchdog.h"
+#include "llthread.h"
+
+constexpr U32 WATCHDOG_SLEEP_TIME_USEC = 1000000U;
+
+// This class runs the watchdog timing thread.
+class LLWatchdogTimerThread : public LLThread
+{
+public:
+ LLWatchdogTimerThread() :
+ LLThread("Watchdog"),
+ mSleepMsecs(0),
+ mStopping(false)
+ {
+ }
+
+ ~LLWatchdogTimerThread() {}
+
+ void setSleepTime(long ms) { mSleepMsecs = ms; }
+ void stop()
+ {
+ mStopping = true;
+ mSleepMsecs = 1;
+ }
+
+ void run() override
+ {
+ while(!mStopping)
+ {
+ LLWatchdog::getInstance()->run();
+ ms_sleep(mSleepMsecs);
+ }
+ }
+
+private:
+ long mSleepMsecs;
+ bool mStopping;
+};
+
+// LLWatchdogEntry
+LLWatchdogEntry::LLWatchdogEntry(const std::string& thread_name)
+ : mThreadName(thread_name)
+ , mThreadID(LLThread::currentID())
+{
+}
+
+LLWatchdogEntry::~LLWatchdogEntry()
+{
+ stop();
+}
+
+void LLWatchdogEntry::start()
+{
+ LLWatchdog::getInstance()->add(this);
+}
+
+void LLWatchdogEntry::stop()
+{
+ // this can happen very late in the shutdown sequence
+ if (!LLWatchdog::wasDeleted())
+ {
+ LLWatchdog::getInstance()->remove(this);
+ }
+}
+std::string LLWatchdogEntry::getThreadName() const
+{
+ return mThreadName + llformat(": %d", mThreadID);
+}
+
+// LLWatchdogTimeout
+const std::string UNINIT_STRING = "uninitialized";
+
+LLWatchdogTimeout::LLWatchdogTimeout(const std::string& thread_name) :
+ LLWatchdogEntry(thread_name),
+ mTimeout(0.0f),
+ mPingState(UNINIT_STRING)
+{
+}
+
+LLWatchdogTimeout::~LLWatchdogTimeout()
+{
+}
+
+bool LLWatchdogTimeout::isAlive() const
+{
+ return (mTimer.getStarted() && !mTimer.hasExpired());
+}
+
+void LLWatchdogTimeout::reset()
+{
+ mTimer.setTimerExpirySec(mTimeout);
+}
+
+void LLWatchdogTimeout::setTimeout(F32 d)
+{
+ mTimeout = d;
+}
+
+void LLWatchdogTimeout::start(std::string_view state)
+{
+ if (mTimeout == 0)
+ {
+ LL_WARNS() << "Cant' start watchdog entry - no timeout set" << LL_ENDL;
+ return;
+ }
+ // Order of operation is very important here.
+ // After LLWatchdogEntry::start() is called
+ // LLWatchdogTimeout::isAlive() will be called asynchronously.
+ ping(state);
+ mTimer.start();
+ mTimer.setTimerExpirySec(mTimeout); // timer expiration set to 0 by start()
+ LLWatchdogEntry::start();
+}
+
+void LLWatchdogTimeout::stop()
+{
+ LLWatchdogEntry::stop();
+ mTimer.stop();
+}
+
+void LLWatchdogTimeout::ping(std::string_view state)
+{
+ if (!state.empty())
+ {
+ mPingState = state;
+ }
+ reset();
+}
+
+// LLWatchdog
+LLWatchdog::LLWatchdog()
+ :mSuspectsAccessMutex()
+ ,mTimer(nullptr)
+ ,mLastClockCount(0)
+{
+}
+
+LLWatchdog::~LLWatchdog()
+{
+}
+
+void LLWatchdog::add(LLWatchdogEntry* e)
+{
+ lockThread();
+ mSuspects.insert(e);
+ unlockThread();
+}
+
+void LLWatchdog::remove(LLWatchdogEntry* e)
+{
+ lockThread();
+ mSuspects.erase(e);
+ unlockThread();
+}
+
+void LLWatchdog::init(func_t set_error_state_callback)
+{
+ if (!mSuspectsAccessMutex && !mTimer)
+ {
+ mSuspectsAccessMutex = new LLMutex();
+ mTimer = new LLWatchdogTimerThread();
+ mTimer->setSleepTime(WATCHDOG_SLEEP_TIME_USEC / 1000);
+ mLastClockCount = LLTimer::getTotalTime();
+
+ // mTimer->start() kicks off the thread, any code after
+ // start needs to use the mSuspectsAccessMutex
+ mTimer->start();
+ }
+ mCreateMarkerFnc = set_error_state_callback;
+}
+
+void LLWatchdog::cleanup()
+{
+ if (mTimer)
+ {
+ mTimer->stop();
+ delete mTimer;
+ mTimer = nullptr;
+ }
+
+ if (mSuspectsAccessMutex)
+ {
+ delete mSuspectsAccessMutex;
+ mSuspectsAccessMutex = nullptr;
+ }
+
+ mLastClockCount = 0;
+}
+
+void LLWatchdog::run()
+{
+ lockThread();
+
+ // Check the time since the last call to run...
+ // If the time elapsed is two times greater than the regualr sleep time
+ // reset the active timeouts.
+ constexpr U32 TIME_ELAPSED_MULTIPLIER = 2;
+ U64 current_time = LLTimer::getTotalTime();
+ U64 current_run_delta = current_time - mLastClockCount;
+ mLastClockCount = current_time;
+
+ if (current_run_delta > (WATCHDOG_SLEEP_TIME_USEC * TIME_ELAPSED_MULTIPLIER))
+ {
+ LL_INFOS() << "Watchdog thread delayed: resetting entries." << LL_ENDL;
+ for (const auto& suspect : mSuspects)
+ {
+ suspect->reset();
+ }
+ }
+ else
+ {
+ SuspectsRegistry::iterator result =
+ std::find_if(mSuspects.begin(),
+ mSuspects.end(),
+ [](const LLWatchdogEntry* suspect){ return ! suspect->isAlive(); });
+ if (result != mSuspects.end())
+ {
+ // error!!!
+ if(mTimer)
+ {
+ mTimer->stop();
+ }
+
+ // Sets error marker file
+ mCreateMarkerFnc();
+ // Todo1: Warn user?
+ // Todo2: We probably want to report even if 5 seconds passed, just not error 'yet'.
+ std::string last_state = (*result)->getLastState();
+ if (last_state.empty())
+ {
+ LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName()
+ << " expired; assuming viewer is hung and crashing" << LL_ENDL;
+ }
+ else
+ {
+ LL_ERRS() << "Watchdog timer for thread " << (*result)->getThreadName()
+ << " expired with state: " << last_state
+ << "; assuming viewer is hung and crashing" << LL_ENDL;
+ }
+ }
+ }
+
+
+ unlockThread();
+}
+
+void LLWatchdog::lockThread()
+{
+ if (mSuspectsAccessMutex)
+ {
+ mSuspectsAccessMutex->lock();
+ }
+}
+
+void LLWatchdog::unlockThread()
+{
+ if (mSuspectsAccessMutex)
+ {
+ mSuspectsAccessMutex->unlock();
+ }
+}