diff options
Diffstat (limited to 'indra')
| -rw-r--r-- | indra/llcommon/llapp.cpp | 163 | ||||
| -rw-r--r-- | indra/llcommon/llapp.h | 3 | ||||
| -rw-r--r-- | indra/llcommon/llheartbeat.cpp | 165 | ||||
| -rw-r--r-- | indra/llcommon/llheartbeat.h | 73 | ||||
| -rw-r--r-- | indra/llcommon/lltimer.cpp | 79 | ||||
| -rw-r--r-- | indra/llcommon/lltimer.h | 6 | 
6 files changed, 378 insertions, 111 deletions
| diff --git a/indra/llcommon/llapp.cpp b/indra/llcommon/llapp.cpp index 6591bbc070..067dc4fc43 100644 --- a/indra/llcommon/llapp.cpp +++ b/indra/llcommon/llapp.cpp @@ -49,11 +49,24 @@  LONG WINAPI default_windows_exception_handler(struct _EXCEPTION_POINTERS *exception_infop);  BOOL ConsoleCtrlHandler(DWORD fdwCtrlType);  #else -#include <unistd.h> // for fork() +# include <signal.h> +# include <unistd.h> // for fork()  void setup_signals();  void default_unix_signal_handler(int signum, siginfo_t *info, void *); -const S32 LL_SMACKDOWN_SIGNAL = SIGUSR1; -#endif +# if LL_DARWIN +/* OSX doesn't support SIGRT* */ +S32 LL_SMACKDOWN_SIGNAL = SIGUSR1; +S32 LL_HEARTBEAT_SIGNAL = SIGUSR2; +# else +/* We want reliable delivery of our signals - SIGRT* is it. */ +/* Old LinuxThreads versions eat SIGRTMIN+0 to SIGRTMIN+2, avoid those. */ +/* Note that SIGRTMIN/SIGRTMAX may expand to a glibc function call with a +   nonconstant result so these are not consts and cannot be used in constant- +   expressions.  SIGRTMAX may return -1 on rare broken setups. */ +S32 LL_SMACKDOWN_SIGNAL = (SIGRTMAX >= 0) ? (SIGRTMAX-1) : SIGUSR1; +S32 LL_HEARTBEAT_SIGNAL = (SIGRTMAX >= 0) ? (SIGRTMAX-0) : SIGUSR2; +# endif // LL_DARWIN +#endif // LL_WINDOWS  // the static application instance  LLApp* LLApp::sApplication = NULL; @@ -501,6 +514,9 @@ void setup_signals()  	sigaction(SIGSEGV, &act, NULL);  	sigaction(SIGSYS, &act, NULL); +	sigaction(LL_HEARTBEAT_SIGNAL, &act, NULL); +	sigaction(LL_SMACKDOWN_SIGNAL, &act, NULL); +  	// Asynchronous signals that are normally ignored  	sigaction(SIGCHLD, &act, NULL);  	sigaction(SIGUSR2, &act, NULL); @@ -511,7 +527,6 @@ void setup_signals()  	sigaction(SIGINT, &act, NULL);  	// Asynchronous signals that result in core -	sigaction(LL_SMACKDOWN_SIGNAL, &act, NULL);  	sigaction(SIGQUIT, &act, NULL);  } @@ -533,6 +548,9 @@ void clear_signals()  	sigaction(SIGSEGV, &act, NULL);  	sigaction(SIGSYS, &act, NULL); +	sigaction(LL_HEARTBEAT_SIGNAL, &act, NULL); +	sigaction(LL_SMACKDOWN_SIGNAL, &act, NULL); +  	// Asynchronous signals that are normally ignored  	sigaction(SIGCHLD, &act, NULL); @@ -543,7 +561,6 @@ void clear_signals()  	// Asynchronous signals that result in core  	sigaction(SIGUSR2, &act, NULL); -	sigaction(LL_SMACKDOWN_SIGNAL, &act, NULL);  	sigaction(SIGQUIT, &act, NULL);  } @@ -564,16 +581,7 @@ void default_unix_signal_handler(int signum, siginfo_t *info, void *)  	switch (signum)  	{ -	case SIGALRM: -	case SIGPIPE: -	case SIGUSR2: -		// We don't care about these signals, ignore them -		if (LLApp::sLogInSignal) -		{ -			llinfos << "Signal handler - Ignoring this signal" << llendl; -		} -		return; -    case SIGCHLD: +	case SIGCHLD:  		if (LLApp::sLogInSignal)  		{  			llinfos << "Signal handler - Got SIGCHLD from " << info->si_pid << llendl; @@ -602,59 +610,6 @@ void default_unix_signal_handler(int signum, siginfo_t *info, void *)  		clear_signals();  		raise(signum);  		return; -	case LL_SMACKDOWN_SIGNAL: // Smackdown treated just like any other app termination, for now -		if (LLApp::sLogInSignal) -		{ -			llwarns << "Signal handler - Handling smackdown signal!" << llendl; -		} -		else -		{ -			// Don't log anything, even errors - this is because this signal could happen anywhere. -			LLError::setDefaultLevel(LLError::LEVEL_NONE); -		} -		 -		// Change the signal that we reraise to SIGABRT, so we generate a core dump. -		signum = SIGABRT; -	case SIGBUS: -	case SIGSEGV: -	case SIGQUIT: -		if (LLApp::sLogInSignal) -		{ -			llwarns << "Signal handler - Handling fatal signal!" << llendl; -		} -		if (LLApp::isError()) -		{ -			// Received second fatal signal while handling first, just die right now -			// Set the signal handlers back to default before handling the signal - this makes the next signal wipe out the app. -			clear_signals(); - -			if (LLApp::sLogInSignal) -			{ -				llwarns << "Signal handler - Got another fatal signal while in the error handler, die now!" << llendl; -			} -			raise(signum); -			return; -		} -			 -		if (LLApp::sLogInSignal) -		{ -			llwarns << "Signal handler - Flagging error status and waiting for shutdown" << llendl; -		} -		// Flag status to ERROR, so thread_error does its work. -		LLApp::setError(); -		// Block in the signal handler until somebody says that we're done. -		while (LLApp::sErrorThreadRunning && !LLApp::isStopped()) -		{ -			ms_sleep(10); -		} - -		if (LLApp::sLogInSignal) -		{ -			llwarns << "Signal handler - App is stopped, reraising signal" << llendl; -		} -		clear_signals(); -		raise(signum); -		return;  	case SIGINT:  	case SIGHUP:  	case SIGTERM: @@ -675,10 +630,76 @@ void default_unix_signal_handler(int signum, siginfo_t *info, void *)  		}  		LLApp::setQuitting();  		return; +	case SIGALRM: +	case SIGPIPE: +	case SIGUSR2:  	default: -		if (LLApp::sLogInSignal) -		{ -			llwarns << "Signal handler - Unhandled signal, ignoring!" << llendl; +		if (signum == LL_SMACKDOWN_SIGNAL || +		    signum == SIGBUS || +		    signum == SIGILL || +		    signum == SIGFPE || +		    signum == SIGSEGV || +		    signum == SIGQUIT) +		{  +			if (signum == LL_SMACKDOWN_SIGNAL) +			{ +				// Smackdown treated just like any other app termination, for now +				if (LLApp::sLogInSignal) +				{ +					llwarns << "Signal handler - Handling smackdown signal!" << llendl; +				} +				else +				{ +					// Don't log anything, even errors - this is because this signal could happen anywhere. +					LLError::setDefaultLevel(LLError::LEVEL_NONE); +				} +				 +				// Change the signal that we reraise to SIGABRT, so we generate a core dump. +				signum = SIGABRT; +			} +			 +			if (LLApp::sLogInSignal) +			{ +				llwarns << "Signal handler - Handling fatal signal!" << llendl; +			} +			if (LLApp::isError()) +			{ +				// Received second fatal signal while handling first, just die right now +				// Set the signal handlers back to default before handling the signal - this makes the next signal wipe out the app. +				clear_signals(); +				 +				if (LLApp::sLogInSignal) +				{ +					llwarns << "Signal handler - Got another fatal signal while in the error handler, die now!" << llendl; +				} +				raise(signum); +				return; +			} +			 +			if (LLApp::sLogInSignal) +			{ +				llwarns << "Signal handler - Flagging error status and waiting for shutdown" << llendl; +			} +			// Flag status to ERROR, so thread_error does its work. +			LLApp::setError(); +			// Block in the signal handler until somebody says that we're done. +			while (LLApp::sErrorThreadRunning && !LLApp::isStopped()) +			{ +				ms_sleep(10); +			} +			 +			if (LLApp::sLogInSignal) +			{ +				llwarns << "Signal handler - App is stopped, reraising signal" << llendl; +			} +			clear_signals(); +			raise(signum); +			return; +		} else { +			if (LLApp::sLogInSignal) +			{ +				llinfos << "Signal handler - Unhandled signal " << signum << ", ignoring!" << llendl; +			}  		}  	}  } diff --git a/indra/llcommon/llapp.h b/indra/llcommon/llapp.h index d64af62538..c199601c20 100644 --- a/indra/llcommon/llapp.h +++ b/indra/llcommon/llapp.h @@ -46,7 +46,8 @@ typedef void (*LLAppErrorHandler)();  typedef void (*LLAppChildCallback)(int pid, bool exited, int status);  #if !LL_WINDOWS -extern const S32 LL_SMACKDOWN_SIGNAL; +extern S32 LL_SMACKDOWN_SIGNAL; +extern S32 LL_HEARTBEAT_SIGNAL;  // Clear all of the signal handlers (which we want to do for the child process when we fork  void clear_signals(); diff --git a/indra/llcommon/llheartbeat.cpp b/indra/llcommon/llheartbeat.cpp new file mode 100644 index 0000000000..782a4f7ff6 --- /dev/null +++ b/indra/llcommon/llheartbeat.cpp @@ -0,0 +1,165 @@ +/** + * @file llheartbeat.cpp + * @brief Class encapsulating logic for telling a watchdog that we live. + * + * $LicenseInfo:firstyear=2008&license=viewergpl$ + *  + * Copyright (c) 2008, Linden Research, Inc. + *  + * Second Life Viewer Source Code + * The source code in this file ("Source Code") is provided by Linden Lab + * to you under the terms of the GNU General Public License, version 2.0 + * ("GPL"), unless you have obtained a separate licensing agreement + * ("Other License"), formally executed by you and Linden Lab.  Terms of + * the GPL can be found in doc/GPL-license.txt in this distribution, or + * online at http://secondlife.com/developers/opensource/gplv2 + *  + * There are special exceptions to the terms and conditions of the GPL as + * it is applied to this Source Code. View the full text of the exception + * in the file doc/FLOSS-exception.txt in this software distribution, or + * online at http://secondlife.com/developers/opensource/flossexception + *  + * By copying, modifying or distributing this software, you acknowledge + * that you have read and understood your obligations described above, + * and agree to abide by those obligations. + *  + * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO + * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, + * COMPLETENESS OR PERFORMANCE. + * $/LicenseInfo$ + */ + +#include <errno.h> +#include <signal.h> + +#include "linden_common.h" +#include "llapp.h" + +#include "llheartbeat.h" + +LLHeartbeat::LLHeartbeat(F32 secs_between_heartbeat, +			 F32 aggressive_heartbeat_panic_secs, +			 F32 aggressive_heartbeat_max_blocking_secs) +	: mSecsBetweenHeartbeat(secs_between_heartbeat), +	  mAggressiveHeartbeatPanicSecs(aggressive_heartbeat_panic_secs), +	  mAggressiveHeartbeatMaxBlockingSecs(aggressive_heartbeat_max_blocking_secs), +	  mSuppressed(false) +{ +	mBeatTimer.reset(); +	mBeatTimer.setTimerExpirySec(mSecsBetweenHeartbeat); +	mPanicTimer.reset(); +	mPanicTimer.setTimerExpirySec(mAggressiveHeartbeatPanicSecs); +} + +LLHeartbeat::~LLHeartbeat() +{ +	// do nothing. +} + +void +LLHeartbeat::setSuppressed(bool is_suppressed) +{ +	mSuppressed = is_suppressed; +} + +// returns 0 on success, -1 on permanent failure, 1 on temporary failure +int +LLHeartbeat::rawSend() +{ +#if LL_WINDOWS +	return 0; // Pretend we succeeded. +#else +	if (mSuppressed) +		return 0; // Pretend we succeeded. + +	union sigval dummy; +	int result = sigqueue(getppid(), LL_HEARTBEAT_SIGNAL, dummy); +	if (result == 0) +		return 0; // success + +	int err = errno; +	if (err == EAGAIN) +		return 1; // failed to queue, try again + +	return -1; // other failure. +#endif +} + +int +LLHeartbeat::rawSendWithTimeout(F32 timeout_sec) +{ +	int result = 0; + +	// Spin tightly until our heartbeat is digested by the watchdog +	// or we time-out.  We don't really want to sleep because our +	// wake-up time might be undesirably synchronised to a hidden +	// clock by the system's scheduler. +	mTimeoutTimer.reset(); +	mTimeoutTimer.setTimerExpirySec(timeout_sec); +	do { +		result = rawSend(); +		//llinfos << " HEARTSENDc=" << result << llendl; +	} while (result==1 && !mTimeoutTimer.hasExpired()); + +	return result; +} + +bool +LLHeartbeat::send(F32 timeout_sec) +{ +	bool total_success = false; +	int result = 1; + +	if (timeout_sec > 0.f) { +		// force a spin until success or timeout +		result = rawSendWithTimeout(timeout_sec); +	} else { +		if (mBeatTimer.hasExpired()) { +			// zero-timeout; we don't care too much whether our +			// heartbeat was digested. +			result = rawSend(); +			//llinfos << " HEARTSENDb=" << result << llendl; +		} +	} + +	if (result == -1) { +		// big failure. +	} else if (result == 0) { +		total_success = true; +	} else { +		// need to retry at some point +	} + +	if (total_success) { +		mBeatTimer.reset(); +		mBeatTimer.setTimerExpirySec(mSecsBetweenHeartbeat); +		// reset the time until we start panicking about lost +		// heartbeats again. +		mPanicTimer.reset(); +		mPanicTimer.setTimerExpirySec(mAggressiveHeartbeatPanicSecs); +	} else { +		// leave mBeatTimer as expired so we'll lazily poke the +		// watchdog again next time through. +	} + +	if (mPanicTimer.hasExpired()) { +		// It's been ages since we successfully had a heartbeat +		// digested by the watchdog.  Sit here and spin a while +		// in the hope that we can force it through. +		llwarns << "Unable to deliver heartbeat to launcher for " << mPanicTimer.getElapsedTimeF32() << " seconds.  Going to try very hard for up to " << mAggressiveHeartbeatMaxBlockingSecs << " seconds." << llendl; +		result = rawSendWithTimeout(mAggressiveHeartbeatMaxBlockingSecs); +		if (result == 0) { +			total_success = true; +		} else { +			// we couldn't even force it through.  That's bad, +			// but we'll try again in a while. +			llwarns << "Could not deliver heartbeat to launcher even after trying very hard for " << mAggressiveHeartbeatMaxBlockingSecs << " seconds." << llendl; +		} +		 +		// in any case, reset the panic timer. +		mPanicTimer.reset(); +		mPanicTimer.setTimerExpirySec(mAggressiveHeartbeatPanicSecs); +	} + +	return total_success; +} diff --git a/indra/llcommon/llheartbeat.h b/indra/llcommon/llheartbeat.h new file mode 100644 index 0000000000..0761642e68 --- /dev/null +++ b/indra/llcommon/llheartbeat.h @@ -0,0 +1,73 @@ +/**  + * @file llheartbeat.h + * @brief Class encapsulating logic for telling a watchdog that we live. + * + * $LicenseInfo:firstyear=2008&license=viewergpl$ + *  + * Copyright (c) 2008, Linden Research, Inc. + *  + * Second Life Viewer Source Code + * The source code in this file ("Source Code") is provided by Linden Lab + * to you under the terms of the GNU General Public License, version 2.0 + * ("GPL"), unless you have obtained a separate licensing agreement + * ("Other License"), formally executed by you and Linden Lab.  Terms of + * the GPL can be found in doc/GPL-license.txt in this distribution, or + * online at http://secondlife.com/developers/opensource/gplv2 + *  + * There are special exceptions to the terms and conditions of the GPL as + * it is applied to this Source Code. View the full text of the exception + * in the file doc/FLOSS-exception.txt in this software distribution, or + * online at http://secondlife.com/developers/opensource/flossexception + *  + * By copying, modifying or distributing this software, you acknowledge + * that you have read and understood your obligations described above, + * and agree to abide by those obligations. + *  + * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO + * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, + * COMPLETENESS OR PERFORMANCE. + * $/LicenseInfo$ + */ + +#ifndef LL_LLHEARTBEAT_H +#define LL_LLHEARTBEAT_H + +#include "linden_common.h" + +#include "lltimer.h" + +// Note: Win32 does not support the heartbeat/smackdown system; +//   heartbeat-delivery turns into a no-op there. + +class LLHeartbeat +{ +public: +	// secs_between_heartbeat: after a heartbeat is successfully delivered, +	//   we suppress sending more for this length of time. +	// aggressive_heartbeat_panic_secs: if we've been failing to +	//   successfully deliver heartbeats for this length of time then +	//   we block for a while until we're really sure we got one delivered. +	// aggressive_heartbeat_max_blocking_secs: this is the length of +	//   time we block for when we're aggressively ensuring that a 'panic' +	//   heartbeat was delivered. +	LLHeartbeat(F32 secs_between_heartbeat = 5.0f, +		    F32 aggressive_heartbeat_panic_secs = 10.0f, +		    F32 aggressive_heartbeat_max_blocking_secs = 4.0f); +	~LLHeartbeat(); + +	bool send(F32 timeout_sec = 0.0f); +	void setSuppressed(bool is_suppressed); + +private: +	int rawSend(); +	int rawSendWithTimeout(F32 timeout_sec); +	F32 mSecsBetweenHeartbeat; +	F32 mAggressiveHeartbeatPanicSecs; +	F32 mAggressiveHeartbeatMaxBlockingSecs; +	bool mSuppressed; +	LLTimer mBeatTimer; +	LLTimer mPanicTimer; +	LLTimer mTimeoutTimer; +}; + +#endif // LL_HEARTBEAT_H diff --git a/indra/llcommon/lltimer.cpp b/indra/llcommon/lltimer.cpp index 9786d44899..cf984e4fe2 100644 --- a/indra/llcommon/lltimer.cpp +++ b/indra/llcommon/lltimer.cpp @@ -39,10 +39,8 @@  #	define WIN32_LEAN_AND_MEAN  #	include <winsock2.h>  #	include <windows.h> -#elif LL_LINUX || LL_SOLARIS -#	include <sys/time.h> -#	include <sched.h> -#elif LL_DARWIN +#elif LL_LINUX || LL_SOLARIS || LL_DARWIN +#       include <errno.h>  #	include <sys/time.h>  #else   #	error "architecture not supported" @@ -81,42 +79,55 @@ U64 gLastTotalTimeClockCount = 0;  //---------------------------------------------------------------------------  #if LL_WINDOWS -void ms_sleep(long ms) +void ms_sleep(U32 ms)  { -	Sleep((U32)ms); +	Sleep(ms);  } - -void llyield() +#elif LL_LINUX || LL_SOLARIS || LL_DARWIN +void ms_sleep(U32 ms)  { -	SleepEx(0, TRUE); // Relinquishes time slice to any thread of equal priority, can be woken up by extended IO functions -} -#elif LL_LINUX || LL_SOLARIS -void ms_sleep(long ms) -{ -	struct timespec t; -	t.tv_sec = ms / 1000; -	t.tv_nsec = (ms % 1000) * 1000000l; -	nanosleep(&t, NULL); -} +	long mslong = ms; // tv_nsec is a long +	struct timespec thiswait, nextwait; +	bool sleep_more = false; -void llyield() -{ -	sched_yield(); -} -#elif LL_DARWIN -void ms_sleep(long ms) -{ -	struct timespec t; -	t.tv_sec = ms / 1000; -	t.tv_nsec = (ms % 1000) * 1000000l; -	nanosleep(&t, NULL); -} +	thiswait.tv_sec = ms / 1000; +	thiswait.tv_nsec = (mslong % 1000) * 1000000l; +	do { +		int result = nanosleep(&thiswait, &nextwait); -void llyield() -{ -//	sched_yield(); +		// check if sleep was interrupted by a signal; unslept +		// remainder was written back into 't' and we just nanosleep +		// again. +		sleep_more = (result == -1 && EINTR == errno); + +		if (sleep_more) +		{ +			if ( nextwait.tv_sec > thiswait.tv_sec || +			     (nextwait.tv_sec == thiswait.tv_sec && +			      nextwait.tv_nsec >= thiswait.tv_nsec) ) +			{ +				// if the remaining time isn't actually going +				// down then we're being shafted by low clock +				// resolution - manually massage the sleep time +				// downward. +				if (nextwait.tv_nsec > 1000000) { +					// lose 1ms +					nextwait.tv_nsec -= 1000000; +				} else { +					if (nextwait.tv_sec == 0) { +						// already so close to finished +						sleep_more = false; +					} else { +						// lose up to 1ms +						nextwait.tv_nsec = 0; +					} +				} +			} +			thiswait = nextwait; +		} +	} while (sleep_more);  } -#else  +#else  # error "architecture not supported"  #endif diff --git a/indra/llcommon/lltimer.h b/indra/llcommon/lltimer.h index 647f042828..113eb1e9e3 100644 --- a/indra/llcommon/lltimer.h +++ b/indra/llcommon/lltimer.h @@ -112,12 +112,8 @@ U64 get_clock_count();  F64 calc_clock_frequency(U32 msecs);  void update_clock_frequencies(); -  // Sleep for milliseconds -void ms_sleep(long ms); - -// Yield -//void llyield(); // Yield your timeslice - not implemented yet for Mac, so commented out. +void ms_sleep(U32 ms);  // Returns the correct UTC time in seconds, like time(NULL).  // Useful on the viewer, which may have its local clock set wrong. | 
