Convert LLProcess implementation from platform-specific to using APR.

Include logic to engage the Linden apr_procattr_autokill_set() extension: on Windows, the magic CreateProcess() flag (CREATE_BREAKAWAY_FROM_JOB) must be pushed down to the apr_proc_create() level. When using an APR package without that extension, the present implementation should tie the lifespan of (e.g.) SLVoice.exe to the viewer's on Windows XP, but probably won't on Windows 7, which needs the magic flag on CreateProcess().

Using the APR child-termination callback requires us to define state (e.g. LLProcess::RUNNING). Take the opportunity to present Status, capturing the state and (if terminated) the rc or signal number; but since most of the time all the caller really wants is to log the outcome, also present a status string, encapsulating the logic to examine the state and describe exited-with-rc vs. killed-by-signal.

The new Status logic may report clearer results in the case of a Windows child process killed by an exception.

Clarify that the static LLProcess::isRunning(handle) overload is only for use when the original LLProcess object has been destroyed: really only for unit tests. We necessarily retain our original platform-specific implementations for just that one method. (Nonstatic isRunning() no longer calls the static method.)

Clarify log output from llprocess_test.cpp in a couple of places.
author: Nat Goodspeed <nat@lindenlab.com> 2012-02-07 10:53:23 -0500
committer: Nat Goodspeed <nat@lindenlab.com> 2012-02-07 10:53:23 -0500
commit: aafb03b29f5166e8978931ad8b717be32d942836 (patch)
tree: db1f294e7327a876ddb91ffb3ac37423f987dd83 /indra
parent: d99acd56cdc41d72a073a4419e3e51c356e675bb (diff)
3 files changed, 455 insertions, 167 deletions
diff --git a/indra/llcommon/llprocess.cpp b/indra/llcommon/llprocess.cpp
index 8611d67f25..bc27002701 100644
--- a/indra/llcommon/llprocess.cpp
+++ b/indra/llcommon/llprocess.cpp
@@ -30,11 +30,15 @@
 #include "llsingleton.h"
 #include "llstring.h"
 #include "stringize.h"
+#include "llapr.h"
 
 #include <boost/foreach.hpp>
 #include <iostream>
 #include <stdexcept>
 
+static std::string empty;
+static LLProcess::Status interpret_status(int status);
+
 /// Need an exception to avoid constructing an invalid LLProcess object, but
 /// internal use only
 struct LLProcessError: public std::runtime_error
@@ -55,9 +59,14 @@ LLProcessPtr LLProcess::create(const LLSDOrParams& params)
 	}
 }
 
+/// Call an apr function returning apr_status_t. On failure, log warning and
+/// throw LLProcessError mentioning the function call that produced that
+/// result.
+#define chkapr(func)                            \
+    if (ll_apr_warn_status(func))               \
+        throw LLProcessError(#func " failed")
+
 LLProcess::LLProcess(const LLSDOrParams& params):
-	mProcessID(0),
-	mProcessHandle(0),
 	mAutokill(params.autokill)
 {
 	if (! params.validateBlock(true))
@@ -66,31 +75,298 @@ LLProcess::LLProcess(const LLSDOrParams& params):
 									   << LLSDNotationStreamer(params)));
 	}
 
-	launch(params);
+	apr_procattr_t *procattr = NULL;
+	chkapr(apr_procattr_create(&procattr, gAPRPoolp));
+
+	// For which of stdin, stdout, stderr should we create a pipe to the
+	// child? In the viewer, there are only a couple viable
+	// apr_procattr_io_set() alternatives: inherit the viewer's own stdxxx
+	// handle (APR_NO_PIPE, e.g. for stdout, stderr), or create a pipe that's
+	// blocking on the child end but nonblocking at the viewer end
+	// (APR_CHILD_BLOCK). The viewer can't block for anything: the parent end
+	// MUST be nonblocking. As the APR documentation itself points out, it
+	// makes very little sense to set nonblocking I/O for the child end of a
+	// pipe: only a specially-written child could deal with that.
+	// Other major options could include explicitly creating a single APR pipe
+	// and passing it as both stdout and stderr (apr_procattr_child_out_set(),
+	// apr_procattr_child_err_set()), or accepting a filename, opening it and
+	// passing that apr_file_t (simple <, >, 2> redirect emulation).
+//	chkapr(apr_procattr_io_set(procattr, APR_CHILD_BLOCK, APR_CHILD_BLOCK, APR_CHILD_BLOCK));
+	chkapr(apr_procattr_io_set(procattr, APR_NO_PIPE, APR_NO_PIPE, APR_NO_PIPE));
+
+	// Thumbs down on implicitly invoking the shell to invoke the child. From
+	// our point of view, the other major alternative to APR_PROGRAM_PATH
+	// would be APR_PROGRAM_ENV: still copy environment, but require full
+	// executable pathname. I don't see a downside to searching the PATH,
+	// though: if our caller wants (e.g.) a specific Python interpreter, s/he
+	// can still pass the full pathname.
+	chkapr(apr_procattr_cmdtype_set(procattr, APR_PROGRAM_PATH));
+	// YES, do extra work if necessary to report child exec() failures back to
+	// parent process.
+	chkapr(apr_procattr_error_check_set(procattr, 1));
+	// Do not start a non-autokill child in detached state. On Posix
+	// platforms, this setting attempts to daemonize the new child, closing
+	// std handles and the like, and that's a bit more detachment than we
+	// want. autokill=false just means not to implicitly kill the child when
+	// the parent terminates!
+//	chkapr(apr_procattr_detach_set(procattr, params.autokill? 0 : 1));
+
+	if (params.autokill)
+	{
+#if defined(APR_HAS_PROCATTR_AUTOKILL_SET)
+		apr_status_t ok = apr_procattr_autokill_set(procattr, 1);
+# if LL_WINDOWS
+		// As of 2012-02-02, we only expect this to be implemented on Windows.
+		// Avoid spamming the log with warnings we fully expect.
+		ll_apr_warn_status(ok);
+# endif // LL_WINDOWS
+#else
+		LL_WARNS("LLProcess") << "This version of APR lacks Linden apr_procattr_autokill_set() extension" << LL_ENDL;
+#endif
+	}
+
+	// Have to instantiate named std::strings for string params items so their
+	// c_str() values persist.
+	std::string cwd(params.cwd);
+	if (! cwd.empty())
+	{
+		chkapr(apr_procattr_dir_set(procattr, cwd.c_str()));
+	}
+
+	// create an argv vector for the child process
+	std::vector<const char*> argv;
+
+	// add the executable path
+	std::string executable(params.executable);
+	argv.push_back(executable.c_str());
+
+	// and any arguments
+	std::vector<std::string> args(params.args.begin(), params.args.end());
+	BOOST_FOREACH(const std::string& arg, args)
+	{
+		argv.push_back(arg.c_str());
+	}
+
+	// terminate with a null pointer
+	argv.push_back(NULL);
+
+	// Launch! The NULL would be the environment block, if we were passing one.
+	chkapr(apr_proc_create(&mProcess, argv[0], &argv[0], NULL, procattr, gAPRPoolp));    
+
+	// arrange to call status_callback()
+	apr_proc_other_child_register(&mProcess, &LLProcess::status_callback, this, mProcess.in,
+								  gAPRPoolp);
+	mStatus.mState = RUNNING;
+
+	mDesc = STRINGIZE(LLStringUtil::quote(params.executable) << " (" << mProcess.pid << ')');
+	LL_INFOS("LLProcess") << "Launched " << params << " (" << mProcess.pid << ")" << LL_ENDL;
+
+	// Unless caller explicitly turned off autokill (child should persist),
+	// take steps to terminate the child. This is all suspenders-and-belt: in
+	// theory our destructor should kill an autokill child, but in practice
+	// that doesn't always work (e.g. VWR-21538).
+	if (params.autokill)
+	{
+		// Tie the lifespan of this child process to the lifespan of our APR
+		// pool: on destruction of the pool, forcibly kill the process. Tell
+		// APR to try SIGTERM and wait 3 seconds. If that didn't work, use
+		// SIGKILL.
+		apr_pool_note_subprocess(gAPRPoolp, &mProcess, APR_KILL_AFTER_TIMEOUT);
+
+		// On Windows, associate the new child process with our Job Object.
+		autokill();
+	}
 }
 
 LLProcess::~LLProcess()
 {
+	// Only in state RUNNING are we registered for callback. In UNSTARTED we
+	// haven't yet registered. And since receiving the callback is the only
+	// way we detect child termination, we only change from state RUNNING at
+	// the same time we unregister.
+	if (mStatus.mState == RUNNING)
+	{
+		// We're still registered for a callback: unregister. Do it before
+		// we even issue the kill(): even if kill() somehow prompted an
+		// instantaneous callback (unlikely), this object is going away! Any
+		// information updated in this object by such a callback is no longer
+		// available to any consumer anyway.
+		apr_proc_other_child_unregister(this);
+	}
+
 	if (mAutokill)
 	{
-		kill();
+		kill("destructor");
+	}
+}
+
+bool LLProcess::kill(const std::string& who)
+{
+	if (isRunning())
+	{
+		LL_INFOS("LLProcess") << who << " killing " << mDesc << LL_ENDL;
+
+#if LL_WINDOWS
+		int sig = -1;
+#else  // Posix
+		int sig = SIGTERM;
+#endif
+
+		ll_apr_warn_status(apr_proc_kill(&mProcess, sig));
 	}
+
+	return ! isRunning();
 }
 
 bool LLProcess::isRunning(void)
 {
-	mProcessHandle = isRunning(mProcessHandle, mDesc);
-	return (mProcessHandle != 0);
+	return getStatus().mState == RUNNING;
+}
+
+LLProcess::Status LLProcess::getStatus()
+{
+	// Only when mState is RUNNING might the status change dynamically. For
+	// any other value, pointless to attempt to update status: it won't
+	// change.
+	if (mStatus.mState == RUNNING)
+	{
+		// Tell APR to sense whether the child is still running and call
+		// handle_status() appropriately. We should be able to get the same
+		// info from an apr_proc_wait(APR_NOWAIT) call; but at least in APR
+		// 1.4.2, testing suggests that even with APR_NOWAIT, apr_proc_wait()
+		// blocks the caller. We can't have that in the viewer. Hence the
+		// callback rigmarole. Once we update APR, it's probably worth testing
+		// again. Also -- although there's an apr_proc_other_child_refresh()
+		// call, i.e. get that information for one specific child, it accepts
+		// an 'apr_other_child_rec_t*' that's mentioned NOWHERE else in the
+		// documentation or header files! I would use the specific call if I
+		// knew how. As it is, each call to this method will call callbacks
+		// for ALL still-running child processes. Sigh...
+		apr_proc_other_child_refresh_all(APR_OC_REASON_RUNNING);
+	}
+
+	return mStatus;
+}
+
+std::string LLProcess::getStatusString()
+{
+	return getStatusString(getStatus());
+}
+
+std::string LLProcess::getStatusString(const Status& status)
+{
+	return getStatusString(mDesc, status);
+}
+
+//static
+std::string LLProcess::getStatusString(const std::string& desc, const Status& status)
+{
+	if (status.mState == UNSTARTED)
+		return desc + " was never launched";
+
+	if (status.mState == RUNNING)
+		return desc + " running";
+
+	if (status.mState == EXITED)
+		return STRINGIZE(desc << " exited with code " << status.mData);
+
+	if (status.mState == KILLED)
+#if LL_WINDOWS
+		return STRINGIZE(desc << " killed with exception " << std::hex << status.mData);
+#else
+		return STRINGIZE(desc << " killed by signal " << status.mData);
+#endif
+
+
+	return STRINGIZE(desc << " in unknown state " << status.mState << " (" << status.mData << ")");
+}
+
+// Classic-C-style APR callback
+void LLProcess::status_callback(int reason, void* data, int status)
+{
+	// Our only role is to bounce this static method call back into object
+	// space.
+	static_cast<LLProcess*>(data)->handle_status(reason, status);
+}
+
+#define tabent(symbol) { symbol, #symbol }
+static struct ReasonCode
+{
+	int code;
+	const char* name;
+} reasons[] =
+{
+	tabent(APR_OC_REASON_DEATH),
+	tabent(APR_OC_REASON_UNWRITABLE),
+	tabent(APR_OC_REASON_RESTART),
+	tabent(APR_OC_REASON_UNREGISTER),
+	tabent(APR_OC_REASON_LOST),
+	tabent(APR_OC_REASON_RUNNING)
+};
+#undef tabent
+
+// Object-oriented callback
+void LLProcess::handle_status(int reason, int status)
+{
+	{
+		// This odd appearance of LL_DEBUGS is just to bracket a lookup that will
+		// only be performed if in fact we're going to produce the log message.
+		LL_DEBUGS("LLProcess") << empty;
+		std::string reason_str;
+		BOOST_FOREACH(const ReasonCode& rcp, reasons)
+		{
+			if (reason == rcp.code)
+			{
+				reason_str = rcp.name;
+				break;
+			}
+		}
+		if (reason_str.empty())
+		{
+			reason_str = STRINGIZE("unknown reason " << reason);
+		}
+		LL_CONT << mDesc << ": handle_status(" << reason_str << ", " << status << ")" << LL_ENDL;
+	}
+
+	if (! (reason == APR_OC_REASON_DEATH || reason == APR_OC_REASON_LOST))
+	{
+		// We're only interested in the call when the child terminates.
+		return;
+	}
+
+	// Somewhat oddly, APR requires that you explicitly unregister even when
+	// it already knows the child has terminated. We must pass the same 'data'
+	// pointer as for the register() call, which was our 'this'.
+	apr_proc_other_child_unregister(this);
+	// We overload mStatus.mState to indicate whether the child is registered
+	// for APR callback: only RUNNING means registered. Track that we've
+	// unregistered. We know the child has terminated; might be EXITED or
+	// KILLED; refine below.
+	mStatus.mState = EXITED;
+
+//	wi->rv = apr_proc_wait(wi->child, &wi->rc, &wi->why, APR_NOWAIT);
+	// It's just wrong to call apr_proc_wait() here. The only way APR knows to
+	// call us with APR_OC_REASON_DEATH is that it's already reaped this child
+	// process, so calling wait() will only produce "huh?" from the OS. We
+	// must rely on the status param passed in, which unfortunately comes
+	// straight from the OS wait() call, which means we have to decode it by
+	// hand.
+	mStatus = interpret_status(status);
+	LL_INFOS("LLProcess") << getStatusString() << LL_ENDL;
 }
 
 LLProcess::id LLProcess::getProcessID() const
 {
-	return mProcessID;
+	return mProcess.pid;
 }
 
 LLProcess::handle LLProcess::getProcessHandle() const
 {
-	return mProcessHandle;
+#if LL_WINDOWS
+	return mProcess.hproc;
+#else
+	return mProcess.pid;
+#endif
 }
 
 std::ostream& operator<<(std::ostream& out, const LLProcess::Params& params)
@@ -178,77 +454,15 @@ private:
 	LLProcess::handle mJob;
 };
 
-void LLProcess::launch(const LLSDOrParams& params)
+void LLProcess::autokill()
 {
-	PROCESS_INFORMATION pinfo;
-	STARTUPINFOA sinfo = { sizeof(sinfo) };
-
-	// LLProcess::create()'s caller passes a Unix-style array of strings for
-	// command-line arguments. Our caller can and should expect that these will be
-	// passed to the child process as individual arguments, regardless of content
-	// (e.g. embedded spaces). But because Windows invokes any child process with
-	// a single command-line string, this means we must quote each argument behind
-	// the scenes.
-	std::string args = LLStringUtil::quote(params.executable);
-	BOOST_FOREACH(const std::string& arg, params.args)
-	{
-		args += " ";
-		args += LLStringUtil::quote(arg);
-	}
-
-	// So retarded.  Windows requires that the second parameter to
-	// CreateProcessA be a writable (non-const) string...
-	std::vector<char> args2(args.begin(), args.end());
-	args2.push_back('\0');
-
-	// Convert wrapper to a real std::string so we can use c_str(); but use a
-	// named variable instead of a temporary so c_str() pointer remains valid.
-	std::string cwd(params.cwd);
-	const char * working_directory = 0;
-	if (! cwd.empty())
-		working_directory = cwd.c_str();
-
-	// It's important to pass CREATE_BREAKAWAY_FROM_JOB because Windows 7 et
-	// al. tend to implicitly launch new processes already bound to a job. From
-	// http://msdn.microsoft.com/en-us/library/windows/desktop/ms681949%28v=vs.85%29.aspx :
-	// "The process must not already be assigned to a job; if it is, the
-	// function fails with ERROR_ACCESS_DENIED." ...
-	// "If the process is being monitored by the Program Compatibility
-	// Assistant (PCA), it is placed into a compatibility job. Therefore, the
-	// process must be created using CREATE_BREAKAWAY_FROM_JOB before it can
-	// be placed in another job."
-	if( ! CreateProcessA(NULL,      // lpApplicationName
-                         &args2[0], // lpCommandLine
-                         NULL,      // lpProcessAttributes
-                         NULL,      // lpThreadAttributes
-                         FALSE,     // bInheritHandles
-                         CREATE_BREAKAWAY_FROM_JOB, // dwCreationFlags
-                         NULL,      // lpEnvironment
-                         working_directory, // lpCurrentDirectory
-                         &sinfo,            // lpStartupInfo
-                         &pinfo ) )         // lpProcessInformation
-	{
-		throw LLProcessError(WindowsErrorString("CreateProcessA"));
-	}
-
-	// CloseHandle(pinfo.hProcess); // stops leaks - nothing else
-	mProcessID = pinfo.dwProcessId;
-	mProcessHandle = pinfo.hProcess;
-	CloseHandle(pinfo.hThread); // stops leaks - nothing else
-
-	mDesc = STRINGIZE(LLStringUtil::quote(params.executable) << " (" << mProcessID << ')');
-	LL_INFOS("LLProcess") << "Launched " << params << " (" << mProcessID << ")" << LL_ENDL;
-
-	// Now associate the new child process with our Job Object -- unless
-	// autokill is false, i.e. caller asserts the child should persist.
-	if (params.autokill)
-	{
-		LLJob::instance().assignProcess(mDesc, mProcessHandle);
-}
+	LLJob::instance().assignProcess(mDesc, mProcess.hproc);
 }
 
 LLProcess::handle LLProcess::isRunning(handle h, const std::string& desc)
 {
+	// This direct Windows implementation is because we have no access to the
+	// apr_proc_t struct: we expect it's been destroyed.
 	if (! h)
 		return 0;
 
@@ -258,22 +472,44 @@ LLProcess::handle LLProcess::isRunning(handle h, const std::string& desc)
 		// the process has completed.
 		if (! desc.empty())
 		{
-			LL_INFOS("LLProcess") << desc << " terminated" << LL_ENDL;
+			DWORD status = 0;
+			if (! GetExitCodeProcess(h, &status))
+			{
+				LL_WARNS("LLProcess") << desc << " terminated, but "
+									  << WindowsErrorString("GetExitCodeProcess()") << LL_ENDL;
+			}
+			{
+				LL_INFOS("LLProcess") << getStatusString(desc, interpret_status(status))
+									  << LL_ENDL;
+			}
 		}
+		CloseHandle(h);
 		return 0;
 	}
 
 	return h;
 }
 
-bool LLProcess::kill(void)
+static LLProcess::Status interpret_status(int status)
 {
-	if (! mProcessHandle)
-		return false;
+	LLProcess::Status result;
+
+	// This bit of code is cribbed from apr/threadproc/win32/proc.c, a
+	// function (unfortunately static) called why_from_exit_code():
+	/* See WinNT.h STATUS_ACCESS_VIOLATION and family for how
+	 * this class of failures was determined
+	 */
+	if ((status & 0xFFFF0000) == 0xC0000000)
+	{
+		result.mState = KILLED;
+	}
+	else
+	{
+		result.mState = EXITED;
+	}
+	result.mData = status;
 
-	LL_INFOS("LLProcess") << "killing " << mDesc << LL_ENDL;
-	TerminateProcess(mProcessHandle, 0);
-	return ! isRunning();
+	return result;
 }
 
 /// GetLastError()/FormatMessage() boilerplate
@@ -315,98 +551,91 @@ static std::string WindowsErrorString(const std::string& operation)
 #include <errno.h>
 #include <sys/wait.h>
 
+void LLProcess::autokill()
+{
+	// What we ought to do here is to:
+	// 1. create a unique process group and run all autokill children in that
+	//    group (see https://jira.secondlife.com/browse/SWAT-563);
+	// 2. figure out a way to intercept control when the viewer exits --
+	//    gracefully or not; 
+	// 3. when the viewer exits, kill off the aforementioned process group.
+
+	// It's point 2 that's troublesome. Although I've seen some signal-
+	// handling logic in the Posix viewer code, I haven't yet found any bit of
+	// code that's run no matter how the viewer exits (a try/finally for the
+	// whole process, as it were).
+}
+
 // Attempt to reap a process ID -- returns true if the process has exited and been reaped, false otherwise.
-static bool reap_pid(pid_t pid)
+static bool reap_pid(pid_t pid, LLProcess::Status* pstatus=NULL)
 {
-	pid_t wait_result = ::waitpid(pid, NULL, WNOHANG);
+	LLProcess::Status dummy;
+	if (! pstatus)
+	{
+		// If caller doesn't want to see Status, give us a target anyway so we
+		// don't have to have a bunch of conditionals.
+		pstatus = &dummy;
+	}
+
+	int status = 0;
+	pid_t wait_result = ::waitpid(pid, &status, WNOHANG);
 	if (wait_result == pid)
 	{
+		*pstatus = interpret_status(status);
 		return true;
 	}
-	if (wait_result == -1 && errno == ECHILD)
+	if (wait_result == 0)
 	{
-		// No such process -- this may mean we're ignoring SIGCHILD.
-		return true;
+		pstatus->mState = LLProcess::RUNNING;
+		pstatus->mData	= 0;
+		return false;
 	}
-	
-	return false;
-}
 
-void LLProcess::launch(const LLSDOrParams& params)
-{
-	// flush all buffers before the child inherits them
-	::fflush(NULL);
+	// Clear caller's Status block; caller must interpret UNSTARTED to mean
+	// "if this PID was ever valid, it no longer is."
+	*pstatus = LLProcess::Status();
 
-	pid_t child = vfork();
-	if (child == 0)
+	// We've dealt with the success cases: we were able to reap the child
+	// (wait_result == pid) or it's still running (wait_result == 0). It may
+	// be that the child terminated but didn't hang around long enough for us
+	// to reap. In that case we still have no Status to report, but we can at
+	// least state that it's not running.
+	if (wait_result == -1 && errno == ECHILD)
 	{
-		// child process
-
-		std::string cwd(params.cwd);
-		if (! cwd.empty())
-		{
-			// change to the desired child working directory
-			if (::chdir(cwd.c_str()))
-			{
-				// chdir failed
-				LL_WARNS("LLProcess") << "could not chdir(\"" << cwd << "\")" << LL_ENDL;
-				// pointless to throw; this is child process...
-				_exit(248);
-			}
-		}
-
-		// create an argv vector for the child process
-		std::vector<const char*> fake_argv;
-
-		// add the executable path
-		std::string executable(params.executable);
-		fake_argv.push_back(executable.c_str());
-
-		// and any arguments
-		std::vector<std::string> args(params.args.begin(), params.args.end());
-		BOOST_FOREACH(const std::string& arg, args)
-		{
-			fake_argv.push_back(arg.c_str());
-		}
-
-		// terminate with a null pointer
-		fake_argv.push_back(NULL);
-
-		::execv(executable.c_str(), const_cast<char* const*>(&fake_argv[0]));
-
-		// If we reach this point, the exec failed.
-		LL_WARNS("LLProcess") << "failed to launch: ";
-		BOOST_FOREACH(const char* arg, fake_argv)
-		{
-			LL_CONT << arg << ' ';
-		}
-		LL_CONT << LL_ENDL;
-		// Use _exit() instead of exit() per the vfork man page. Exit with a
-		// distinctive rc: someday soon we'll be able to retrieve it, and it
-		// would be nice to be able to tell that the child process failed!
-		_exit(249);
+		// No such process -- this may mean we're ignoring SIGCHILD.
+		return true;
 	}
 
-	// parent process
-	mProcessID = child;
-	mProcessHandle = child;
-
-	mDesc = STRINGIZE(LLStringUtil::quote(params.executable) << " (" << mProcessID << ')');
-	LL_INFOS("LLProcess") << "Launched " << params << " (" << mProcessID << ")" << LL_ENDL;
+	// Uh, should never happen?!
+	LL_WARNS("LLProcess") << "LLProcess::reap_pid(): waitpid(" << pid << ") returned "
+						  << wait_result << "; not meaningful?" << LL_ENDL;
+	// If caller is looping until this pid terminates, and if we can't find
+	// out, better to break the loop than to claim it's still running.
+	return true;
 }
 
 LLProcess::id LLProcess::isRunning(id pid, const std::string& desc)
 {
+	// This direct Posix implementation is because we have no access to the
+	// apr_proc_t struct: we expect it's been destroyed.
 	if (! pid)
 		return 0;
 
 	// Check whether the process has exited, and reap it if it has.
-	if(reap_pid(pid))
+	LLProcess::Status status;
+	if(reap_pid(pid, &status))
 	{
 		// the process has exited.
 		if (! desc.empty())
 		{
-			LL_INFOS("LLProcess") << desc << " terminated" << LL_ENDL;
+			std::string statstr(desc + " apparently terminated: no status available");
+			// We don't just pass UNSTARTED to getStatusString() because, in
+			// the context of reap_pid(), that state has special meaning.
+			if (status.mState != UNSTARTED)
+			{
+				statstr = getStatusString(desc, status);
+			}
+			LL_INFOS("LLProcess") << statstr << LL_ENDL;
 		}
 		return 0;
 	}
@@ -414,18 +643,27 @@ LLProcess::id LLProcess::isRunning(id pid, const std::string& desc)
 	return pid;
 }
 
-bool LLProcess::kill(void)
+static LLProcess::Status interpret_status(int status)
 {
-	if (! mProcessID)
-		return false;
+	LLProcess::Status result;
 
-	// Try to kill the process. We'll do approximately the same thing whether
-	// the kill returns an error or not, so we ignore the result.
-	LL_INFOS("LLProcess") << "killing " << mDesc << LL_ENDL;
-	(void)::kill(mProcessID, SIGTERM);
+	if (WIFEXITED(status))
+	{
+		result.mState = LLProcess::EXITED;
+		result.mData  = WEXITSTATUS(status);
+	}
+	else if (WIFSIGNALED(status))
+	{
+		result.mState = LLProcess::KILLED;
+		result.mData  = WTERMSIG(status);
+	}
+	else                            // uh, shouldn't happen?
+	{
+		result.mState = LLProcess::EXITED;
+		result.mData  = status;     // someone else will have to decode
+	}
 
-	// This will have the side-effect of reaping the zombie if the process has exited.
-	return ! isRunning();
+	return result;
 }
 
 /*==========================================================================*|
diff --git a/indra/llcommon/llprocess.h b/indra/llcommon/llprocess.h
index 8a842589ec..689f8aedab 100644
--- a/indra/llcommon/llprocess.h
+++ b/indra/llcommon/llprocess.h
@@ -29,6 +29,7 @@
 
 #include "llinitparam.h"
 #include "llsdparam.h"
+#include "apr_thread_proc.h"
 #include <boost/shared_ptr.hpp>
 #include <boost/noncopyable.hpp>
 #include <iosfwd>                   // std::ostream
@@ -95,13 +96,52 @@ public:
 	static LLProcessPtr create(const LLSDOrParams& params);
 	virtual ~LLProcess();
 
-	// isRunning isn't const because, if child isn't running, it clears stored
-	// process ID
+	// isRunning() isn't const because, when child terminates, it sets stored
+	// Status
 	bool isRunning(void);
-	
+
+	/**
+	 * State of child process
+	 */
+	enum state
+	{
+		UNSTARTED,					///< initial value, invisible to consumer
+		RUNNING,					///< child process launched
+		EXITED,						///< child process terminated voluntarily
+		KILLED						///< child process terminated involuntarily
+	};
+
+	/**
+	 * Status info
+	 */
+	struct Status
+	{
+		Status():
+			mState(UNSTARTED),
+			mData(0)
+		{}
+
+		state mState;				///< @see state
+		/**
+		 * - for mState == EXITED: mData is exit() code
+		 * - for mState == KILLED: mData is signal number (Posix)
+		 * - otherwise: mData is undefined
+		 */
+		int mData;
+	};
+
+	/// Status query
+	Status getStatus();
+	/// English Status string query, for logging etc.
+	std::string getStatusString();
+	/// English Status string query for previously-captured Status
+	std::string getStatusString(const Status& status);
+	/// static English Status string query
+	static std::string getStatusString(const std::string& desc, const Status& status);
+
 	// Attempt to kill the process -- returns true if the process is no longer running when it returns.
 	// Note that even if this returns false, the process may exit some time after it's called.
-	bool kill(void);
+	bool kill(const std::string& who="");
 
 #if LL_WINDOWS
 	typedef int id;                 ///< as returned by getProcessID()
@@ -133,18 +173,28 @@ public:
 	 * a whole set of operations supported on freestanding @c handle values.
 	 * New functionality should be added as nonstatic members operating on
 	 * the same data as getProcessHandle().
+	 *
+	 * In particular, if child termination is detected by static isRunning()
+	 * rather than by nonstatic isRunning(), the LLProcess object won't be
+	 * aware of the child's changed status and may encounter OS errors trying
+	 * to obtain it. static isRunning() is only intended for after the
+	 * launching LLProcess object has been destroyed.
 	 */
 	static handle isRunning(handle, const std::string& desc="");
 
 private:
 	/// constructor is private: use create() instead
 	LLProcess(const LLSDOrParams& params);
-	void launch(const LLSDOrParams& params);
+	void autokill();
+	// Classic-C-style APR callback
+	static void status_callback(int reason, void* data, int status);
+	// Object-oriented callback
+	void handle_status(int reason, int status);
 
 	std::string mDesc;
-	id mProcessID;
-	handle mProcessHandle;
+	apr_proc_t mProcess;
 	bool mAutokill;
+	Status mStatus;
 };
 
 /// for logging
diff --git a/indra/llcommon/tests/llprocess_test.cpp b/indra/llcommon/tests/llprocess_test.cpp
index 4ad45bdf27..60ed12ad6a 100644
--- a/indra/llcommon/tests/llprocess_test.cpp
+++ b/indra/llcommon/tests/llprocess_test.cpp
@@ -630,7 +630,7 @@ namespace tut
             // Destroy the LLProcess, which should kill the child.
         }
         // wait for the script to terminate... one way or another.
-        while (LLProcess::isRunning(phandle))
+        while (LLProcess::isRunning(phandle, "kill() script"))
         {
             sleep(1);
         }
@@ -643,7 +643,7 @@ namespace tut
     template<> template<>
     void object::test<6>()
     {
-        set_test_name("autokill");
+        set_test_name("autokill=false");
         NamedTempFile from("from", "not started");
         NamedTempFile to("to", "");
         LLProcess::handle phandle(0);
@@ -695,7 +695,7 @@ namespace tut
             outf << "go";
         } // flush and close.
         // now wait for the script to terminate... one way or another.
-        while (LLProcess::isRunning(phandle))
+        while (LLProcess::isRunning(phandle, "autokill script"))
         {
             sleep(1);
         }
author	Nat Goodspeed <nat@lindenlab.com>	2012-02-07 10:53:23 -0500
committer	Nat Goodspeed <nat@lindenlab.com>	2012-02-07 10:53:23 -0500
commit	aafb03b29f5166e8978931ad8b717be32d942836 (patch)
tree	db1f294e7327a876ddb91ffb3ac37423f987dd83 /indra
parent	d99acd56cdc41d72a073a4419e3e51c356e675bb (diff)