diff options
author | Nat Goodspeed <nat@lindenlab.com> | 2012-02-07 10:53:23 -0500 |
---|---|---|
committer | Nat Goodspeed <nat@lindenlab.com> | 2012-02-07 10:53:23 -0500 |
commit | aafb03b29f5166e8978931ad8b717be32d942836 (patch) | |
tree | db1f294e7327a876ddb91ffb3ac37423f987dd83 /indra/llcommon/llprocess.cpp | |
parent | d99acd56cdc41d72a073a4419e3e51c356e675bb (diff) |
Convert LLProcess implementation from platform-specific to using APR.
Include logic to engage Linden apr_procattr_autokill_set() extension: on
Windows, magic CreateProcess() flag must be pushed down into apr_proc_create()
level. When using an APR package without that extension, present
implementation should lock (e.g.) SLVoice.exe lifespan to viewer's on Windows
XP but probably won't on Windows 7: need magic flag on CreateProcess().
Using APR child-termination callback requires us to define state (e.g.
LLProcess::RUNNING). Take the opportunity to present Status, capturing state
and (if terminated) rc or signal number; but since most of the time all caller
really wants is to log the outcome, also present status string, encapsulating
logic to examine state and describe exited-with-rc vs. killed-by-signal.
New Status logic may report clearer results in the case of a Windows child
process killed by exception.
Clarify that static LLProcess::isRunning(handle) overload is only for use when
the original LLProcess object has been destroyed: really only for unit tests.
We necessarily retain our original platform-specific implementations for just
that one method. (Nonstatic isRunning() no longer calls static method.)
Clarify log output from llprocess_test.cpp in a couple places.
Diffstat (limited to 'indra/llcommon/llprocess.cpp')
-rw-r--r-- | indra/llcommon/llprocess.cpp | 552 |
1 files changed, 395 insertions, 157 deletions
diff --git a/indra/llcommon/llprocess.cpp b/indra/llcommon/llprocess.cpp index 8611d67f25..bc27002701 100644 --- a/indra/llcommon/llprocess.cpp +++ b/indra/llcommon/llprocess.cpp @@ -30,11 +30,15 @@ #include "llsingleton.h" #include "llstring.h" #include "stringize.h" +#include "llapr.h" #include <boost/foreach.hpp> #include <iostream> #include <stdexcept> +static std::string empty; +static LLProcess::Status interpret_status(int status); + /// Need an exception to avoid constructing an invalid LLProcess object, but /// internal use only struct LLProcessError: public std::runtime_error @@ -55,9 +59,14 @@ LLProcessPtr LLProcess::create(const LLSDOrParams& params) } } +/// Call an apr function returning apr_status_t. On failure, log warning and +/// throw LLProcessError mentioning the function call that produced that +/// result. +#define chkapr(func) \ + if (ll_apr_warn_status(func)) \ + throw LLProcessError(#func " failed") + LLProcess::LLProcess(const LLSDOrParams& params): - mProcessID(0), - mProcessHandle(0), mAutokill(params.autokill) { if (! params.validateBlock(true)) @@ -66,31 +75,298 @@ LLProcess::LLProcess(const LLSDOrParams& params): << LLSDNotationStreamer(params))); } - launch(params); + apr_procattr_t *procattr = NULL; + chkapr(apr_procattr_create(&procattr, gAPRPoolp)); + + // For which of stdin, stdout, stderr should we create a pipe to the + // child? In the viewer, there are only a couple viable + // apr_procattr_io_set() alternatives: inherit the viewer's own stdxxx + // handle (APR_NO_PIPE, e.g. for stdout, stderr), or create a pipe that's + // blocking on the child end but nonblocking at the viewer end + // (APR_CHILD_BLOCK). The viewer can't block for anything: the parent end + // MUST be nonblocking. As the APR documentation itself points out, it + // makes very little sense to set nonblocking I/O for the child end of a + // pipe: only a specially-written child could deal with that. + // Other major options could include explicitly creating a single APR pipe + // and passing it as both stdout and stderr (apr_procattr_child_out_set(), + // apr_procattr_child_err_set()), or accepting a filename, opening it and + // passing that apr_file_t (simple <, >, 2> redirect emulation). +// chkapr(apr_procattr_io_set(procattr, APR_CHILD_BLOCK, APR_CHILD_BLOCK, APR_CHILD_BLOCK)); + chkapr(apr_procattr_io_set(procattr, APR_NO_PIPE, APR_NO_PIPE, APR_NO_PIPE)); + + // Thumbs down on implicitly invoking the shell to invoke the child. From + // our point of view, the other major alternative to APR_PROGRAM_PATH + // would be APR_PROGRAM_ENV: still copy environment, but require full + // executable pathname. I don't see a downside to searching the PATH, + // though: if our caller wants (e.g.) a specific Python interpreter, s/he + // can still pass the full pathname. + chkapr(apr_procattr_cmdtype_set(procattr, APR_PROGRAM_PATH)); + // YES, do extra work if necessary to report child exec() failures back to + // parent process. + chkapr(apr_procattr_error_check_set(procattr, 1)); + // Do not start a non-autokill child in detached state. On Posix + // platforms, this setting attempts to daemonize the new child, closing + // std handles and the like, and that's a bit more detachment than we + // want. autokill=false just means not to implicitly kill the child when + // the parent terminates! +// chkapr(apr_procattr_detach_set(procattr, params.autokill? 0 : 1)); + + if (params.autokill) + { +#if defined(APR_HAS_PROCATTR_AUTOKILL_SET) + apr_status_t ok = apr_procattr_autokill_set(procattr, 1); +# if LL_WINDOWS + // As of 2012-02-02, we only expect this to be implemented on Windows. + // Avoid spamming the log with warnings we fully expect. + ll_apr_warn_status(ok); +# endif // LL_WINDOWS +#else + LL_WARNS("LLProcess") << "This version of APR lacks Linden apr_procattr_autokill_set() extension" << LL_ENDL; +#endif + } + + // Have to instantiate named std::strings for string params items so their + // c_str() values persist. + std::string cwd(params.cwd); + if (! cwd.empty()) + { + chkapr(apr_procattr_dir_set(procattr, cwd.c_str())); + } + + // create an argv vector for the child process + std::vector<const char*> argv; + + // add the executable path + std::string executable(params.executable); + argv.push_back(executable.c_str()); + + // and any arguments + std::vector<std::string> args(params.args.begin(), params.args.end()); + BOOST_FOREACH(const std::string& arg, args) + { + argv.push_back(arg.c_str()); + } + + // terminate with a null pointer + argv.push_back(NULL); + + // Launch! The NULL would be the environment block, if we were passing one. + chkapr(apr_proc_create(&mProcess, argv[0], &argv[0], NULL, procattr, gAPRPoolp)); + + // arrange to call status_callback() + apr_proc_other_child_register(&mProcess, &LLProcess::status_callback, this, mProcess.in, + gAPRPoolp); + mStatus.mState = RUNNING; + + mDesc = STRINGIZE(LLStringUtil::quote(params.executable) << " (" << mProcess.pid << ')'); + LL_INFOS("LLProcess") << "Launched " << params << " (" << mProcess.pid << ")" << LL_ENDL; + + // Unless caller explicitly turned off autokill (child should persist), + // take steps to terminate the child. This is all suspenders-and-belt: in + // theory our destructor should kill an autokill child, but in practice + // that doesn't always work (e.g. VWR-21538). + if (params.autokill) + { + // Tie the lifespan of this child process to the lifespan of our APR + // pool: on destruction of the pool, forcibly kill the process. Tell + // APR to try SIGTERM and wait 3 seconds. If that didn't work, use + // SIGKILL. + apr_pool_note_subprocess(gAPRPoolp, &mProcess, APR_KILL_AFTER_TIMEOUT); + + // On Windows, associate the new child process with our Job Object. + autokill(); + } } LLProcess::~LLProcess() { + // Only in state RUNNING are we registered for callback. In UNSTARTED we + // haven't yet registered. And since receiving the callback is the only + // way we detect child termination, we only change from state RUNNING at + // the same time we unregister. + if (mStatus.mState == RUNNING) + { + // We're still registered for a callback: unregister. Do it before + // we even issue the kill(): even if kill() somehow prompted an + // instantaneous callback (unlikely), this object is going away! Any + // information updated in this object by such a callback is no longer + // available to any consumer anyway. + apr_proc_other_child_unregister(this); + } + if (mAutokill) { - kill(); + kill("destructor"); + } +} + +bool LLProcess::kill(const std::string& who) +{ + if (isRunning()) + { + LL_INFOS("LLProcess") << who << " killing " << mDesc << LL_ENDL; + +#if LL_WINDOWS + int sig = -1; +#else // Posix + int sig = SIGTERM; +#endif + + ll_apr_warn_status(apr_proc_kill(&mProcess, sig)); } + + return ! isRunning(); } bool LLProcess::isRunning(void) { - mProcessHandle = isRunning(mProcessHandle, mDesc); - return (mProcessHandle != 0); + return getStatus().mState == RUNNING; +} + +LLProcess::Status LLProcess::getStatus() +{ + // Only when mState is RUNNING might the status change dynamically. For + // any other value, pointless to attempt to update status: it won't + // change. + if (mStatus.mState == RUNNING) + { + // Tell APR to sense whether the child is still running and call + // handle_status() appropriately. We should be able to get the same + // info from an apr_proc_wait(APR_NOWAIT) call; but at least in APR + // 1.4.2, testing suggests that even with APR_NOWAIT, apr_proc_wait() + // blocks the caller. We can't have that in the viewer. Hence the + // callback rigmarole. Once we update APR, it's probably worth testing + // again. Also -- although there's an apr_proc_other_child_refresh() + // call, i.e. get that information for one specific child, it accepts + // an 'apr_other_child_rec_t*' that's mentioned NOWHERE else in the + // documentation or header files! I would use the specific call if I + // knew how. As it is, each call to this method will call callbacks + // for ALL still-running child processes. Sigh... + apr_proc_other_child_refresh_all(APR_OC_REASON_RUNNING); + } + + return mStatus; +} + +std::string LLProcess::getStatusString() +{ + return getStatusString(getStatus()); +} + +std::string LLProcess::getStatusString(const Status& status) +{ + return getStatusString(mDesc, status); +} + +//static +std::string LLProcess::getStatusString(const std::string& desc, const Status& status) +{ + if (status.mState == UNSTARTED) + return desc + " was never launched"; + + if (status.mState == RUNNING) + return desc + " running"; + + if (status.mState == EXITED) + return STRINGIZE(desc << " exited with code " << status.mData); + + if (status.mState == KILLED) +#if LL_WINDOWS + return STRINGIZE(desc << " killed with exception " << std::hex << status.mData); +#else + return STRINGIZE(desc << " killed by signal " << status.mData); +#endif + + + return STRINGIZE(desc << " in unknown state " << status.mState << " (" << status.mData << ")"); +} + +// Classic-C-style APR callback +void LLProcess::status_callback(int reason, void* data, int status) +{ + // Our only role is to bounce this static method call back into object + // space. + static_cast<LLProcess*>(data)->handle_status(reason, status); +} + +#define tabent(symbol) { symbol, #symbol } +static struct ReasonCode +{ + int code; + const char* name; +} reasons[] = +{ + tabent(APR_OC_REASON_DEATH), + tabent(APR_OC_REASON_UNWRITABLE), + tabent(APR_OC_REASON_RESTART), + tabent(APR_OC_REASON_UNREGISTER), + tabent(APR_OC_REASON_LOST), + tabent(APR_OC_REASON_RUNNING) +}; +#undef tabent + +// Object-oriented callback +void LLProcess::handle_status(int reason, int status) +{ + { + // This odd appearance of LL_DEBUGS is just to bracket a lookup that will + // only be performed if in fact we're going to produce the log message. + LL_DEBUGS("LLProcess") << empty; + std::string reason_str; + BOOST_FOREACH(const ReasonCode& rcp, reasons) + { + if (reason == rcp.code) + { + reason_str = rcp.name; + break; + } + } + if (reason_str.empty()) + { + reason_str = STRINGIZE("unknown reason " << reason); + } + LL_CONT << mDesc << ": handle_status(" << reason_str << ", " << status << ")" << LL_ENDL; + } + + if (! (reason == APR_OC_REASON_DEATH || reason == APR_OC_REASON_LOST)) + { + // We're only interested in the call when the child terminates. + return; + } + + // Somewhat oddly, APR requires that you explicitly unregister even when + // it already knows the child has terminated. We must pass the same 'data' + // pointer as for the register() call, which was our 'this'. + apr_proc_other_child_unregister(this); + // We overload mStatus.mState to indicate whether the child is registered + // for APR callback: only RUNNING means registered. Track that we've + // unregistered. We know the child has terminated; might be EXITED or + // KILLED; refine below. + mStatus.mState = EXITED; + +// wi->rv = apr_proc_wait(wi->child, &wi->rc, &wi->why, APR_NOWAIT); + // It's just wrong to call apr_proc_wait() here. The only way APR knows to + // call us with APR_OC_REASON_DEATH is that it's already reaped this child + // process, so calling wait() will only produce "huh?" from the OS. We + // must rely on the status param passed in, which unfortunately comes + // straight from the OS wait() call, which means we have to decode it by + // hand. + mStatus = interpret_status(status); + LL_INFOS("LLProcess") << getStatusString() << LL_ENDL; } LLProcess::id LLProcess::getProcessID() const { - return mProcessID; + return mProcess.pid; } LLProcess::handle LLProcess::getProcessHandle() const { - return mProcessHandle; +#if LL_WINDOWS + return mProcess.hproc; +#else + return mProcess.pid; +#endif } std::ostream& operator<<(std::ostream& out, const LLProcess::Params& params) @@ -178,77 +454,15 @@ private: LLProcess::handle mJob; }; -void LLProcess::launch(const LLSDOrParams& params) +void LLProcess::autokill() { - PROCESS_INFORMATION pinfo; - STARTUPINFOA sinfo = { sizeof(sinfo) }; - - // LLProcess::create()'s caller passes a Unix-style array of strings for - // command-line arguments. Our caller can and should expect that these will be - // passed to the child process as individual arguments, regardless of content - // (e.g. embedded spaces). But because Windows invokes any child process with - // a single command-line string, this means we must quote each argument behind - // the scenes. - std::string args = LLStringUtil::quote(params.executable); - BOOST_FOREACH(const std::string& arg, params.args) - { - args += " "; - args += LLStringUtil::quote(arg); - } - - // So retarded. Windows requires that the second parameter to - // CreateProcessA be a writable (non-const) string... - std::vector<char> args2(args.begin(), args.end()); - args2.push_back('\0'); - - // Convert wrapper to a real std::string so we can use c_str(); but use a - // named variable instead of a temporary so c_str() pointer remains valid. - std::string cwd(params.cwd); - const char * working_directory = 0; - if (! cwd.empty()) - working_directory = cwd.c_str(); - - // It's important to pass CREATE_BREAKAWAY_FROM_JOB because Windows 7 et - // al. tend to implicitly launch new processes already bound to a job. From - // http://msdn.microsoft.com/en-us/library/windows/desktop/ms681949%28v=vs.85%29.aspx : - // "The process must not already be assigned to a job; if it is, the - // function fails with ERROR_ACCESS_DENIED." ... - // "If the process is being monitored by the Program Compatibility - // Assistant (PCA), it is placed into a compatibility job. Therefore, the - // process must be created using CREATE_BREAKAWAY_FROM_JOB before it can - // be placed in another job." - if( ! CreateProcessA(NULL, // lpApplicationName - &args2[0], // lpCommandLine - NULL, // lpProcessAttributes - NULL, // lpThreadAttributes - FALSE, // bInheritHandles - CREATE_BREAKAWAY_FROM_JOB, // dwCreationFlags - NULL, // lpEnvironment - working_directory, // lpCurrentDirectory - &sinfo, // lpStartupInfo - &pinfo ) ) // lpProcessInformation - { - throw LLProcessError(WindowsErrorString("CreateProcessA")); - } - - // CloseHandle(pinfo.hProcess); // stops leaks - nothing else - mProcessID = pinfo.dwProcessId; - mProcessHandle = pinfo.hProcess; - CloseHandle(pinfo.hThread); // stops leaks - nothing else - - mDesc = STRINGIZE(LLStringUtil::quote(params.executable) << " (" << mProcessID << ')'); - LL_INFOS("LLProcess") << "Launched " << params << " (" << mProcessID << ")" << LL_ENDL; - - // Now associate the new child process with our Job Object -- unless - // autokill is false, i.e. caller asserts the child should persist. - if (params.autokill) - { - LLJob::instance().assignProcess(mDesc, mProcessHandle); -} + LLJob::instance().assignProcess(mDesc, mProcess.hproc); } LLProcess::handle LLProcess::isRunning(handle h, const std::string& desc) { + // This direct Windows implementation is because we have no access to the + // apr_proc_t struct: we expect it's been destroyed. if (! h) return 0; @@ -258,22 +472,44 @@ LLProcess::handle LLProcess::isRunning(handle h, const std::string& desc) // the process has completed. if (! desc.empty()) { - LL_INFOS("LLProcess") << desc << " terminated" << LL_ENDL; + DWORD status = 0; + if (! GetExitCodeProcess(h, &status)) + { + LL_WARNS("LLProcess") << desc << " terminated, but " + << WindowsErrorString("GetExitCodeProcess()") << LL_ENDL; + } + { + LL_INFOS("LLProcess") << getStatusString(desc, interpret_status(status)) + << LL_ENDL; + } } + CloseHandle(h); return 0; } return h; } -bool LLProcess::kill(void) +static LLProcess::Status interpret_status(int status) { - if (! mProcessHandle) - return false; + LLProcess::Status result; + + // This bit of code is cribbed from apr/threadproc/win32/proc.c, a + // function (unfortunately static) called why_from_exit_code(): + /* See WinNT.h STATUS_ACCESS_VIOLATION and family for how + * this class of failures was determined + */ + if ((status & 0xFFFF0000) == 0xC0000000) + { + result.mState = KILLED; + } + else + { + result.mState = EXITED; + } + result.mData = status; - LL_INFOS("LLProcess") << "killing " << mDesc << LL_ENDL; - TerminateProcess(mProcessHandle, 0); - return ! isRunning(); + return result; } /// GetLastError()/FormatMessage() boilerplate @@ -315,98 +551,91 @@ static std::string WindowsErrorString(const std::string& operation) #include <errno.h> #include <sys/wait.h> +void LLProcess::autokill() +{ + // What we ought to do here is to: + // 1. create a unique process group and run all autokill children in that + // group (see https://jira.secondlife.com/browse/SWAT-563); + // 2. figure out a way to intercept control when the viewer exits -- + // gracefully or not; + // 3. when the viewer exits, kill off the aforementioned process group. + + // It's point 2 that's troublesome. Although I've seen some signal- + // handling logic in the Posix viewer code, I haven't yet found any bit of + // code that's run no matter how the viewer exits (a try/finally for the + // whole process, as it were). +} + // Attempt to reap a process ID -- returns true if the process has exited and been reaped, false otherwise. -static bool reap_pid(pid_t pid) +static bool reap_pid(pid_t pid, LLProcess::Status* pstatus=NULL) { - pid_t wait_result = ::waitpid(pid, NULL, WNOHANG); + LLProcess::Status dummy; + if (! pstatus) + { + // If caller doesn't want to see Status, give us a target anyway so we + // don't have to have a bunch of conditionals. + pstatus = &dummy; + } + + int status = 0; + pid_t wait_result = ::waitpid(pid, &status, WNOHANG); if (wait_result == pid) { + *pstatus = interpret_status(status); return true; } - if (wait_result == -1 && errno == ECHILD) + if (wait_result == 0) { - // No such process -- this may mean we're ignoring SIGCHILD. - return true; + pstatus->mState = LLProcess::RUNNING; + pstatus->mData = 0; + return false; } - - return false; -} -void LLProcess::launch(const LLSDOrParams& params) -{ - // flush all buffers before the child inherits them - ::fflush(NULL); + // Clear caller's Status block; caller must interpret UNSTARTED to mean + // "if this PID was ever valid, it no longer is." + *pstatus = LLProcess::Status(); - pid_t child = vfork(); - if (child == 0) + // We've dealt with the success cases: we were able to reap the child + // (wait_result == pid) or it's still running (wait_result == 0). It may + // be that the child terminated but didn't hang around long enough for us + // to reap. In that case we still have no Status to report, but we can at + // least state that it's not running. + if (wait_result == -1 && errno == ECHILD) { - // child process - - std::string cwd(params.cwd); - if (! cwd.empty()) - { - // change to the desired child working directory - if (::chdir(cwd.c_str())) - { - // chdir failed - LL_WARNS("LLProcess") << "could not chdir(\"" << cwd << "\")" << LL_ENDL; - // pointless to throw; this is child process... - _exit(248); - } - } - - // create an argv vector for the child process - std::vector<const char*> fake_argv; - - // add the executable path - std::string executable(params.executable); - fake_argv.push_back(executable.c_str()); - - // and any arguments - std::vector<std::string> args(params.args.begin(), params.args.end()); - BOOST_FOREACH(const std::string& arg, args) - { - fake_argv.push_back(arg.c_str()); - } - - // terminate with a null pointer - fake_argv.push_back(NULL); - - ::execv(executable.c_str(), const_cast<char* const*>(&fake_argv[0])); - - // If we reach this point, the exec failed. - LL_WARNS("LLProcess") << "failed to launch: "; - BOOST_FOREACH(const char* arg, fake_argv) - { - LL_CONT << arg << ' '; - } - LL_CONT << LL_ENDL; - // Use _exit() instead of exit() per the vfork man page. Exit with a - // distinctive rc: someday soon we'll be able to retrieve it, and it - // would be nice to be able to tell that the child process failed! - _exit(249); + // No such process -- this may mean we're ignoring SIGCHILD. + return true; } - // parent process - mProcessID = child; - mProcessHandle = child; - - mDesc = STRINGIZE(LLStringUtil::quote(params.executable) << " (" << mProcessID << ')'); - LL_INFOS("LLProcess") << "Launched " << params << " (" << mProcessID << ")" << LL_ENDL; + // Uh, should never happen?! + LL_WARNS("LLProcess") << "LLProcess::reap_pid(): waitpid(" << pid << ") returned " + << wait_result << "; not meaningful?" << LL_ENDL; + // If caller is looping until this pid terminates, and if we can't find + // out, better to break the loop than to claim it's still running. + return true; } LLProcess::id LLProcess::isRunning(id pid, const std::string& desc) { + // This direct Posix implementation is because we have no access to the + // apr_proc_t struct: we expect it's been destroyed. if (! pid) return 0; // Check whether the process has exited, and reap it if it has. - if(reap_pid(pid)) + LLProcess::Status status; + if(reap_pid(pid, &status)) { // the process has exited. if (! desc.empty()) { - LL_INFOS("LLProcess") << desc << " terminated" << LL_ENDL; + std::string statstr(desc + " apparently terminated: no status available"); + // We don't just pass UNSTARTED to getStatusString() because, in + // the context of reap_pid(), that state has special meaning. + if (status.mState != UNSTARTED) + { + statstr = getStatusString(desc, status); + } + LL_INFOS("LLProcess") << statstr << LL_ENDL; } return 0; } @@ -414,18 +643,27 @@ LLProcess::id LLProcess::isRunning(id pid, const std::string& desc) return pid; } -bool LLProcess::kill(void) +static LLProcess::Status interpret_status(int status) { - if (! mProcessID) - return false; + LLProcess::Status result; - // Try to kill the process. We'll do approximately the same thing whether - // the kill returns an error or not, so we ignore the result. - LL_INFOS("LLProcess") << "killing " << mDesc << LL_ENDL; - (void)::kill(mProcessID, SIGTERM); + if (WIFEXITED(status)) + { + result.mState = LLProcess::EXITED; + result.mData = WEXITSTATUS(status); + } + else if (WIFSIGNALED(status)) + { + result.mState = LLProcess::KILLED; + result.mData = WTERMSIG(status); + } + else // uh, shouldn't happen? + { + result.mState = LLProcess::EXITED; + result.mData = status; // someone else will have to decode + } - // This will have the side-effect of reaping the zombie if the process has exited. - return ! isRunning(); + return result; } /*==========================================================================*| |