aboutsummaryrefslogtreecommitdiff
path: root/butl
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2016-08-21 12:36:35 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2016-08-21 12:36:35 +0200
commit e930d5c9cb4176c6055bde2b4ff196f4b5f92f69 (patch)
tree 948cd3832e609248d02c8abaafe16385f93a44a7 /butl
parent b6616430560102415bb82062b845a23c830bac0d (diff)
Redo process path search to better accommodate Windows-specific semantics
Diffstat (limited to 'butl')
-rw-r--r-- butl/process 101
-rw-r--r-- butl/process.cxx 335
-rw-r--r-- butl/process.ixx 57
3 files changed, 383 insertions, 110 deletions
diff --git a/butl/process b/butl/process
index ed9f798..75f3c66 100644
--- a/butl/process
+++ b/butl/process
@@ -13,6 +13,7 @@
#include <cstdint> // uint32_t
#include <system_error>
+#include <butl/path>
#include <butl/export>
namespace butl
@@ -28,14 +29,68 @@ namespace butl
process_error (int e, bool child)
: system_error (e, std::system_category ()), child_ (child) {}
#else
- process_error (const std::string& d)
- : system_error (ECHILD, std::system_category (), d), child_ (false) {}
+ process_error (int e)
+ : system_error (e, std::system_category ()), child_ (false) {}
+
+ process_error (const std::string& d, int e = ECHILD)
+ : system_error (e, std::system_category (), d), child_ (false) {}
#endif
private:
bool child_;
};
+ // A process executable has three paths: initial, recall, and effective.
+ // Initial is the original "command" that you specify in argv[0] and on
+ // POSIX that's what ends up in the child's argv[0]. But not on Windows. On
+ // Windows the command is first searched for in the parent executable's
+ // directory and if found then that's what should end up in child's argv[0].
+ // So this is the recall path. It is called recall because this is what the
+ // caller of the parent process will be able to execute if you printed the
+ // command line. Finally, effective is the actual path to the executable
+ // that will include the directory part if found in PATH, the .exe extension
+ // if one is missing, etc.
+ //
+ // As an example, let's say we run foo\foo.exe that itself spawns bar which
+ // is found as foo\bar.exe. The paths will then be:
+ //
+ // initial: bar
+ // recall: foo\bar
+ // effective: foo\bar.exe
+ //
+ // In most cases, at least on POSIX, all three paths will be the same. As an
+ // optimization, if the recall path is empty, then it means it is the same
+ // as initial. Similarly, if the effective path is empty then, it is the
+ // same as recall (and if that is empty, as initial).
+ //
+ // Note that the call to path_search() below adjusts args[0] to point to the
+ // recall path, which brings up lifetime issues. To address this, this class
+ // also implements an RAII-based auto-restore of args[0] to its initial
+ // value.
+ //
+ // Holds the initial, recall, and effective paths of a process executable
+ // and, optionally, restores the caller's args[0] slot to the initial value
+ // on destruction.
+ //
+ class process_path
+ {
+ public:
+ // The original args[0] value. Not owned: only the pointer is stored, and
+ // it is what gets written back to *args0_ by the destructor.
+ //
+ const char* initial = nullptr;
+
+ // Recall and effective paths. Default-constructed (empty) unless assigned
+ // by whoever builds the object.
+ //
+ path recall;
+ path effect;
+
+ // Moveable-only type.
+ //
+ process_path (process_path&&);
+ process_path& operator= (process_path&&);
+
+ process_path (const process_path&) = delete;
+ process_path& operator= (const process_path&) = delete;
+
+ // The default-constructed object has no args[0] slot to restore. The
+ // two-argument version remembers the initial value (i) and the address of
+ // the slot (a0, may be NULL) to restore it to.
+ //
+ process_path () = default;
+ process_path (const char* i, const char** a0): initial (i), args0_ (a0) {}
+
+ // Write the initial value back into the remembered slot, if any.
+ //
+ ~process_path () {if (args0_ != nullptr) *args0_ = initial;}
+
+ private:
+ // Address of the args[0] slot to restore or NULL if there is nothing to
+ // restore.
+ //
+ const char** args0_ = nullptr;
+ };
+
class LIBBUTL_EXPORT process
{
public:
@@ -70,7 +125,13 @@ namespace butl
// exceptions (e.g., if exec() failed) can be thrown in the child
// version of us.
//
- process (char const* const args[], int in = 0, int out = 1, int err = 2);
+ // Note that the versions without the process_path argument may
+ // temporarily change args[0] (see path_search() for details).
+ //
+ process (const char* args[], int in = 0, int out = 1, int err = 2);
+
+ process (const process_path&, const char* args[],
+ int in = 0, int out = 1, int err = 2);
// The "piping" constructor, for example:
//
@@ -80,14 +141,26 @@ namespace butl
// rhs.wait (); // Wait for last first.
// lhs.wait ();
//
- process (char const* const args[], process& in, int out = 1, int err = 2);
+ process (const char* args[], process& in, int out = 1, int err = 2);
+
+ process (const process_path&, const char* args[],
+ process& in, int out = 1, int err = 2);
// Versions of the above constructors that allow us to change the
// current working directory of the child process. NULL and empty
// cwd arguments are ignored.
//
- process (const char* cwd, char const* const[], int = 0, int = 1, int = 2);
- process (const char* cwd, char const* const[], process&, int = 1, int = 2);
+ process (const char* cwd, const char* [], int = 0, int = 1, int = 2);
+
+ process (const char* cwd,
+ const process_path&, const char* [],
+ int = 0, int = 1, int = 2);
+
+ process (const char* cwd, const char* [], process&, int = 1, int = 2);
+
+ process (const char* cwd,
+ const process_path&, const char* [],
+ process&, int = 1, int = 2);
// Wait for the process to terminate. Return true if the process
// terminated normally and with the zero exit status. Unless ignore_error
@@ -122,6 +195,22 @@ namespace butl
//
process ();
+ // Resolve process' paths based on the initial path in args0. If recall
+ // differs from initial, adjust args0 to point to the recall path. If
+ // resolution fails, throw process_error. Normally, you will use this
+ // function like this:
+ //
+ // const char* args[] = {"foo", ..., nullptr};
+ //
+ // process_path pp (process::path_search (args[0]));
+ //
+ // ... // E.g., print args[0].
+ //
+ // process p (pp, args);
+ //
+ process_path
+ path_search (const char*& args0);
+
public:
#ifndef _WIN32
using handle_type = pid_t;
diff --git a/butl/process.cxx b/butl/process.cxx
index 80b97cc..cceceed 100644
--- a/butl/process.cxx
+++ b/butl/process.cxx
@@ -7,6 +7,8 @@
#ifndef _WIN32
# include <unistd.h> // execvp, fork, dup2, pipe, chdir, *_FILENO, getpid
# include <sys/wait.h> // waitpid
+# include <sys/types.h> // stat
+# include <sys/stat.h> // stat(), S_IS*
#else
# include <butl/win32-utility>
@@ -16,13 +18,23 @@
# include <sys/types.h> // stat
# include <sys/stat.h> // stat(), S_IS*
+# ifdef _MSC_VER // Unlikely to be fixed in newer versions.
+# define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+
+# define STDIN_FILENO 0
+# define STDOUT_FILENO 1
+# define STDERR_FILENO 2
+# endif // _MSC_VER
+
# include <memory> // unique_ptr
+# include <cstdlib> // __argv[]
-# include <butl/path>
# include <butl/win32-utility>
#endif
#include <cassert>
+#include <cstddef> // size_t
+#include <cstring> // strlen(), strchr()
#include <butl/utility> // casecmp()
#include <butl/fdstream> // fdnull(), fdclose()
@@ -33,15 +45,6 @@ using namespace std;
using namespace butl::win32;
#endif
-#ifdef _MSC_VER // Unlikely to be fixed in newer versions.
-#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
-# define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
-
-# define STDIN_FILENO 0
-# define STDOUT_FILENO 1
-# define STDERR_FILENO 2
-#endif // _MSC_VER
-
namespace butl
{
class auto_fd
@@ -89,8 +92,96 @@ namespace butl
#ifndef _WIN32
+ process_path process::
+ path_search (const char*& args0)
+ {
+ // Resolve the executable named by args0 (POSIX version). If the name has
+ // no directory component, look for it in each PATH entry and store the
+ // first match as the effective path. The recall path is never set by
+ // this version. If nothing is found, throw process_error (ENOENT).
+ //
+ // Note that there is a similar version for Win32.
+
+ typedef path::traits traits;
+
+ const char* f (args0);
+ size_t fn (strlen (f));
+
+ path rp, ep; // recall & effective
+ auto search = [&ep, f, fn] (const char* d, size_t dn) -> bool
+ {
+ string s (move (ep).string ()); // Reuse buffer.
+
+ // The reused buffer may still hold the previous candidate so it has to
+ // be reset on both branches before the file name is appended.
+ //
+ if (dn != 0)
+ {
+ s.assign (d, dn);
+
+ if (!traits::is_separator (s.back ()))
+ s += traits::directory_separator;
+ }
+ else
+ s.clear (); // Empty directory: just the file name (keeps capacity).
+
+ s.append (f, fn);
+ ep = path (move (s)); // Move back into result.
+
+ // Check that the file exists and has at least one executable bit set.
+ // This way we get a bit closer to the "continue search on EACCES"
+ // semantics (see below).
+ //
+ struct stat si;
+ return (stat (ep.string ().c_str (), &si) == 0 &&
+ S_ISREG (si.st_mode) &&
+ (si.st_mode & (S_IEXEC | S_IXGRP | S_IXOTH)) != 0);
+ };
+
+ for (;;) // The "goto end" loop.
+ {
+ // If there is a directory component in the file, then search does not
+ // apply.
+ //
+ if (traits::find_separator (f, fn) != nullptr)
+ break;
+
+ // The search order is documented in exec(3). Some of the differences
+ // compared to exec*p() functions:
+ //
+ // 1. If there is no PATH, we don't default to current directory/_CS_PATH.
+ // 2. We do not continue searching on EACCES from execve().
+ // 3. We do not execute via default shell on ENOEXEC from execve().
+ //
+ {
+ const char* b (getenv ("PATH"));
+
+ // Here b is the beginning of the current entry and e is the separator
+ // that ends it (NULL for the last entry). A match breaks out with b
+ // still non-NULL while exhausting PATH leaves b NULL.
+ //
+ for (const char* e; b != nullptr; b = (e != nullptr ? e + 1 : e))
+ {
+ e = strchr (b, traits::path_separator);
+
+ // Empty path (i.e., a double colon or a colon at the beginning or
+ // end of PATH) means search in the current directory.
+ //
+ if (search (b, e != nullptr ? e - b : strlen (b)))
+ break;
+ }
+
+ if (b != nullptr)
+ break;
+ }
+
+ // Did not find anything.
+ //
+ throw process_error (ENOENT, false);
+ }
+
+ // Found the file and the result is in rp and ep, both of which can be
+ // empty.
+ //
+ // Move the paths into the result first and only then point args0 at the
+ // recall path. Taking the pointer from rp before the move could leave
+ // args0 referring to the buffer of a local (a short string is copied
+ // rather than stolen on move).
+ //
+ process_path r (f, rp.empty () ? nullptr : &args0);
+
+ r.recall = move (rp);
+ r.effect = move (ep);
+
+ if (!r.recall.empty ())
+ args0 = r.recall.string ().c_str ();
+
+ return r;
+ }
+
process::
- process (const char* cwd, char const* const args[], int in, int out, int err)
+ process (const char* cwd,
+ const process_path& pp, const char* args[],
+ int in, int out, int err)
{
using pipe = auto_fd[2];
@@ -176,7 +267,11 @@ namespace butl
if (cwd != nullptr && *cwd != '\0' && chdir (cwd) != 0)
fail (true);
- if (execvp (args[0], const_cast<char**> (&args[0])) == -1)
+ const char* file (pp.effect.empty ()
+ ? args[0]
+ : pp.effect.string ().c_str ());
+
+ if (execv (file, const_cast<char**> (&args[0])) == -1)
fail (true);
}
@@ -188,9 +283,10 @@ namespace butl
}
process::
- process (const char* cwd, char const* const args[],
+ process (const char* cwd,
+ const process_path& pp, const char* args[],
process& in, int out, int err)
- : process (cwd, args, in.in_ofd, out, err)
+ : process (cwd, pp, args, in.in_ofd, out, err)
{
assert (in.in_ofd != -1); // Should be a pipe.
close (in.in_ofd); // Close it on our side.
@@ -250,102 +346,155 @@ namespace butl
#else // _WIN32
- // Why do we search for the program ourselves when CreateProcess() can be
- // made to do that for us? Well, that's a bit of a historic mystery. We
- // could use it to disable search in the current working directory. Or we
- // could handle batch files automatically.
- //
- static path
- path_search (const path& f)
+ process_path process::
+ path_search (const char*& args0)
+ {
+ // Resolve the executable named by args0 (Win32 version). Look in the
+ // directory of the parent executable, then in the current directory, and
+ // finally in PATH, adding the .exe extension if it is missing. If nothing
+ // is found, throw process_error (ENOENT).
+ //
+ // Note that there is a similar version for POSIX.
+
typedef path::traits traits;
- // If there is a directory component in the file, then the PATH search
- // does not apply.
+ const char* f (args0);
+ size_t fn (strlen (f));
+
+ // Unless there is already the .exe extension, then we will need to add
+ // it. Note that running .bat files requires starting cmd.exe and passing
+ // the batch file as an argument (see CreateProcess() for details). So
+ // if/when we decide to support those, it will have to be handled
+ // differently.
//
- if (!f.simple ())
- return f;
+ bool ext;
+ {
+ const char* e (traits::find_extension (f, fn));
+ ext = (e == nullptr || casecmp (e, ".exe") != 0);
+ }
- path r;
- auto search = [&r, &f] (const char* d, size_t n) -> bool
+ path rp, ep; // recall & effective
+ auto search = [&ep, f, fn, ext] (const char* d, size_t dn) -> bool
{
- string s (move (r).string ()); // Reuse buffer.
+ string s (move (ep).string ()); // Reuse buffer.
- if (n != 0)
+ if (dn != 0)
{
- s.assign (d, n);
+ s.assign (d, dn);
if (!traits::is_separator (s.back ()))
s += traits::directory_separator;
}
- s += f.string ();
- r = path (move (s)); // Move back into result.
+ else
+ s.clear (); // Drop the previous candidate left in the reused buffer.
+
+ s.append (f, fn);
+ ep = path (move (s)); // Move back into result.
- // Unless there is already the .exe extension, add it. Note that running
- // .bat files requires starting cmd.exe and passing the batch file as an
- // argument (see CreateProcess() for deails). So if/when we decide to
- // support those, it will have to be handled differently.
+ // Add the .exe extension if necessary.
//
- const char* e (r.extension ());
- if (e == nullptr || casecmp (e, "exe") != 0)
- r += ".exe";
+ if (ext)
+ ep += ".exe";
// Only check that the file exists since the executable mode is set
// according to the file extension.
//
- struct stat si;
- return stat (r.string ().c_str (), &si) == 0 && S_ISREG (si.st_mode);
+ struct _stat si;
+ return _stat (ep.string ().c_str (), &si) == 0 && S_ISREG (si.st_mode);
};
- // The search order is documented in CreateProcess(). First we look in
- // the directory of the parent executable.
- //
+ for (;;) // The "goto end" loop.
{
- char d[_MAX_PATH + 1];
- DWORD n (GetModuleFileName (NULL, d, _MAX_PATH + 1));
+ // If there is a directory component in the file, then search does not
+ // apply. But we may still need to append the extension.
+ //
+ if (traits::find_separator (f, fn) != nullptr)
+ {
+ if (ext)
+ {
+ ep = path (f, fn);
+ ep += ".exe";
+ }
- if (n == 0 || n == _MAX_PATH + 1) // Failed or truncated.
- throw process_error (last_error_msg ());
+ break;
+ }
- const char* p (traits::rfind_separator (d, n));
- assert (p != nullptr);
+ // The search order is documented in CreateProcess(). First we look in
+ // the directory of the parent executable.
+ //
+ {
+ char d[_MAX_PATH + 1];
+ DWORD n (GetModuleFileName (NULL, d, _MAX_PATH + 1));
- if (search (d, p - d + 1)) // Include trailing slash.
- return r;
- }
+ if (n == 0 || n == _MAX_PATH + 1) // Failed or truncated.
+ throw process_error (last_error_msg ());
- // Next look in the current working directory. Crazy, I know.
- //
- if (search ("", 0))
- return r;
+ const char* p (traits::rfind_separator (d, n));
+ assert (p != nullptr);
- // Finally, search in PATH.
- //
- if (const char* s = getenv ("PATH"))
- {
- string ps (s);
+ if (search (d, p - d + 1)) // Include trailing slash.
+ {
+ // In this case we have to set the recall path.
+ //
+ // Note that the directory we have extracted is always absolute but
+ // the parent's recall path (argv[0]) might be relative. It seems,
+ // ideally, we would want to use parent's argv[0] dir (if any) to
+ // form the recall path. In particular, if the parent has no
+ // directory, then it means it was found via the standard search
+ // (e.g., PATH) and then so should the child.
+ //
+ // How do we get the parent's argv[0]? Luckily, here is __argv on
+ // Windows.
+ //
+ const char* d (__argv[0]);
+ size_t n (strlen (d));
+ if (const char* p = traits::rfind_separator (d, n))
+ {
+ string s (d, p - d + 1); // Include trailing slash.
+ s.append (f, fn);
+ rp = path (move (s));
+ }
+
+ break;
+ }
+ }
- for (size_t b (0), e (ps.find (traits::path_separator));
- b != string::npos;)
+ // Next look in the current working directory. Crazy, I know.
+ //
+ // The recall path is the same as initial, though it might not be a bad
+ // idea to prepend .\ for clarity.
+ //
+ if (search ("", 0))
+ break;
+
+ // Finally, search in PATH. Recall is unchanged.
+ //
{
- // Empty path (i.e., a double colon or a colon at the beginning or end
- // of PATH) means search in the current dirrectory.
- //
- if (search (ps.c_str () + b, (e != string::npos ? e : ps.size ()) - b))
- return r;
+ const char* b (getenv ("PATH"));
- if (e == string::npos)
- b = e;
- else
+ for (const char* e; b != nullptr; b = (e != nullptr ? e + 1 : e))
{
- b = e + 1;
- e = ps.find (traits::path_separator, b);
+ e = strchr (b, traits::path_separator);
+
+ // Empty path (i.e., a double colon or a colon at the beginning or
+ // end of PATH) means search in the current directory.
+ //
+ if (search (b, e != nullptr ? e - b : strlen (b)))
+ break;
}
+
+ if (b != nullptr)
+ break;
}
+
+ // Did not find anything.
+ //
+ throw process_error (ENOENT);
}
- return path ();
+ // Found the file and the result is in rp and ep, both of which can be
+ // empty.
+ //
+ // Move the paths into the result first and only then point args0 at the
+ // recall path. Taking the pointer from rp before the move could leave
+ // args0 referring to the buffer of a local (a short string is copied
+ // rather than stolen on move).
+ //
+ process_path r (f, rp.empty () ? nullptr : &args0);
+
+ r.recall = move (rp);
+ r.effect = move (ep);
+
+ if (!r.recall.empty ())
+ args0 = r.recall.string ().c_str ();
+
+ return r;
}
class auto_handle
@@ -392,7 +541,9 @@ namespace butl
};
process::
- process (const char* cwd, char const* const args[], int in, int out, int err)
+ process (const char* cwd,
+ const process_path& pp, const char* args[],
+ int in, int out, int err)
{
using pipe = auto_handle[2];
@@ -481,32 +632,15 @@ namespace butl
// Create the process.
//
- path file (args[0]);
-
- // Do PATH search.
- //
- if (file.simple ())
- {
- file = path_search (file);
-
- if (file.empty ())
- fail ("file not found");
- }
- else
- {
- // Unless there is already the .exe extension, add it. See path_search()
- // for details.
- //
- const char* e (file.extension ());
- if (e == nullptr || casecmp (e, "exe") != 0)
- file += ".exe";
- }
+ const char* file (pp.effect.empty ()
+ ? args[0]
+ : pp.effect.string ().c_str ());
// Serialize the arguments to string.
//
string cmd_line;
- for (char const* const* p (args); *p != 0; ++p)
+ for (const char* const* p (args); *p != 0; ++p)
{
if (p != args)
cmd_line += ' ';
@@ -575,7 +709,7 @@ namespace butl
fail ("invalid file descriptor");
if (!CreateProcess (
- file.string ().c_str (),
+ file,
const_cast<char*> (cmd_line.c_str ()),
0, // Process security attributes.
0, // Primary thread security attributes.
@@ -621,9 +755,10 @@ namespace butl
}
process::
- process (const char* cwd, char const* const args[],
+ process (const char* cwd,
+ const process_path& pp, const char* args[],
process& in, int out, int err)
- : process (cwd, args, in.in_ofd, out, err)
+ : process (cwd, pp, args, in.in_ofd, out, err)
{
assert (in.in_ofd != -1); // Should be a pipe.
_close (in.in_ofd); // Close it on our side.
diff --git a/butl/process.ixx b/butl/process.ixx
index 3b7ed7d..59ac60c 100644
--- a/butl/process.ixx
+++ b/butl/process.ixx
@@ -2,8 +2,39 @@
// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
// license : MIT; see accompanying LICENSE file
+#include <utility> // move()
+
namespace butl
{
+ // Move constructor: take over p's paths and its args[0] restore slot,
+ // leaving p with nothing to restore.
+ //
+ inline process_path::
+ process_path (process_path&& p)
+ : initial (p.initial),
+ effect (std::move (p.effect)),
+ args0_ (p.args0_)
+ {
+ // Moving a string may relocate its characters (short strings are copied
+ // rather than stolen). So if args[0] currently points into p.recall,
+ // note that before the move and repoint it to our copy afterwards.
+ //
+ bool repoint (args0_ != nullptr &&
+ *args0_ == p.recall.string ().c_str ());
+
+ recall = std::move (p.recall);
+
+ if (repoint)
+ *args0_ = recall.string ().c_str ();
+
+ p.args0_ = nullptr;
+ }
+
+ // Move assignment: restore our own args[0] slot (if any) to the initial
+ // value, then take over p's paths and restore slot, leaving p with nothing
+ // to restore.
+ //
+ inline process_path& process_path::
+ operator= (process_path&& p)
+ {
+ if (this != &p)
+ {
+ if (args0_ != nullptr)
+ *args0_ = initial;
+
+ // Moving a string may relocate its characters (short strings are
+ // copied rather than stolen). So if p's args[0] currently points into
+ // p.recall, note that before the move and repoint it afterwards.
+ //
+ bool repoint (p.args0_ != nullptr &&
+ *p.args0_ == p.recall.string ().c_str ());
+
+ initial = p.initial;
+ recall = std::move (p.recall);
+ effect = std::move (p.effect);
+ args0_ = p.args0_;
+
+ if (repoint)
+ *args0_ = recall.string ().c_str ();
+
+ p.args0_ = nullptr;
+ }
+
+ return *this;
+ }
+
inline process::
process ()
: handle (0),
@@ -15,12 +46,30 @@ namespace butl
}
+ // Convenience constructors. Each forwards to a constructor that takes both
+ // cwd and process_path, passing a NULL cwd where none is given. The
+ // versions without the process_path argument obtain one by calling
+ // path_search() on args[0] and pass the resulting temporary along.
+ //
inline process::
- process (char const* const args[], int in, int out, int err)
- : process (nullptr, args, in, out, err) {}
+ process (const char* args[], int in, int out, int err)
+ : process (nullptr, path_search (args[0]), args, in, out, err) {}
+
+ // No cwd, caller-supplied process_path.
+ //
+ inline process::
+ process (const process_path& pp, const char* args[],
+ int in, int out, int err)
+ : process (nullptr, pp, args, in, out, err) {}
+
+ // Piping version: no cwd, path resolved via path_search().
+ //
+ inline process::
+ process (const char* args[], process& in, int out, int err)
+ : process (nullptr, path_search (args[0]), args, in, out, err) {}
+
+ // Piping version: no cwd, caller-supplied process_path.
+ //
+ inline process::
+ process (const process_path& pp, const char* args[],
+ process& in, int out, int err)
+ : process (nullptr, pp, args, in, out, err) {}
+
+ // With cwd, path resolved via path_search().
+ //
+ inline process::
+ process (const char* cwd, const char* args[], int in, int out, int err)
+ : process (cwd, path_search (args[0]), args, in, out, err) {}
inline process::
- process (char const* const args[], process& in, int out, int err)
- : process (nullptr, args, in, out, err) {}
+ process (const char* cwd, const char* args[], process& in, int out, int err)
+ : process (cwd, path_search (args[0]), args, in, out, err) {}
inline process::
process (process&& p)