Allow process path values and targets as buildscript program names

Also deduce the recipe name.
author: Karen Arutyunov <karen@codesynthesis.com> 2020-05-26 21:35:59 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2020-06-03 12:26:33 +0300
commit: 920ed11a433b0e292a18adb8c68829a00e8c70cc (patch)
tree: e365baf8be68b168e19f42f20c5dde1526c1cbba /libbuild2/script
parent: 4001ff053071c09008e88312c4f973c417322a07 (diff)
5 files changed, 141 insertions, 39 deletions
diff --git a/libbuild2/script/parser.cxx b/libbuild2/script/parser.cxx
index aa60111..33dc273 100644
--- a/libbuild2/script/parser.cxx
+++ b/libbuild2/script/parser.cxx
@@ -96,6 +96,19 @@ namespace build2
       return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp));
     }
 
+    optional<process_path> parser::
+    parse_program (token& t, type& tt, names& ns)
+    {
+      parse_names (t, tt,
+                   ns,
+                   pattern_mode::ignore,
+                   true /* chunk */,
+                   "command line",
+                   nullptr);
+
+      return nullopt;
+    }
+
     pair<command_expr, parser::here_docs> parser::
     parse_command_expr (token& t, type& tt,
                         const redirect_aliases& ra)
@@ -279,7 +292,9 @@ namespace build2
         {
         case pending::none: c.arguments.push_back (move (w)); break;
         case pending::program:
-          c.program = parse_path (move (w), "program path");
+          c.program = process_path (nullptr /* initial */,
+                                    parse_path (move (w), "program path"),
+                                    path () /* effect */);
           break;
 
         case pending::out_merge: add_merge (c.out, w, 2); break;
@@ -726,12 +741,20 @@ namespace build2
           {
             if (pre_parse_)
             {
-              // The only things we need to handle here are the here-document
-              // and here-document regex end markers since we need to know
-              // how many of them to pre-parse after the command.
+              // The only things we need to handle here are the tokens that
+              // introduce the next command, since we handle the command
+              // leading name chunks specially, and the here-document and
+              // here-document regex end markers, since we need to know how
+              // many of them to pre-parse after the command.
               //
               switch (tt)
               {
+              case type::pipe:
+              case type::log_or:
+              case type::log_and:
+                p = pending::program;
+                break;
+
               case type::in_doc:
               case type::out_doc:
                 mod = move (t.value);
@@ -994,23 +1017,53 @@ namespace build2
               }
             }
 
-            // Parse the next chunk as simple names to get expansion, etc.
-            // Note that we do it in the chunking mode to detect whether
-            // anything in each chunk is quoted.
+            // Parse the next chunk as names to get expansion, etc. Note that
+            // we do it in the chunking mode to detect whether anything in
+            // each chunk is quoted. If we are waiting for the command
+            // program, then delegate the parsing to the derived parser, so it
+            // can translate complex program names (targets, process_paths)
+            // during execution and perform some static analysis during
+            // pre-parsing.
             //
             // @@ PAT: should we support pattern expansion? This is even
             // fuzzier than the variable case above. Though this is the
             // shell semantics. Think what happens when we do rm *.txt?
             //
             reset_quoted (t);
-            parse_names (t, tt,
-                         ns,
-                         pattern_mode::ignore,
-                         true,
-                         "command line",
-                         nullptr);
-
-            if (pre_parse_) // Nothing else to do if we are pre-parsing.
+
+            if (p == pending::program)
+            {
+              optional<process_path> pp (parse_program (t, tt, ns));
+
+              // During pre-parsing we are not interested in the
+              // parse_program() call result, so just discard the potentially
+              // unhandled program chunk names.
+              //
+              if (!pre_parse_)
+              {
+                if (pp)
+                {
+                  c.program = move (*pp);
+                  p = pending::none;
+                }
+              }
+              else
+              {
+                ns.clear ();
+                p = pending::none;
+              }
+            }
+            else
+              parse_names (t, tt,
+                           ns,
+                           pattern_mode::ignore,
+                           true /* chunk */,
+                           "command line",
+                           nullptr);
+
+            // Nothing else to do if we are pre-parsing.
+            //
+            if (pre_parse_)
               break;
 
             // Process what we got. Determine whether anything inside was
diff --git a/libbuild2/script/parser.hxx b/libbuild2/script/parser.hxx
index a63ecde..b15f632 100644
--- a/libbuild2/script/parser.hxx
+++ b/libbuild2/script/parser.hxx
@@ -162,14 +162,42 @@ namespace build2
                   size_t& li,
                   variable_pool* = nullptr);
 
+      // Customization hooks.
+      //
+    protected:
+      // Parse the command's leading name chunk.
+      //
+      // During the execution phase try to parse and translate the leading
+      // names into the process path. Return nullopt if the hook chooses not
+      // to do so, leaving it to the parser to handle. Also return in the last
+      // argument uninterpreted names, if any.
+      //
+      // The default implementation always returns nullopt. The derived parser
+      // can provide an override that can, for example, handle process path
+      // values, executable targets, etc.
+      //
+      // Note that normally it makes sense to leave simple unpaired names for
+      // the parser to handle, unless there is a good reason not to (e.g.,
+      // it's a special builtin or some such). Such names may contain
+      // something that requires re-lexing, for example `foo|bar`, which won't
+      // be easy to translate but which is handled by the parser.
+      //
+      // During the pre-parsing phase the returned process path and names
+      // (that must still be parsed) are discarded. The main purpose of the
+      // call is to allow implementations to perform static script analysis,
+      // recognize and execute certain directives, or some such.
+      //
+      virtual optional<process_path>
+      parse_program (token&, token_type&, names&);
+
       // Set lexer pointers for both the current and the base classes.
       //
     protected:
       void
       set_lexer (lexer*);
 
-      // Number of quoted tokens since last reset. Note that this includes
-      // the peeked token, if any.
+      // Number of quoted tokens since last reset. Note that this includes the
+      // peeked token, if any.
       //
     protected:
       size_t
diff --git a/libbuild2/script/run.cxx b/libbuild2/script/run.cxx
index 38436b9..5629a15 100644
--- a/libbuild2/script/run.cxx
+++ b/libbuild2/script/run.cxx
@@ -102,7 +102,7 @@ namespace build2
       catch (const io_error& e)
       {
         // While there can be no fault of the script command being currently
-        // executed let's add the location anyway to ease the
+        // executed let's add the location anyway to help with
         // troubleshooting. And let's stick to that principle down the road.
         //
         fail (ll) << "unable to read " << p << ": " << e << endf;
@@ -949,7 +949,14 @@ namespace build2
       command_pipe::const_iterator nc (bc + 1);
       bool last (nc == ec);
 
-      const string& program (c.program.string ());
+      // True if the process path is not pre-searched and the program path
+      // still needs to be resolved.
+      //
+      bool resolve (c.program.initial == nullptr);
+
+      // Program name that may require resolution.
+      //
+      const string& program (c.program.recall.string ());
 
       const redirect& in ((c.in ? *c.in : env.in).effective ());
 
@@ -961,7 +968,7 @@ namespace build2
 
       auto process_args = [&c] () -> cstrings
       {
-        cstrings args {c.program.string ().c_str ()};
+        cstrings args {c.program.recall_string ()};
 
         for (const auto& a: c.arguments)
           args.push_back (a.c_str ());
@@ -982,7 +989,7 @@ namespace build2
       // specify any redirects or exit code check sounds like a right thing
       // to do.
       //
-      if (program == "exit")
+      if (resolve && program == "exit")
       {
         // In case the builtin is erroneously pipelined from the other
         // command, we will close stdin gracefully (reading out the stream
@@ -1150,7 +1157,7 @@ namespace build2
       // that. Checking that the user didn't specify any meaningless
       // redirects or exit code check sounds as a right thing to do.
       //
-      if (program == "set")
+      if (resolve && program == "set")
       {
         if (!last)
           fail (ll) << "set builtin must be the last pipe command";
@@ -1322,11 +1329,13 @@ namespace build2
       assert (ofd.out.get () != -1 && efd.get () != -1);
 
       optional<process_exit> exit;
-      builtin_function* bf (builtins.find (program));
+      const builtin_info* bi (resolve
+                              ? builtins.find (program)
+                              : nullptr);
 
       bool success;
 
-      if (bf != nullptr)
+      if (bi != nullptr && bi->function != nullptr)
       {
         // Execute the builtin.
         //
@@ -1544,11 +1553,11 @@ namespace build2
         try
         {
           uint8_t r; // Storage.
-          builtin b (bf (r,
-                         c.arguments,
-                         move (ifd), move (ofd.out), move (efd),
-                         *env.work_dir.path,
-                         bcs));
+          builtin b (bi->function (r,
+                                   c.arguments,
+                                   move (ifd), move (ofd.out), move (efd),
+                                   *env.work_dir.path,
+                                   bcs));
 
           success = run_pipe (env,
                               nc,
@@ -1570,14 +1579,15 @@ namespace build2
         //
         cstrings args (process_args ());
 
-        // Resolve the relative not simple program path against the script's
-        // working directory. The simple one will be left for the process
-        // path search machinery. Also strip the potential leading `^`,
-        // indicating that this is an external program rather than a
-        // builtin.
+        // If the process path is not pre-searched then resolve the relative
+        // non-simple program path against the script's working directory. The
+        // simple one will be left for the process path search machinery. Also
+        // strip the potential leading `^` (indicates that this is an external
+        // program rather than a builtin).
         //
         path p;
 
+        if (resolve)
         try
         {
           p = path (args[0]);
@@ -1610,7 +1620,9 @@ namespace build2
 
         try
         {
-          process_path pp (process::path_search (args[0]));
+          process_path pp (resolve
+                           ? process::path_search (args[0])
+                           : process_path ());
 
           // Note: the builtin-escaping character '^' is not printed.
           //
@@ -1618,7 +1630,7 @@ namespace build2
             print_process (args);
 
           process pr (
-            pp,
+            resolve ? pp : c.program,
             args.data (),
             {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()},
             env.work_dir.path->string ().c_str ());
@@ -1656,7 +1668,11 @@ namespace build2
       if (!success)
         return false;
 
-      const path& pr (c.program);
+      // Use the program path for diagnostics (print relative, etc.).
+      //
+      const path& pr (resolve
+                      ? c.program.recall
+                      : path (c.program.recall_string ())); // Can't throw.
 
       // If there is no valid exit code available by whatever reason then we
       // print the proper diagnostics, dump stderr (if cached and not too
diff --git a/libbuild2/script/script.cxx b/libbuild2/script/script.cxx
index c85bfd3..7722b47 100644
--- a/libbuild2/script/script.cxx
+++ b/libbuild2/script/script.cxx
@@ -350,7 +350,7 @@ namespace build2
       {
         // Program.
         //
-        to_stream_q (o, c.program.string ());
+        to_stream_q (o, c.program.recall_string ());
 
         // Arguments.
         //
diff --git a/libbuild2/script/script.hxx b/libbuild2/script/script.hxx
index f4998b7..891b2f6 100644
--- a/libbuild2/script/script.hxx
+++ b/libbuild2/script/script.hxx
@@ -294,7 +294,12 @@ namespace build2
     //
     struct command
     {
-      path program;
+      // We use NULL initial as an indication that the path stored in recall
+      // is a program name that still needs to be resolved into the builtin
+      // function or the process path.
+      //
+      process_path program;
+
       strings arguments;
 
       optional<redirect> in;
author	Karen Arutyunov <karen@codesynthesis.com>	2020-05-26 21:35:59 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2020-06-03 12:26:33 +0300
commit	920ed11a433b0e292a18adb8c68829a00e8c70cc (patch)
tree	e365baf8be68b168e19f42f20c5dde1526c1cbba /libbuild2/script
parent	4001ff053071c09008e88312c4f973c417322a07 (diff)