From 757f42e7dea94f8b79b3d55074dedeafd853ddc5 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Fri, 25 Nov 2016 15:17:01 +0200
Subject: Implement literal here-document support

---
 build2/lexer                  | 10 +++---
 build2/lexer.cxx              | 10 +++---
 build2/test/script/lexer      | 10 ++++--
 build2/test/script/lexer.cxx  | 34 +++++++++++++++---
 build2/test/script/parser     |  6 ++--
 build2/test/script/parser.cxx | 81 ++++++++++++++++++++++++++++---------------
 6 files changed, 105 insertions(+), 46 deletions(-)

(limited to 'build2')
diff --git a/build2/lexer b/build2/lexer
index 59150a9..e2cf07c 100644
--- a/build2/lexer
+++ b/build2/lexer
@@ -85,7 +85,9 @@ namespace build2
     // specifythe pair separator character (if the mode supports pairs).
     //
     virtual void
-    mode (lexer_mode, char pair_separator = '\0');
+    mode (lexer_mode,
+          char pair_separator = '\0',
+          const char* escapes = nullptr);
 
     // Expire the current mode early.
     //
@@ -119,6 +121,8 @@ namespace build2
       bool sep_space; // Are whitespaces separators (see skip_spaces())?
       bool quotes;    // Recognize quoted fragments.
 
+      const char* escapes; // Effective escape sequences to recognize.
+
       // Word separator characters. For two-character sequence put the first
       // one in sep_first and the second one in the corresponding position of
       // sep_second. If it's a single-character sequence, then put space in
@@ -170,16 +174,14 @@ namespace build2
         : char_scanner (is),
           fail ("error", &name_),
           name_ (n),
-          escapes_ (e),
           processor_ (p),
           sep_ (false)
     {
       if (sm)
-        mode (lexer_mode::normal, '@');
+        mode (lexer_mode::normal, '@', e);
     }
 
     const path name_;
-    const char* escapes_;
     void (*processor_) (token&, const lexer&);
 
     std::stack<state> state_;
diff --git a/build2/lexer.cxx b/build2/lexer.cxx
index b73c291..3c8eb5a 100644
--- a/build2/lexer.cxx
+++ b/build2/lexer.cxx
@@ -30,7 +30,7 @@ namespace build2
   }
 
   void lexer::
-  mode (lexer_mode m, char ps)
+  mode (lexer_mode m, char ps, const char* esc)
   {
     const char* s1 (nullptr);
     const char* s2 (nullptr);
@@ -76,7 +76,7 @@ namespace build2
     default: assert (false); // Unhandled custom mode.
     }
 
-    state_.push (state {m, ps, s, q, s1, s2});
+    state_.push (state {m, ps, s, q, esc, s1, s2});
   }
 
   token lexer::
@@ -329,8 +329,10 @@ namespace build2
         get ();
         xchar p (peek ());
 
-        if (escapes_ == nullptr ||
-            (!eos (p) && strchr (escapes_, p) != nullptr))
+        const char* esc (st.escapes);
+
+        if (esc == nullptr ||
+            (*esc != '\0' && !eos (p) && strchr (esc, p) != nullptr))
         {
           get ();
 
diff --git a/build2/test/script/lexer b/build2/test/script/lexer
index 5597e9a..b812f84 100644
--- a/build2/test/script/lexer
+++ b/build2/test/script/lexer
@@ -29,7 +29,8 @@ namespace build2
           second_token,    // Expires at the end of the token.
           variable_line,   // Expires at the end of the line.
           command_line,
-          here_line,
+          here_line_single,
+          here_line_double,
           description_line // Expires at the end of the line.
         };
 
@@ -48,10 +49,13 @@ namespace build2
                const path& name,
                lexer_mode m,
                const char* escapes = nullptr)
-            : base_lexer (is, name, escapes, nullptr, false) {mode (m);}
+            : base_lexer (is, name, nullptr, nullptr, false)
+        {
+          mode (m, '\0', escapes);
+        }
 
         virtual void
-        mode (base_mode, char = '\0') override;
+        mode (base_mode, char = '\0', const char* = nullptr) override;
 
         // Number of quoted (double or single) tokens since last reset.
         //
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
index 19e7498..72fa85b 100644
--- a/build2/test/script/lexer.cxx
+++ b/build2/test/script/lexer.cxx
@@ -15,7 +15,7 @@ namespace build2
       using type = token_type;
 
       void lexer::
-      mode (base_mode m, char ps)
+      mode (base_mode m, char ps, const char* esc)
       {
         const char* s1 (nullptr);
         const char* s2 (nullptr);
@@ -76,7 +76,23 @@ namespace build2
             s = false;
             break;
           }
-        case lexer_mode::here_line:
+        case lexer_mode::here_line_single:
+          {
+            // This one is like a single-quoted string except it treats
+            // newlines as a separator. We also treat quotes as literals.
+            //
+            // Note that it might be tempting to enable line continuation
+            // escapes. However, we will then have to also enable escaping of
+            // the backslash, which makes it a lot less tempting.
+            //
+            s1 = "\n";
+            s2 = " ";
+            esc = ""; // Disable escape sequences.
+            s = false;
+            q = false;
+            break;
+          }
+        case lexer_mode::here_line_double:
           {
             // This one is like a double-quoted string except it treats
             // newlines as a separator. We also treat quotes as literals.
@@ -105,13 +121,13 @@ namespace build2
                     m == lexer_mode::eval ||
                     m == lexer_mode::attribute);
 
-            base_lexer::mode (m, ps);
+            base_lexer::mode (m, ps, esc);
             return;
           }
         }
 
         assert (ps == '\0');
-        state_.push (state {m, ps, s, q, s1, s2});
+        state_.push (state {m, ps, s, q, esc, s1, s2});
       }
 
       token lexer::
@@ -126,7 +142,8 @@ namespace build2
         case lexer_mode::second_token:
         case lexer_mode::variable_line:
         case lexer_mode::command_line:
-        case lexer_mode::here_line:
+        case lexer_mode::here_line_single:
+        case lexer_mode::here_line_double:
           r = next_line ();
           break;
         case lexer_mode::description_line:
@@ -184,7 +201,13 @@ namespace build2
               sep = true; // Treat newline as always separated.
               return make_token (type::newline);
             }
+          }
+        }
 
+        if (m != lexer_mode::here_line_single)
+        {
+          switch (c)
+          {
             // Variable expansion, function call, and evaluation context.
             //
           case '$': return make_token (type::dollar);
@@ -192,6 +215,7 @@ namespace build2
           }
         }
 
+
         if (m == lexer_mode::variable_line)
         {
           switch (c)
diff --git a/build2/test/script/parser b/build2/test/script/parser
index fdfbe11..da82df2 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -99,10 +99,10 @@ namespace build2
         {
           size_t expr;  // Index in command_expr.
           size_t pipe;  // Index in command_pipe.
-          size_t redir; // Redirect (0 - in, 1 - out, 2 - err).
-
+          int fd;       // Redirect fd (0 - in, 1 - out, 2 - err).
           string end;
-          bool no_newline;
+          bool literal;    // Literal (single-quote).
+          bool no_newline; // No final newline.
         };
         using here_docs = vector<here_doc>;
 
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index 7655ba9..9e2018f 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -1261,11 +1261,11 @@ namespace build2
         cleanup_type ct;  // Pending cleanup type.
         here_docs hd;     // Expected here-documents.
 
-        // Add the next word to either one of the pending positions or
-        // to program arguments by default.
+        // Add the next word to either one of the pending positions or to
+        // program arguments by default.
         //
-        auto add_word = [&expr, &c, &p, &nn, &app, &ct, &hd, this]
-          (string&& w, const location& l)
+        auto add_word =
+          [&c, &p, &nn, &app, &ct, this] (string&& w, const location& l)
         {
           auto add_merge = [&l, this] (redirect& r, const string& w, int fd)
           {
@@ -1290,13 +1290,6 @@ namespace build2
             r.str = move (w);
           };
 
-          auto add_here_end = [&expr, &hd, &nn] (size_t r, string&& w)
-          {
-            hd.push_back (
-              here_doc {
-                expr.size () - 1, expr.back ().pipe.size (), r, move (w), nn});
-          };
-
           auto parse_path = [&l, this] (string&& w, const char* what) -> path
           {
             try
@@ -1335,10 +1328,8 @@ namespace build2
           {
           case pending::none: c.arguments.push_back (move (w)); break;
           case pending::program:
-          {
             c.program = parse_path (move (w), "program path");
             break;
-          }
 
           case pending::out_merge: add_merge (c.out, w, 2); break;
           case pending::err_merge: add_merge (c.err, w, 1); break;
@@ -1347,21 +1338,19 @@ namespace build2
           case pending::out_string: add_here_str (c.out, move (w)); break;
           case pending::err_string: add_here_str (c.err, move (w)); break;
 
-          case pending::in_document:  add_here_end (0, move (w)); break;
-          case pending::out_document: add_here_end (1, move (w)); break;
-          case pending::err_document: add_here_end (2, move (w)); break;
+            // These are handled specially below.
+            //
+          case pending::in_document:
+          case pending::out_document:
+          case pending::err_document: assert (false); break;
 
           case pending::in_file:  add_file (c.in,  0, move (w)); break;
           case pending::out_file: add_file (c.out, 1, move (w)); break;
           case pending::err_file: add_file (c.err, 2, move (w)); break;
 
           case pending::clean:
-            {
-              c.cleanups.push_back (
-                {ct, parse_path (move (w), "cleanup path")});
-
-              break;
-            }
+            c.cleanups.push_back ({ct, parse_path (move (w), "cleanup path")});
+            break;
           }
 
           p = pending::none;
@@ -1692,7 +1681,9 @@ namespace build2
                     fail (t) << "partially-quoted here-document end marker";
                   }
 
-                  hd.push_back (here_doc {0, 0, 0, move (t.value), nn});
+                  hd.push_back (
+                    here_doc {
+                      0, 0, 0, move (t.value), qt == quote_type::single, nn});
                   break;
                 }
 
@@ -1774,6 +1765,40 @@ namespace build2
             }
           default:
             {
+              // Here-document end markers are literal (we verified that above
+              // during pre-parsing) and we need to know whether they were
+              // quoted. So handle this case specially.
+              //
+              {
+                int fd;
+                switch (p)
+                {
+                case pending::in_document:  fd =  0; break;
+                case pending::out_document: fd =  1; break;
+                case pending::err_document: fd =  2; break;
+                default:                    fd = -1; break;
+                }
+
+                if (fd != -1)
+                {
+                  hd.push_back (
+                    here_doc {
+                      expr.size () - 1,
+                      expr.back ().pipe.size (),
+                      fd,
+                      move (t.value),
+                      (t.qtype == quote_type::unquoted ||
+                       t.qtype == quote_type::single),
+                      nn});
+
+                  p = pending::none;
+                  nn = false;
+
+                  next (t, tt);
+                  break;
+                }
+              }
+
               // Parse the next chunk as simple names to get expansion, etc.
               // Note that we do it in the chunking mode to detect whether
               // anything in each chunk is quoted.
@@ -2060,10 +2085,12 @@ namespace build2
         //
         for (here_doc& h: p.second)
         {
-          // Switch to the here-line mode which is like double-quoted but
-          // recognized the newline as a separator.
+          // Switch to the here-line mode which is like single/double-quoted
+          // string but recognizes the newline as a separator.
           //
-          mode (lexer_mode::here_line);
+          mode (h.literal
+                ? lexer_mode::here_line_single
+                : lexer_mode::here_line_double);
           next (t, tt);
 
           string v (parse_here_document (t, tt, h.end, h.no_newline));
@@ -2071,7 +2098,7 @@ namespace build2
           if (!pre_parse_)
           {
             command& c (p.first[h.expr].pipe[h.pipe]);
-            redirect& r (h.redir == 0 ? c.in : h.redir == 1 ? c.out : c.err);
+            redirect& r (h.fd == 0 ? c.in : h.fd == 1 ? c.out : c.err);
 
             r.doc.doc = move (v);
             r.doc.end = move (h.end);
-- 
cgit v1.1