Implement support for here-document indentation

author: Boris Kolpackov <boris@codesynthesis.com> 2016-11-01 10:28:23 +0200
committer: Boris Kolpackov <boris@codesynthesis.com> 2016-11-04 09:26:37 +0200
commit: 7f6808984dfb37848edf149c34242f7163eb1a17 (patch)
tree: d74ae705c9cf1e48aa2430fbd2299d7bb2f5724c
parent: 35c9e7698e768883065d944b2c43e5af9cb37ee4 (diff)
2 files changed, 180 insertions, 11 deletions
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index 4c7b17b..fc40fd0 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -1543,17 +1543,55 @@ namespace build2
       {
         string r;
 
+        // Here-documents can be indented. The leading whitespaces of the end
+        // marker line (called strip prefix) determine the indentation. Every
+        // other line in the here-document should start with this prefix which
+        // is automatically stripped. The only exception is a blank line.
+        //
+        // The fact that the strip prefix is only known at the end, after
+        // seeing all the lines, is rather inconvenient. As a result, the way
+        // we implement this is a bit hackish (though there is also something
+        // elegant about it): at the end of the pre-parse stage we are going to
+        // re-examine the sequence of tokens that comprise this here-document
+        // and "fix up" the first token of each line by stripping the prefix.
+        //
+        string sp;
+
+        // Remember the position of the first token in this here-document.
+        //
+        size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0);
+
         while (tt != type::eos)
         {
-          // Check if this is the end marker.
+          // Check if this is the end marker. For starters, it should be a
+          // single, unquoted word followed by a newline.
           //
-          if (tt == type::word &&
-              !t.quoted        &&
-              t.value == em    &&
-              peek () == type::newline)
+          if (tt == type::word && !t.quoted && peek () == type::newline)
           {
-            next (t, tt); // Get the newline.
-            break;
+            const string& v (t.value);
+
+            size_t vn (v.size ());
+            size_t en (em.size ());
+
+            // Then check that it ends with the end marker.
+            //
+            if (vn >= en && v.compare (vn - en, en, em) == 0)
+            {
+              // Now check that the prefix only contains whitespaces.
+              //
+              size_t n (vn - en);
+
+              if (v.find_first_not_of (" \t") >= n)
+              {
+                assert (pre_parse_ || n == 0); // Should have been stripped.
+
+                if (n != 0)
+                  sp.assign (v, 0, n); // Save the strip prefix.
+
+                next (t, tt); // Get the newline.
+                break;
+              }
+            }
           }
 
           // Expand the line (can be blank).
@@ -1604,10 +1642,53 @@ namespace build2
         if (tt == type::eos)
           fail (t) << "missing here-document end marker '" << em << "'";
 
-        // Add final newline if requested.
-        //
-        if (!pre_parse_ && !nn)
-          r += '\n';
+        if (pre_parse_)
+        {
+          // Strip the indentation prefix if there is one.
+          //
+          assert (replay_ == replay::save);
+
+          if (!sp.empty ())
+          {
+            size_t sn (sp.size ());
+
+            for (; ri != replay_data_.size (); ++ri)
+            {
+              token& rt (replay_data_[ri].token);
+
+              if (rt.type == type::newline) // Blank
+                continue;
+
+              if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0)
+                fail (rt) << "unindented here-document line";
+
+              // If the word is equal to the strip prefix then we have to drop
+              // the token. Note that simply making it an empty word won't
+              // have the same semantics. For instance, it would trigger
+              // concatenated expansion.
+              //
+              if (rt.value.size () == sn)
+                replay_data_.erase (replay_data_.begin () + ri);
+              else
+              {
+                rt.value.erase (0, sn);
+                rt.column += sn;
+                ++ri;
+              }
+
+              // Skip until next newline.
+              //
+              for (; replay_data_[ri].token.type != type::newline; ++ri) ;
+            }
+          }
+        }
+        else
+        {
+          // Add final newline if requested.
+          //
+          if (!nn)
+            r += '\n';
+        }
 
         return r;
       }
diff --git a/unit-tests/test/script/parser/here-document.test b/unit-tests/test/script/parser/here-document.test
index d6b21fd..4fa62d2 100644
--- a/unit-tests/test/script/parser/here-document.test
+++ b/unit-tests/test/script/parser/here-document.test
@@ -1,4 +1,92 @@
 
+: indent
+:
+{
+  : basic
+  :
+  $* <<EOI >>EOO
+  cmd <<EOF
+    foo
+     bar
+     	 baz
+    EOF
+  EOI
+  cmd <<EOF
+  foo
+   bar
+   	 baz
+  EOF
+  EOO
+
+  : blank
+  :
+  $* <<EOI >>EOO
+  cmd <<EOF
+    foo
+
+
+    bar
+    EOF
+  EOI
+  cmd <<EOF
+  foo
+
+
+  bar
+  EOF
+  EOO
+
+  : non-ws-prefix
+  :
+  $* <<EOI >>EOO
+  cmd <<EOF
+   x EOF
+  EOF
+  EOI
+  cmd <<EOF
+   x EOF
+  EOF
+  EOO
+
+  : whole-token
+  : Test the case where the indentation is a whole token
+  :
+  $* <<EOI >>EOO
+  x = foo bar
+  cmd <<EOF
+    \$x
+    EOF
+  EOI
+  cmd <<EOF
+  foo bar
+  EOF
+  EOO
+
+  : long-line
+  : Test the case where the line contains multiple tokens
+  :
+  $* <<EOI >>EOO
+  x = foo
+  cmd <<EOF
+     \$x bar \$x
+    EOF
+  EOI
+  cmd <<EOF
+   foo bar foo
+  EOF
+  EOO
+
+  : unindented
+  :
+  $* <<EOI 2>>EOE != 0
+  cmd <<EOF
+   bar
+    EOF
+  EOI
+  testscript:2:1: error: unindented here-document line
+  EOE
+}
+
 $* <<EOI >>EOO  # blank-lines
 cmd <<EOF
 foo
author	Boris Kolpackov <boris@codesynthesis.com>	2016-11-01 10:28:23 +0200
committer	Boris Kolpackov <boris@codesynthesis.com>	2016-11-04 09:26:37 +0200
commit	7f6808984dfb37848edf149c34242f7163eb1a17 (patch)
tree	d74ae705c9cf1e48aa2430fbd2299d7bb2f5724c
parent	35c9e7698e768883065d944b2c43e5af9cb37ee4 (diff)