aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2016-11-01 10:28:23 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2016-11-04 09:26:37 +0200
commit7f6808984dfb37848edf149c34242f7163eb1a17 (patch)
treed74ae705c9cf1e48aa2430fbd2299d7bb2f5724c
parent35c9e7698e768883065d944b2c43e5af9cb37ee4 (diff)
Implement support for here-document indentation
-rw-r--r--build2/test/script/parser.cxx103
-rw-r--r--unit-tests/test/script/parser/here-document.test88
2 files changed, 180 insertions, 11 deletions
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index 4c7b17b..fc40fd0 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -1543,17 +1543,55 @@ namespace build2
{
string r;
+ // Here-documents can be indented. The leading whitespaces of the end
+ // marker line (called strip prefix) determine the indentation. Every
+ // other line in the here-document should start with this prefix which
+ // is automatically stripped. The only exception is a blank line.
+ //
+ // The fact that the strip prefix is only known at the end, after
+ // seeing all the lines, is rather inconvenient. As a result, the way
+ // we implement this is a bit hackish (though there is also something
+ // elegant about it): at the end of the pre-parse stage we are going
+ // re-examine the sequence of tokens that comprise this here-document
+ // and "fix up" the first token of each line by stripping the prefix.
+ //
+ string sp;
+
+ // Remember the position of the first token in this here-document.
+ //
+ size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0);
+
while (tt != type::eos)
{
- // Check if this is the end marker.
+ // Check if this is the end marker. For starters, it should be a
+ // single, unquoted word followed by a newline.
//
- if (tt == type::word &&
- !t.quoted &&
- t.value == em &&
- peek () == type::newline)
+ if (tt == type::word && !t.quoted && peek () == type::newline)
{
- next (t, tt); // Get the newline.
- break;
+ const string& v (t.value);
+
+ size_t vn (v.size ());
+ size_t en (em.size ());
+
+ // Then check that it ends with the end marker.
+ //
+ if (vn >= en && v.compare (vn - en, en, em) == 0)
+ {
+ // Now check that the prefix only contains whitespaces.
+ //
+ size_t n (vn - en);
+
+ if (v.find_first_not_of (" \t") >= n)
+ {
+ assert (pre_parse_ || n == 0); // Should have been stripped.
+
+ if (n != 0)
+ sp.assign (v, 0, n); // Save the strip prefix.
+
+ next (t, tt); // Get the newline.
+ break;
+ }
+ }
}
// Expand the line (can be blank).
@@ -1604,10 +1642,53 @@ namespace build2
if (tt == type::eos)
fail (t) << "missing here-document end marker '" << em << "'";
- // Add final newline if requested.
- //
- if (!pre_parse_ && !nn)
- r += '\n';
+ if (pre_parse_)
+ {
+ // Strip the indentation prefix if there is one.
+ //
+ assert (replay_ == replay::save);
+
+ if (!sp.empty ())
+ {
+ size_t sn (sp.size ());
+
+ for (; ri != replay_data_.size (); ++ri)
+ {
+ token& rt (replay_data_[ri].token);
+
+ if (rt.type == type::newline) // Blank
+ continue;
+
+ if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0)
+ fail (rt) << "unindented here-document line";
+
+ // If the word is equal to the strip prefix then we have to drop
+ // the token. Note that simply making it an empty word won't
+ // have the same semantics. For instance, it would trigger
+ // concatenated expansion.
+ //
+ if (rt.value.size () == sn)
+ replay_data_.erase (replay_data_.begin () + ri);
+ else
+ {
+ rt.value.erase (0, sn);
+ rt.column += sn;
+ ++ri;
+ }
+
+ // Skip until next newline.
+ //
+ for (; replay_data_[ri].token.type != type::newline; ++ri) ;
+ }
+ }
+ }
+ else
+ {
+ // Add final newline if requested.
+ //
+ if (!nn)
+ r += '\n';
+ }
return r;
}
diff --git a/unit-tests/test/script/parser/here-document.test b/unit-tests/test/script/parser/here-document.test
index d6b21fd..4fa62d2 100644
--- a/unit-tests/test/script/parser/here-document.test
+++ b/unit-tests/test/script/parser/here-document.test
@@ -1,4 +1,92 @@
+: indent
+:
+{
+ : basic
+ :
+ $* <<EOI >>EOO
+ cmd <<EOF
+ foo
+ bar
+ baz
+ EOF
+ EOI
+ cmd <<EOF
+ foo
+ bar
+ baz
+ EOF
+ EOO
+
+ : blank
+ :
+ $* <<EOI >>EOO
+ cmd <<EOF
+ foo
+
+
+ bar
+ EOF
+ EOI
+ cmd <<EOF
+ foo
+
+
+ bar
+ EOF
+ EOO
+
+ : non-ws-prefix
+ :
+ $* <<EOI >>EOO
+ cmd <<EOF
+ x EOF
+ EOF
+ EOI
+ cmd <<EOF
+ x EOF
+ EOF
+ EOO
+
+ : whole-token
+ : Test the case where the indentation is a whole token
+ :
+ $* <<EOI >>EOO
+ x = foo bar
+ cmd <<EOF
+ \$x
+ EOF
+ EOI
+ cmd <<EOF
+ foo bar
+ EOF
+ EOO
+
+ : long-line
+ : Test the case where the line contains multiple tokens
+ :
+ $* <<EOI >>EOO
+ x = foo
+ cmd <<EOF
+ \$x bar \$x
+ EOF
+ EOI
+ cmd <<EOF
+ foo bar foo
+ EOF
+ EOO
+
+ : unindented
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd <<EOF
+ bar
+ EOF
+ EOI
+ testscript:2:1: error: unindented here-document line
+ EOE
+}
+
$* <<EOI >>EOO # blank-lines
cmd <<EOF
foo