aboutsummaryrefslogtreecommitdiff
path: root/build2
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2016-11-25 15:17:01 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2016-11-25 15:17:01 +0200
commit 757f42e7dea94f8b79b3d55074dedeafd853ddc5 (patch)
tree 8fa27fd27e36a85a6348d85b746d49a676a27027 /build2
parent a3dad2118fb3925ef4f9baa90cea0dfd44ca93c6 (diff)
Implement literal here-document support
Diffstat (limited to 'build2')
-rw-r--r-- build2/lexer 10
-rw-r--r-- build2/lexer.cxx 10
-rw-r--r-- build2/test/script/lexer 10
-rw-r--r-- build2/test/script/lexer.cxx 34
-rw-r--r-- build2/test/script/parser 6
-rw-r--r-- build2/test/script/parser.cxx 81
6 files changed, 105 insertions, 46 deletions
diff --git a/build2/lexer b/build2/lexer
index 59150a9..e2cf07c 100644
--- a/build2/lexer
+++ b/build2/lexer
@@ -85,7 +85,9 @@ namespace build2
// specify the pair separator character (if the mode supports pairs).
//
virtual void
- mode (lexer_mode, char pair_separator = '\0');
+ mode (lexer_mode,
+ char pair_separator = '\0',
+ const char* escapes = nullptr);
// Expire the current mode early.
//
@@ -119,6 +121,8 @@ namespace build2
bool sep_space; // Are whitespaces separators (see skip_spaces())?
bool quotes; // Recognize quoted fragments.
+ const char* escapes; // Effective escape sequences to recognize.
+
// Word separator characters. For two-character sequence put the first
// one in sep_first and the second one in the corresponding position of
// sep_second. If it's a single-character sequence, then put space in
@@ -170,16 +174,14 @@ namespace build2
: char_scanner (is),
fail ("error", &name_),
name_ (n),
- escapes_ (e),
processor_ (p),
sep_ (false)
{
if (sm)
- mode (lexer_mode::normal, '@');
+ mode (lexer_mode::normal, '@', e);
}
const path name_;
- const char* escapes_;
void (*processor_) (token&, const lexer&);
std::stack<state> state_;
diff --git a/build2/lexer.cxx b/build2/lexer.cxx
index b73c291..3c8eb5a 100644
--- a/build2/lexer.cxx
+++ b/build2/lexer.cxx
@@ -30,7 +30,7 @@ namespace build2
}
void lexer::
- mode (lexer_mode m, char ps)
+ mode (lexer_mode m, char ps, const char* esc)
{
const char* s1 (nullptr);
const char* s2 (nullptr);
@@ -76,7 +76,7 @@ namespace build2
default: assert (false); // Unhandled custom mode.
}
- state_.push (state {m, ps, s, q, s1, s2});
+ state_.push (state {m, ps, s, q, esc, s1, s2});
}
token lexer::
@@ -329,8 +329,10 @@ namespace build2
get ();
xchar p (peek ());
- if (escapes_ == nullptr ||
- (!eos (p) && strchr (escapes_, p) != nullptr))
+ const char* esc (st.escapes);
+
+ if (esc == nullptr ||
+ (*esc != '\0' && !eos (p) && strchr (esc, p) != nullptr))
{
get ();
diff --git a/build2/test/script/lexer b/build2/test/script/lexer
index 5597e9a..b812f84 100644
--- a/build2/test/script/lexer
+++ b/build2/test/script/lexer
@@ -29,7 +29,8 @@ namespace build2
second_token, // Expires at the end of the token.
variable_line, // Expires at the end of the line.
command_line,
- here_line,
+ here_line_single,
+ here_line_double,
description_line // Expires at the end of the line.
};
@@ -48,10 +49,13 @@ namespace build2
const path& name,
lexer_mode m,
const char* escapes = nullptr)
- : base_lexer (is, name, escapes, nullptr, false) {mode (m);}
+ : base_lexer (is, name, nullptr, nullptr, false)
+ {
+ mode (m, '\0', escapes);
+ }
virtual void
- mode (base_mode, char = '\0') override;
+ mode (base_mode, char = '\0', const char* = nullptr) override;
// Number of quoted (double or single) tokens since last reset.
//
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
index 19e7498..72fa85b 100644
--- a/build2/test/script/lexer.cxx
+++ b/build2/test/script/lexer.cxx
@@ -15,7 +15,7 @@ namespace build2
using type = token_type;
void lexer::
- mode (base_mode m, char ps)
+ mode (base_mode m, char ps, const char* esc)
{
const char* s1 (nullptr);
const char* s2 (nullptr);
@@ -76,7 +76,23 @@ namespace build2
s = false;
break;
}
- case lexer_mode::here_line:
+ case lexer_mode::here_line_single:
+ {
+ // This one is like a single-quoted string except it treats
+ // newlines as a separator. We also treat quotes as literals.
+ //
+ // Note that it might be tempting to enable line continuation
+ // escapes. However, we will then have to also enable escaping of
+ // the backslash, which makes it a lot less tempting.
+ //
+ s1 = "\n";
+ s2 = " ";
+ esc = ""; // Disable escape sequences.
+ s = false;
+ q = false;
+ break;
+ }
+ case lexer_mode::here_line_double:
{
// This one is like a double-quoted string except it treats
// newlines as a separator. We also treat quotes as literals.
@@ -105,13 +121,13 @@ namespace build2
m == lexer_mode::eval ||
m == lexer_mode::attribute);
- base_lexer::mode (m, ps);
+ base_lexer::mode (m, ps, esc);
return;
}
}
assert (ps == '\0');
- state_.push (state {m, ps, s, q, s1, s2});
+ state_.push (state {m, ps, s, q, esc, s1, s2});
}
token lexer::
@@ -126,7 +142,8 @@ namespace build2
case lexer_mode::second_token:
case lexer_mode::variable_line:
case lexer_mode::command_line:
- case lexer_mode::here_line:
+ case lexer_mode::here_line_single:
+ case lexer_mode::here_line_double:
r = next_line ();
break;
case lexer_mode::description_line:
@@ -184,7 +201,13 @@ namespace build2
sep = true; // Treat newline as always separated.
return make_token (type::newline);
}
+ }
+ }
+ if (m != lexer_mode::here_line_single)
+ {
+ switch (c)
+ {
// Variable expansion, function call, and evaluation context.
//
case '$': return make_token (type::dollar);
@@ -192,6 +215,7 @@ namespace build2
}
}
+
if (m == lexer_mode::variable_line)
{
switch (c)
diff --git a/build2/test/script/parser b/build2/test/script/parser
index fdfbe11..da82df2 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -99,10 +99,10 @@ namespace build2
{
size_t expr; // Index in command_expr.
size_t pipe; // Index in command_pipe.
- size_t redir; // Redirect (0 - in, 1 - out, 2 - err).
-
+ int fd; // Redirect fd (0 - in, 1 - out, 2 - err).
string end;
- bool no_newline;
+ bool literal; // Literal (single-quote).
+ bool no_newline; // No final newline.
};
using here_docs = vector<here_doc>;
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index 7655ba9..9e2018f 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -1261,11 +1261,11 @@ namespace build2
cleanup_type ct; // Pending cleanup type.
here_docs hd; // Expected here-documents.
- // Add the next word to either one of the pending positions or
- // to program arguments by default.
+ // Add the next word to either one of the pending positions or to
+ // program arguments by default.
//
- auto add_word = [&expr, &c, &p, &nn, &app, &ct, &hd, this]
- (string&& w, const location& l)
+ auto add_word =
+ [&c, &p, &nn, &app, &ct, this] (string&& w, const location& l)
{
auto add_merge = [&l, this] (redirect& r, const string& w, int fd)
{
@@ -1290,13 +1290,6 @@ namespace build2
r.str = move (w);
};
- auto add_here_end = [&expr, &hd, &nn] (size_t r, string&& w)
- {
- hd.push_back (
- here_doc {
- expr.size () - 1, expr.back ().pipe.size (), r, move (w), nn});
- };
-
auto parse_path = [&l, this] (string&& w, const char* what) -> path
{
try
@@ -1335,10 +1328,8 @@ namespace build2
{
case pending::none: c.arguments.push_back (move (w)); break;
case pending::program:
- {
c.program = parse_path (move (w), "program path");
break;
- }
case pending::out_merge: add_merge (c.out, w, 2); break;
case pending::err_merge: add_merge (c.err, w, 1); break;
@@ -1347,21 +1338,19 @@ namespace build2
case pending::out_string: add_here_str (c.out, move (w)); break;
case pending::err_string: add_here_str (c.err, move (w)); break;
- case pending::in_document: add_here_end (0, move (w)); break;
- case pending::out_document: add_here_end (1, move (w)); break;
- case pending::err_document: add_here_end (2, move (w)); break;
+ // These are handled specially below.
+ //
+ case pending::in_document:
+ case pending::out_document:
+ case pending::err_document: assert (false); break;
case pending::in_file: add_file (c.in, 0, move (w)); break;
case pending::out_file: add_file (c.out, 1, move (w)); break;
case pending::err_file: add_file (c.err, 2, move (w)); break;
case pending::clean:
- {
- c.cleanups.push_back (
- {ct, parse_path (move (w), "cleanup path")});
-
- break;
- }
+ c.cleanups.push_back ({ct, parse_path (move (w), "cleanup path")});
+ break;
}
p = pending::none;
@@ -1692,7 +1681,9 @@ namespace build2
fail (t) << "partially-quoted here-document end marker";
}
- hd.push_back (here_doc {0, 0, 0, move (t.value), nn});
+ hd.push_back (
+ here_doc {
+ 0, 0, 0, move (t.value), qt == quote_type::single, nn});
break;
}
@@ -1774,6 +1765,40 @@ namespace build2
}
default:
{
+ // Here-document end markers are literal (we verified that above
+ // during pre-parsing) and we need to know whether they were
+ // quoted. So handle this case specially.
+ //
+ {
+ int fd;
+ switch (p)
+ {
+ case pending::in_document: fd = 0; break;
+ case pending::out_document: fd = 1; break;
+ case pending::err_document: fd = 2; break;
+ default: fd = -1; break;
+ }
+
+ if (fd != -1)
+ {
+ hd.push_back (
+ here_doc {
+ expr.size () - 1,
+ expr.back ().pipe.size (),
+ fd,
+ move (t.value),
+ (t.qtype == quote_type::unquoted ||
+ t.qtype == quote_type::single),
+ nn});
+
+ p = pending::none;
+ nn = false;
+
+ next (t, tt);
+ break;
+ }
+ }
+
// Parse the next chunk as simple names to get expansion, etc.
// Note that we do it in the chunking mode to detect whether
// anything in each chunk is quoted.
@@ -2060,10 +2085,12 @@ namespace build2
//
for (here_doc& h: p.second)
{
- // Switch to the here-line mode which is like double-quoted but
- // recognized the newline as a separator.
+ // Switch to the here-line mode which is like single/double-quoted
+ // string but recognizes the newline as a separator.
//
- mode (lexer_mode::here_line);
+ mode (h.literal
+ ? lexer_mode::here_line_single
+ : lexer_mode::here_line_double);
next (t, tt);
string v (parse_here_document (t, tt, h.end, h.no_newline));
@@ -2071,7 +2098,7 @@ namespace build2
if (!pre_parse_)
{
command& c (p.first[h.expr].pipe[h.pipe]);
- redirect& r (h.redir == 0 ? c.in : h.redir == 1 ? c.out : c.err);
+ redirect& r (h.fd == 0 ? c.in : h.fd == 1 ? c.out : c.err);
r.doc.doc = move (v);
r.doc.end = move (h.end);