aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2016-11-26 16:19:28 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2016-11-26 16:19:28 +0200
commit73c7f8615ebfaf76063207fbd071b2ff7b6b5a3f (patch)
treea4b9bfdd5e50dcbe1ec05aa135c171270414f1b7
parent757f42e7dea94f8b79b3d55074dedeafd853ddc5 (diff)
Spec testscript regex, add support in token/lexer
-rw-r--r--build2/test/script/lexer.cxx110
-rw-r--r--build2/test/script/parser6
-rw-r--r--build2/test/script/parser.cxx124
-rw-r--r--build2/test/script/token19
-rw-r--r--build2/test/script/token.cxx55
-rw-r--r--build2/token20
-rw-r--r--doc/testscript.cli104
7 files changed, 262 insertions, 176 deletions
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
index 72fa85b..cdf726b 100644
--- a/build2/test/script/lexer.cxx
+++ b/build2/test/script/lexer.cxx
@@ -4,6 +4,8 @@
#include <build2/test/script/lexer>
+#include <cstring> // strchr()
+
using namespace std;
namespace build2
@@ -176,6 +178,33 @@ namespace build2
if (eos (c))
return make_token (type::eos);
+ auto make_token_with_modifiers =
+ [&sep, ln, cn, this] (type t, const char* mods, bool exc = false)
+ {
+ string v;
+ if (mods != nullptr)
+ {
+ for (xchar p (peek ());
+ (strchr (mods, p) != nullptr && // Modifier.
+ strchr (v.c_str (), p) == nullptr); // Not already seen.
+ p = peek ())
+ {
+ get ();
+ v += p;
+
+ // If mutually exclusive, then we are done.
+ //
+ if (exc)
+ break;
+ }
+ }
+
+ return token (t, move (v), sep,
+ quote_type::unquoted, false,
+ ln, cn,
+ token_printer);
+ };
+
state st (state_.top ()); // Make copy (see first/second_token).
lexer_mode m (st.mode);
@@ -299,27 +328,22 @@ namespace build2
{
xchar p (peek ());
- if (p == '?' || p == '!' || p == '&')
+ if (p == '&')
{
get ();
-
- switch (p)
- {
- case '?': return make_token (type::clean_maybe);
- case '!': return make_token (type::clean_never);
- case '&': return make_token (type::log_and);
- }
+ return make_token (type::log_and);
}
- else
- return make_token (type::clean_always);
+
+ return make_token_with_modifiers (type::clean, "!?", true);
}
// <
//
case '<':
{
+ type r (type::in_str);
xchar p (peek ());
- if (p == '+' || p == '-' || p == ':' || p == '<')
+ if (p == '+' || p == '-' || p == '<')
{
get ();
@@ -327,35 +351,40 @@ namespace build2
{
case '+': return make_token (type::in_pass);
case '-': return make_token (type::in_null);
- case ':': return make_token (type::in_str_nn);
case '<':
{
+ r = type::in_doc;
p = peek ();
- if (p == ':' || p == '<')
+ if (p == '<')
{
get ();
-
- return make_token (p == ':'
- ? type::in_doc_nn
- : type::in_file);
+ r = type::in_file;
}
- else
- return make_token (type::in_doc);
+ break;
}
}
}
- else
- return make_token (type::in_str);
+ // Handle modifiers.
+ //
+ const char* mod (nullptr);
+ switch (r)
+ {
+ case type::in_str:
+ case type::in_doc: mod = ":"; break;
+ }
+
+ return make_token_with_modifiers (r, mod);
}
// >
//
case '>':
{
+ type r (type::out_str);
xchar p (peek ());
- if (p == '+' || p == '-' || p == '&' || p == ':' || p == '>')
+ if (p == '+' || p == '-' || p == '&' || p == '>')
{
get ();
@@ -364,37 +393,32 @@ namespace build2
case '+': return make_token (type::out_pass);
case '-': return make_token (type::out_null);
case '&': return make_token (type::out_merge);
- case ':': return make_token (type::out_str_nn);
case '>':
{
+ r = type::out_doc;
p = peek ();
- if (p == ':' || p == '>')
+ if (p == '>')
{
get ();
-
- if (p == ':')
- return make_token (type::out_doc_nn);
-
- // File redirect.
- //
- p = peek ();
-
- if (p == '&')
- {
- get ();
- return make_token (type::out_file_app);
- }
- else
- return make_token (type::out_file);
+ r = type::out_file;
}
- else
- return make_token (type::out_doc);
+ break;
}
}
}
- else
- return make_token (type::out_str);
+
+ // Handle modifiers.
+ //
+ const char* mod (nullptr);
+ switch (r)
+ {
+ case type::out_str:
+ case type::out_doc: mod = "~:"; break;
+ case type::out_file: mod = "&"; break;
+ }
+
+ return make_token_with_modifiers (r, mod);
}
}
}
diff --git a/build2/test/script/parser b/build2/test/script/parser
index da82df2..ee270d8 100644
--- a/build2/test/script/parser
+++ b/build2/test/script/parser
@@ -102,7 +102,7 @@ namespace build2
int fd; // Redirect fd (0 - in, 1 - out, 2 - err).
string end;
bool literal; // Literal (single-quote).
- bool no_newline; // No final newline.
+ string modifiers;
};
using here_docs = vector<here_doc>;
@@ -116,7 +116,9 @@ namespace build2
parse_here_documents (token&, token_type&,
pair<command_expr, here_docs>&);
string
- parse_here_document (token&, token_type&, const string&, bool);
+ parse_here_document (token&, token_type&,
+ const string&,
+ const string&);
// Execute. Issue diagnostics and throw failed in case of an error.
//
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index 9e2018f..fae138b 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -1256,16 +1256,13 @@ namespace build2
clean
};
pending p (pending::program);
- bool nn (false); // True if pending here-{str,doc} is "no-newline".
- bool app (false); // True if to append to pending file.
- cleanup_type ct; // Pending cleanup type.
- here_docs hd; // Expected here-documents.
+ string mod; // Modifiers for pending in_* and out_* positions.
+ here_docs hd; // Expected here-documents.
// Add the next word to either one of the pending positions or to
// program arguments by default.
//
- auto add_word =
- [&c, &p, &nn, &app, &ct, this] (string&& w, const location& l)
+ auto add_word = [&c, &p, &mod, this] (string&& w, const location& l)
{
auto add_merge = [&l, this] (redirect& r, const string& w, int fd)
{
@@ -1284,9 +1281,10 @@ namespace build2
<< "file descriptor must be " << fd;
};
- auto add_here_str = [&nn] (redirect& r, string&& w)
+ auto add_here_str = [&mod] (redirect& r, string&& w)
{
- if (!nn) w += '\n';
+ if (mod.find (':') == string::npos)
+ w += '\n';
r.str = move (w);
};
@@ -1310,7 +1308,7 @@ namespace build2
}
};
- auto add_file = [&app, &parse_path] (redirect& r, int fd, string&& w)
+ auto add_file = [&mod, &parse_path] (redirect& r, int fd, string&& w)
{
const char* what (nullptr);
switch (fd)
@@ -1321,7 +1319,7 @@ namespace build2
}
r.file.path = parse_path (move (w), what);
- r.file.append = app;
+ r.file.append = mod.find ('&') != string::npos;
};
switch (p)
@@ -1349,13 +1347,23 @@ namespace build2
case pending::err_file: add_file (c.err, 2, move (w)); break;
case pending::clean:
- c.cleanups.push_back ({ct, parse_path (move (w), "cleanup path")});
- break;
+ {
+ cleanup_type t;
+ switch (mod[0]) // Ok, if empty
+ {
+ case '!': t = cleanup_type::never; break;
+ case '?': t = cleanup_type::maybe; break;
+ default: t = cleanup_type::always; break;
+ }
+
+ c.cleanups.push_back (
+ {t, parse_path (move (w), "cleanup path")});
+ break;
+ }
}
p = pending::none;
- nn = false;
- app = false;
+ mod.clear ();
};
// Make sure we don't have any pending positions to fill.
@@ -1389,11 +1397,11 @@ namespace build2
// Parse the redirect operator.
//
auto parse_redirect =
- [&c, &p, &nn, &app, this] (const token& t, const location& l)
+ [&c, &p, &mod, this] (token& t, const location& l)
{
// Our semantics is the last redirect seen takes effect.
//
- assert (p == pending::none && !nn && !app);
+ assert (p == pending::none && mod.empty ());
// See if we have the file descriptor.
//
@@ -1430,9 +1438,7 @@ namespace build2
case type::in_pass:
case type::in_null:
case type::in_str:
- case type::in_str_nn:
case type::in_doc:
- case type::in_doc_nn:
case type::in_file:
{
if ((fd = fd == 3 ? 0 : fd) != 0)
@@ -1444,11 +1450,8 @@ namespace build2
case type::out_null:
case type::out_merge:
case type::out_str:
- case type::out_str_nn:
case type::out_doc:
- case type::out_doc_nn:
case type::out_file:
- case type::out_file_app:
{
if ((fd = fd == 3 ? 1 : fd) == 0)
fail (l) << "invalid out redirect file descriptor " << fd;
@@ -1468,17 +1471,12 @@ namespace build2
case type::out_merge: rt = redirect_type::merge; break;
- case type::in_str_nn:
- case type::out_str_nn: nn = true; // Fall through.
case type::in_str:
case type::out_str: rt = redirect_type::here_string; break;
- case type::in_doc_nn:
- case type::out_doc_nn: nn = true; // Fall through.
case type::in_doc:
case type::out_doc: rt = redirect_type::here_document; break;
- case type::out_file_app: app = true; // Fall through.
case type::in_file:
case type::out_file: rt = redirect_type::file; break;
}
@@ -1525,20 +1523,16 @@ namespace build2
}
break;
}
+
+ mod = move (t.value);
};
// Set pending cleanup type.
//
- auto parse_clean = [&p, &ct] (type tt)
+ auto parse_clean = [&p, &mod] (token& t)
{
- switch (tt)
- {
- case type::clean_always: ct = cleanup_type::always; break;
- case type::clean_maybe: ct = cleanup_type::maybe; break;
- case type::clean_never: ct = cleanup_type::never; break;
- }
-
p = pending::clean;
+ mod = move (t.value);
};
const location ll (get_location (t)); // Line location.
@@ -1606,18 +1600,10 @@ namespace build2
case type::out_str:
case type::out_doc:
- case type::in_str_nn:
- case type::in_doc_nn:
- case type::out_str_nn:
- case type::out_doc_nn:
-
case type::in_file:
case type::out_file:
- case type::out_file_app:
- case type::clean_always:
- case type::clean_maybe:
- case type::clean_never:
+ case type::clean:
{
if (pre_parse_)
{
@@ -1625,16 +1611,12 @@ namespace build2
// end markers since we need to know how many of them to pre-
// parse after the command.
//
- nn = false;
-
switch (tt)
{
- case type::in_doc_nn:
- case type::out_doc_nn:
- nn = true;
- // Fall through.
case type::in_doc:
case type::out_doc:
+ mod = move (t.value);
+
// We require the end marker to be a literal, unquoted word.
// In particular, we don't allow quoted because of cases
// like foo"$bar" (where we will see word 'foo').
@@ -1683,7 +1665,10 @@ namespace build2
hd.push_back (
here_doc {
- 0, 0, 0, move (t.value), qt == quote_type::single, nn});
+ 0, 0, 0,
+ move (t.value),
+ qt == quote_type::single,
+ move (mod)});
break;
}
@@ -1736,24 +1721,16 @@ namespace build2
case type::out_str:
case type::out_doc:
- case type::in_str_nn:
- case type::in_doc_nn:
- case type::out_str_nn:
- case type::out_doc_nn:
-
case type::in_file:
case type::out_file:
- case type::out_file_app:
{
parse_redirect (t, l);
break;
}
- case type::clean_always:
- case type::clean_maybe:
- case type::clean_never:
+ case type::clean:
{
- parse_clean (tt);
+ parse_clean (t);
break;
}
@@ -1789,10 +1766,10 @@ namespace build2
move (t.value),
(t.qtype == quote_type::unquoted ||
t.qtype == quote_type::single),
- nn});
+ move (mod)});
p = pending::none;
- nn = false;
+ mod.clear ();
next (t, tt);
break;
@@ -1975,30 +1952,21 @@ namespace build2
case type::in_str:
case type::out_str:
- case type::in_str_nn:
- case type::out_str_nn:
-
case type::in_file:
case type::out_file:
- case type::out_file_app:
{
parse_redirect (t, l);
break;
}
- case type::clean_always:
- case type::clean_maybe:
- case type::clean_never:
+ case type::clean:
{
- parse_clean (tt);
+ parse_clean (t);
break;
}
case type::in_doc:
case type::out_doc:
-
- case type::in_doc_nn:
- case type::out_doc_nn:
{
fail (l) << "here-document redirect in expansion";
break;
@@ -2093,7 +2061,7 @@ namespace build2
: lexer_mode::here_line_double);
next (t, tt);
- string v (parse_here_document (t, tt, h.end, h.no_newline));
+ string v (parse_here_document (t, tt, h.end, h.modifiers));
if (!pre_parse_)
{
@@ -2109,7 +2077,9 @@ namespace build2
}
string parser::
- parse_here_document (token& t, type& tt, const string& em, bool nn)
+ parse_here_document (token& t, type& tt,
+ const string& em,
+ const string& mod)
{
// enter: first token on first line
// leave: newline (after end marker)
@@ -2259,9 +2229,9 @@ namespace build2
}
else
{
- // Add final newline if requested.
+ // Add final newline unless suppressed.
//
- if (!nn)
+ if (mod.find (':') == string::npos)
r += '\n';
}
diff --git a/build2/test/script/token b/build2/test/script/token
index d4f6eec..7f79746 100644
--- a/build2/test/script/token
+++ b/build2/test/script/token
@@ -30,29 +30,22 @@ namespace build2
minus, // -
pipe, // |
- clean_always, // &
- clean_maybe, // &?
- clean_never, // &!
+ clean, // &{?!} (modifiers in value)
log_and, // &&
log_or, // ||
in_pass, // <+
in_null, // <-
- in_str, // <
- in_str_nn, // <:
- in_doc, // <<
- in_doc_nn, // <<:
+ in_str, // <{:} (modifiers in value)
+ in_doc, // <<{:} (modifiers in value)
in_file, // <<<
out_pass, // >+
out_null, // >-
out_merge, // >&
- out_str, // >
- out_str_nn, // >:
- out_doc, // >>
- out_doc_nn, // >>:
- out_file, // >>>
- out_file_app // >>>&
+ out_str, // >{:~} (modifiers in value)
+ out_doc, // >>{:~} (modifiers in value)
+ out_file // >>>{&} (modifiers in value)
};
token_type () = default;
diff --git a/build2/test/script/token.cxx b/build2/test/script/token.cxx
index 79e64de..a8ef5b4 100644
--- a/build2/test/script/token.cxx
+++ b/build2/test/script/token.cxx
@@ -15,42 +15,37 @@ namespace build2
void
token_printer (ostream& os, const token& t, bool d)
{
+ const string& v (t.value);
+
// Only quote non-name tokens for diagnostics.
//
const char* q (d ? "'" : "");
switch (t.type)
{
- case token_type::semi: os << q << ';' << q; break;
-
- case token_type::plus: os << q << '+' << q; break;
- case token_type::minus: os << q << '-' << q; break;
-
- case token_type::clean_always: os << q << '&' << q; break;
- case token_type::clean_maybe: os << q << "&?" << q; break;
- case token_type::clean_never: os << q << "&!" << q; break;
-
- case token_type::pipe: os << q << '|' << q; break;
- case token_type::log_and: os << q << "&&" << q; break;
- case token_type::log_or: os << q << "||" << q; break;
-
- case token_type::in_pass: os << q << "<+" << q; break;
- case token_type::in_null: os << q << "<-" << q; break;
- case token_type::in_str: os << q << '<' << q; break;
- case token_type::in_str_nn: os << q << "<:" << q; break;
- case token_type::in_doc: os << q << "<<" << q; break;
- case token_type::in_doc_nn: os << q << "<<:" << q; break;
- case token_type::in_file: os << q << "<<<" << q; break;
-
- case token_type::out_pass: os << q << ">+" << q; break;
- case token_type::out_null: os << q << ">-" << q; break;
- case token_type::out_merge: os << q << ">&" << q; break;
- case token_type::out_str: os << q << '>' << q; break;
- case token_type::out_str_nn: os << q << ">:" << q; break;
- case token_type::out_doc: os << q << ">>" << q; break;
- case token_type::out_doc_nn: os << q << ">>:" << q; break;
- case token_type::out_file: os << q << ">>>" << q; break;
- case token_type::out_file_app: os << q << ">>>&" << q; break;
+ case token_type::semi: os << q << ';' << q; break;
+
+ case token_type::plus: os << q << '+' << q; break;
+ case token_type::minus: os << q << '-' << q; break;
+
+ case token_type::clean: os << q << '&' << v << q; break;
+
+ case token_type::pipe: os << q << '|' << q; break;
+ case token_type::log_and: os << q << "&&" << q; break;
+ case token_type::log_or: os << q << "||" << q; break;
+
+ case token_type::in_pass: os << q << "<+" << q; break;
+ case token_type::in_null: os << q << "<-" << q; break;
+ case token_type::in_str: os << q << '<' << v << q; break;
+ case token_type::in_doc: os << q << "<<" << v << q; break;
+ case token_type::in_file: os << q << "<<<" << q; break;
+
+ case token_type::out_pass: os << q << ">+" << q; break;
+ case token_type::out_null: os << q << ">-" << q; break;
+ case token_type::out_merge: os << q << ">&" << q; break;
+ case token_type::out_str: os << q << '>' << v << q; break;
+ case token_type::out_doc: os << q << ">>" << v << q; break;
+ case token_type::out_file: os << q << ">>>" << v << q; break;
default: build2::token_printer (os, t, d);
}
diff --git a/build2/token b/build2/token
index df25d4c..0dc914f 100644
--- a/build2/token
+++ b/build2/token
@@ -81,7 +81,10 @@ namespace build2
quote_type qtype;
bool qcomp;
- string value; // Only valid for word.
+ // Normally only used for word, but can also be used to store "modifiers"
+ // or some such for other tokens.
+ //
+ string value;
uint64_t line;
uint64_t column;
@@ -93,18 +96,23 @@ namespace build2
: token (token_type::eos, false, 0, 0, token_printer) {}
token (token_type t, bool s, uint64_t l, uint64_t c, printer_type* p)
- : type (t), separated (s), qtype (quote_type::unquoted),
- line (l), column (c),
- printer (p) {}
+ : token (t, string (), s, quote_type::unquoted, false, l, c, p) {}
token (string v, bool s,
quote_type qt, bool qc,
uint64_t l, uint64_t c)
- : type (token_type::word), separated (s),
+ : token (token_type::word, move (v), s, qt, qc, l, c, &token_printer){}
+
+ token (token_type t,
+ string v, bool s,
+ quote_type qt, bool qc,
+ uint64_t l, uint64_t c,
+ printer_type* p)
+ : type (t), separated (s),
qtype (qt), qcomp (qc),
value (move (v)),
line (l), column (c),
- printer (&token_printer) {}
+ printer (p) {}
};
// Output the token value in a format suitable for diagnostics.
diff --git a/doc/testscript.cli b/doc/testscript.cli
index 79c6836..a9ba608 100644
--- a/doc/testscript.cli
+++ b/doc/testscript.cli
@@ -792,16 +792,16 @@ stderr: '2'(out-redirect)
in-redirect: '<-'|\
'<+'|\
- ('<'|'<:') <text>|\
- ('<<'|'<<:') <here-end>|\
+ '<'{':'?} <text>|\
+ '<<'{':'?} <here-end>|\
'<<<' <file>
out-redirect: '>-'|\
'>+'|\
'>&' ('1'|'2')|\
- ('>'|'>:') <text>|\
- ('>>'|'>>:') <here-end>|\
- ('>>>'|'>>>&') <file>
+ '>'{':'?'~'?} <text>|\
+ '>>'{':'?'~'?} <here-end>|\
+ '>>>'{'&'?} <file>
cleanup: ('&'|'&!'|'&?') (<file>|<dir>)
@@ -1463,6 +1463,100 @@ EOI
The leading whitespace stripping does not apply to line continuations.
+\h#here-regex|Output Regex|
+
+The expected result in output here-strings and here-documents can be specified
+as a regular expression instead of plain text. To signal the use of regular
+expressions the redirect must include the \c{~} modifier, for example:
+
+\
+$* >~'/fo+/' 2>>~/EOE/
+/ba+r/
+baz
+EOE
+\
+
+The regular expression used for output matching has two levels. At the outer
+level the expression is over lines with each line treated as a single
+character. We will refer to this outer expression as \i{line-regex} and
+to its characters as \i{line-char}.
+
+A line-char can be a literal line (like \c{baz} in the example above) in
+which case it will only be equal to an identical line in the output. Or a
+line-char can be an inner level regex (like \c{ba+r} above) in which
+case it will be equal to any line in the output that matches this regex.
+Where not clear from context we will refer to this inner expression as
+\i{char-regex} and its characters as \i{char}.
+
+A line is treated as literal unless it starts with the \i{regex introducer
+character} (\c{/} in the above example). In contrast, the line-regex is always
+in effect (in a sense, the \c{~} modifier is its introducer). Note that the
+here-string regex naturally must always start with an introducer.
+
+A char-regex line that starts with an introducer must also end with one,
+optionally followed by \i{match flags}. Currently the only supported flag is
+\c{i} for case-insensitive match. For example:
+
+\
+$* >>~/EOO/
+/ba+r/i
+/ba+z/i
+EOO
+\
+
+Any character can act as a regex introducer. For here-strings it is the first
+character in the string. For here-documents the introducer is specified as
+part of the end marker. In this case the first character is the introducer,
+everything after that and up to the second occurrence of the introducer is the
+actual end marker, and everything following that is global match flags. Global
+match flags apply to every char-regex (but not to literal lines) in this
+here-document. Note that there is no way to escape the introducer character
+inside the regex.
+
+As an example, here is a shorter version of the previous example that also
+uses a different introducer character.
+
+\
+$* >>~%EOO%i
+%ba+r%
+%ba+z%
+EOO
+\
+
+By default a line-char is treated as an ordinary, non-syntax character with
+regard to line-regex. Lines that start with a regex introducer but do not end
+with one are used to specify syntax line-chars. Such syntax line-chars can
+also be specified after (or instead of) match flags. For example:
+
+\
+$* >>~/EOO/
+/(
+/fo+x/|
+/ba+r/|
+/ba+z/
+/)+
+EOO
+\
+
+As an illustration, if we call the \c{/fo+x/} expression \c{A}, \c{/ba+r/} \-
+\c{B}, and \c{/ba+z/} \- \c{C}, then we can represent the above line-regex in
+the following more traditional form:
+
+\
+(A|B|C)+
+\
+
+Only characters from the \c{()|*+?{\}0123456789,=!} set are allowed as
+syntax line-chars with the presence of any other character being an error.
+
+A blank line as well as the \c{//} sequence (assuming \c{/} is the introducer)
+are treated as an empty line-char. For the purpose of matching, newlines are
+viewed as separators rather than being part of a line. In particular, in this
+model, the customary trailing newline at the end of the output introduces a
+trailing empty line-char. As a result, unless the \c{:} (no newline) redirect
+modifier is used, an empty line-char is implicitly added to line-regex.
+
+
\h1#style|Style Guide|
This section describes the Testscript style that is used in the \c{build2}