aboutsummaryrefslogtreecommitdiff
path: root/libbuild2
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2019-11-14 12:55:54 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2019-11-14 13:20:08 +0200
commit5ec57d68a5205173a02c34a24d7129347d43196c (patch)
tree303de46753bcde9f9ccff094d6591b6bb7583931 /libbuild2
parent62a688e3fd7d1fdb8ce5590ebe9cb99e90cbe5d7 (diff)
Tighten up attribute recognition during parsing
Now it should be possible to use `[]` for wildcard patterns, for example: `foo = foo.[hit]xx`. Note that a leading bracket expression will still be recognized as attributes, and escaping or quoting it will inhibit pattern matching. To resolve this case we need to specify an empty attribute list: `foo = [] [abc]-foo.cxx`.
Diffstat (limited to 'libbuild2')
-rw-r--r--libbuild2/lexer+eval.test.testscript6
-rw-r--r--libbuild2/lexer.cxx148
-rw-r--r--libbuild2/lexer.hxx46
-rw-r--r--libbuild2/lexer.test.cxx12
-rw-r--r--libbuild2/parser.cxx189
-rw-r--r--libbuild2/parser.hxx11
-rw-r--r--libbuild2/test/script/lexer.cxx48
-rw-r--r--libbuild2/test/script/parser.cxx7
-rw-r--r--libbuild2/token.hxx9
9 files changed, 286 insertions, 190 deletions
diff --git a/libbuild2/lexer+eval.test.testscript b/libbuild2/lexer+eval.test.testscript
index 963f3d0..46452a7 100644
--- a/libbuild2/lexer+eval.test.testscript
+++ b/libbuild2/lexer+eval.test.testscript
@@ -6,7 +6,7 @@ test.arguments = eval
: punctuation
:
-$* <:'x:x{x}x[x]x$x?x,x(x)' >>EOO
+$* <:'x:x{x}x$x?x,x(x)' >>EOO
'x'
:
'x'
@@ -14,10 +14,6 @@ $* <:'x:x{x}x[x]x$x?x,x(x)' >>EOO
'x'
}
'x'
-[
-'x'
-]
-'x'
$
'x'
?
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index 61d7fbf..b405929 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -23,11 +23,15 @@ namespace build2
void lexer::
mode (lexer_mode m, char ps, optional<const char*> esc)
{
+ bool a (false); // attributes
+
const char* s1 (nullptr);
const char* s2 (nullptr);
- bool s (true);
- bool n (true);
- bool q (true);
+
+ bool s (true); // space
+ bool n (true); // newline
+ bool q (true); // quotes
+
if (!esc)
{
@@ -39,35 +43,39 @@ namespace build2
{
case lexer_mode::normal:
{
- s1 = ":<>=+ $(){}[]#\t\n";
- s2 = " = ";
+ a = true;
+ s1 = ":<>=+ $(){}#\t\n";
+ s2 = " = ";
break;
}
case lexer_mode::value:
{
- s1 = " $(){}[]#\t\n";
- s2 = " ";
+ s1 = " $(){}#\t\n";
+ s2 = " ";
break;
}
case lexer_mode::values:
{
- s1 = " $(){}[],#\t\n";
- s2 = " ";
+ // a: beginning and after `,`?
+ s1 = " $(){},#\t\n";
+ s2 = " ";
break;
}
case lexer_mode::switch_expressions:
{
- s1 = " $(){}[],:#\t\n";
- s2 = " ";
+ // a: beginning and after `,`?
+ s1 = " $(){},:#\t\n";
+ s2 = " ";
break;
}
case lexer_mode::case_patterns:
{
- s1 = " $(){}[],|:#\t\n";
- s2 = " ";
+ // a: beginning and after `,` & `|`?
+ s1 = " $(){},|:#\t\n";
+ s2 = " ";
break;
}
- case lexer_mode::attribute:
+ case lexer_mode::attributes:
{
s1 = " $(]#\t\n";
s2 = " ";
@@ -75,8 +83,8 @@ namespace build2
}
case lexer_mode::eval:
{
- s1 = ":<>=!&|?, $(){}[]#\t\n";
- s2 = " = &| ";
+ s1 = ":<>=!&|?, $(){}#\t\n";
+ s2 = " = &| ";
break;
}
case lexer_mode::buildspec:
@@ -91,8 +99,10 @@ namespace build2
//
// 3. Treat newline as an ordinary space.
//
- s1 = " $(){}[],\t\n";
- s2 = " ";
+ // Also note that we don't have buildspec attributes.
+ //
+ s1 = " $(){},\t\n";
+ s2 = " ";
n = false;
break;
}
@@ -109,13 +119,13 @@ namespace build2
default: assert (false); // Unhandled custom mode.
}
- state_.push (state {m, ps, s, n, q, *esc, s1, s2});
+ state_.push (state {m, a, ps, s, n, q, *esc, s1, s2});
}
token lexer::
next ()
{
- const state& st (state_.top ());
+ state& st (state_.top ());
lexer_mode m (st.mode);
// For some modes we have dedicated implementations of next().
@@ -127,7 +137,7 @@ namespace build2
case lexer_mode::values:
case lexer_mode::switch_expressions:
case lexer_mode::case_patterns:
- case lexer_mode::attribute:
+ case lexer_mode::attributes:
case lexer_mode::variable:
case lexer_mode::buildspec: break;
case lexer_mode::eval: return next_eval ();
@@ -147,6 +157,17 @@ namespace build2
ln, cn, token_printer);
};
+ // Handle attributes (do it first to make sure the flag is cleared
+ // regardless of what we return).
+ //
+ if (st.attributes)
+ {
+ st.attributes = false;
+
+ if (c == '[')
+ return make_token (type::lsbrace);
+ }
+
if (eos (c))
return make_token (type::eos);
@@ -155,11 +176,11 @@ namespace build2
if (c == st.sep_pair)
return make_token (type::pair_separator, string (1, c));
+ // NOTE: remember to update mode(), next_eval() if adding any new special
+ // characters.
+
switch (c)
{
- // NOTE: remember to update mode(), next_eval() if adding new special
- // characters.
- //
case '\n':
{
// Expire value/values modes at the end of the line.
@@ -170,20 +191,13 @@ namespace build2
m == lexer_mode::case_patterns)
state_.pop ();
- sep = true; // Treat newline as always separated.
- return make_token (type::newline);
- }
- case '{': return make_token (type::lcbrace);
- case '}': return make_token (type::rcbrace);
- case '[': return make_token (type::lsbrace);
- case ']':
- {
- // Expire attribute mode after closing ']'.
+ // Re-enable attributes in the normal mode.
//
- if (m == lexer_mode::attribute)
- state_.pop ();
+ if (state_.top ().mode == lexer_mode::normal)
+ state_.top ().attributes = true;
- return make_token (type::rsbrace);
+ sep = true; // Treat newline as always separated.
+ return make_token (type::newline);
}
case '$': return make_token (type::dollar);
case ')': return make_token (type::rparen);
@@ -198,6 +212,31 @@ namespace build2
}
}
+ // The following characters are special in all modes except attributes.
+ //
+ if (m != lexer_mode::attributes)
+ {
+ switch (c)
+ {
+ case '{': return make_token (type::lcbrace);
+ case '}': return make_token (type::rcbrace);
+ }
+ }
+
+ // The following characters are special in the attributes modes.
+ //
+ if (m == lexer_mode::attributes)
+ {
+ switch (c)
+ {
+ case ']':
+ {
+ state_.pop (); // Expire the attributes mode after closing `]`.
+ return make_token (type::rsbrace);
+ }
+ }
+ }
+
// The following characters are special in the normal, variable, and
// switch_expressions modes.
//
@@ -208,9 +247,6 @@ namespace build2
{
switch (c)
{
- // NOTE: remember to update mode(), next_eval() if adding new special
- // characters.
- //
case ':': return make_token (type::colon);
}
}
@@ -221,9 +257,6 @@ namespace build2
{
switch (c)
{
- // NOTE: remember to update mode(), next_eval() if adding new special
- // characters.
- //
case '=':
{
if (peek () == '+')
@@ -249,8 +282,6 @@ namespace build2
//
if (m == lexer_mode::normal)
{
- // NOTE: remember to update mode() if adding new special characters.
- //
switch (c)
{
case '<': return make_token (type::labrace);
@@ -265,8 +296,6 @@ namespace build2
m == lexer_mode::switch_expressions ||
m == lexer_mode::case_patterns)
{
- // NOTE: remember to update mode() if adding new special characters.
- //
switch (c)
{
case ',': return make_token (type::comma);
@@ -277,8 +306,6 @@ namespace build2
//
if (m == lexer_mode::case_patterns)
{
- // NOTE: remember to update mode() if adding new special characters.
- //
switch (c)
{
case '|': return make_token (type::bit_or);
@@ -294,13 +321,16 @@ namespace build2
token lexer::
next_eval ()
{
+ // This mode is quite a bit like the value mode when it comes to special
+ // characters, except that we have some of our own.
+
bool sep (skip_spaces ());
xchar c (get ());
if (eos (c))
fail (c) << "unterminated evaluation context";
- const state& st (state_.top ());
+ state& st (state_.top ());
uint64_t ln (c.line), cn (c.column);
@@ -311,28 +341,30 @@ namespace build2
ln, cn, token_printer);
};
- // This mode is quite a bit like the value mode when it comes to special
- // characters, except that we have some of our own.
+ // Handle attributes (do it first to make sure the flag is cleared
+ // regardless of what we return).
//
+ if (st.attributes)
+ {
+ st.attributes = false;
+
+ if (c == '[')
+ return make_token (type::lsbrace);
+ }
// Handle pair separator.
//
if (c == st.sep_pair)
return make_token (type::pair_separator, string (1, c));
- // Note: we don't treat [ and ] as special here. Maybe can use them for
- // something later.
- //
+ // NOTE: remember to update mode() if adding any new special characters.
+
switch (c)
{
- // NOTE: remember to update mode() if adding new special characters.
- //
case '\n': fail (c) << "newline in evaluation context" << endf;
case ':': return make_token (type::colon);
case '{': return make_token (type::lcbrace);
case '}': return make_token (type::rcbrace);
- case '[': return make_token (type::lsbrace);
- case ']': return make_token (type::rsbrace);
case '$': return make_token (type::dollar);
case '?': return make_token (type::question);
case ',': return make_token (type::comma);
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
index 59debc4..715926c 100644
--- a/libbuild2/lexer.hxx
+++ b/libbuild2/lexer.hxx
@@ -21,28 +21,29 @@ namespace build2
{
// Context-dependent lexing mode. Quoted modes are internal and should not
// be set explicitly. In the value mode we don't treat certain characters
- // (e.g., '+', '=') as special so that we can use them in the variable
- // values, e.g., 'foo = g++'. In contrast, in the variable mode, we restrict
- // certain character (e.g., '/') from appearing in the name. The values mode
- // is like value but recogizes ',' as special (used in contexts where we
- // need to list multiple values). The attribute mode is also like value
- // except it doesn't treat '{' and '}' as special (so we cannot have name
- // groups in attributes). The eval mode is used in the evaluation context.
+ // (e.g., `+`, `=`) as special so that we can use them in the variable
+ // values, e.g., `foo = g++`. In contrast, in the variable mode, we restrict
+ // certain characters (e.g., `/`) from appearing in the name. The values mode
+ // is like value but recognizes `,` as special (used in contexts where we
+ // need to list multiple values). The attributes mode is also like value
+ // except it doesn't treat `{` and `}` as special (so we cannot have name
+ // groups in attributes) and recognizes the closing `]`. The eval mode is
+ // used in the evaluation context.
//
// A number of modes are "derived" from the value/values mode by recognizing
// a few extra characters:
//
// switch_expressions values plus `:`
- // case_patterns values plus '|' and ':'
+ // case_patterns values plus `|` and `:`
//
// Note that the normal, value/values and derived, as well as eval modes
// split words separated by the pair character (to disable pairs one can
- // pass '\0' as a pair character).
+ // pass `\0` as a pair character).
//
// The alternative modes must be set manually. The value/values and derived
// modes automatically expire after the end of the line. The attributes mode
- // expires after the closing ']'. The variable mode expires after the word
- // token. And the eval mode expires after the closing ')'.
+ // expires after the closing `]`. The variable mode expires after the word
+ // token. And the eval mode expires after the closing `)`.
//
// Note that normally it is only safe to switch mode when the current token
// is not quoted (or, more generally, when you are not in the double-quoted
@@ -50,6 +51,15 @@ namespace build2
// variable name mode). Failing that, your mode (which now will be the top of
// the mode stack) will prevent proper recognition of the closing quote.
//
+ // Finally, attributes recognition (the `[` token) cuts across most of the
+ // modes and is handled with a flag. In the normal mode it is automatically
+ // set at the beginning and after each newline. In all other modes it must
+ // be explicitly set at points where attributes are recognized. In all the
+ // cases it is automatically reset after lexing the next token (whether `[`
+ // or not).
+ //
+ // @@ Maybe also enable at the beginning of value?
+ //
// Extendable/inheritable enum-like class.
//
@@ -65,7 +75,7 @@ namespace build2
values,
case_patterns,
switch_expressions,
- attribute,
+ attributes,
eval,
single_quoted,
double_quoted,
@@ -97,15 +107,20 @@ namespace build2
name () const {return name_;}
// Note: sets mode for the next token. The second argument can be used to
- // specifythe pair separator character (if the mode supports pairs). If
- // escapes not specified, then inherit the current mode's (thought a mode
- // can also override it).
+ // specify the pair separator character (if the mode supports pairs). If
+ // escapes is not specified, then inherit the current mode's (though a
+ // mode can also override it).
//
virtual void
mode (lexer_mode,
char pair_separator = '\0',
optional<const char*> escapes = nullopt);
+ // Enable attributes recognition for the next token.
+ //
+ void
+ enable_attributes () {state_.top ().attributes = true;}
+
// Expire the current mode early.
//
void
@@ -136,6 +151,7 @@ namespace build2
struct state
{
lexer_mode mode;
+ bool attributes;
char sep_pair;
bool sep_space; // Are whitespaces separators (see skip_spaces())?
diff --git a/libbuild2/lexer.test.cxx b/libbuild2/lexer.test.cxx
index 32151db..eeed532 100644
--- a/libbuild2/lexer.test.cxx
+++ b/libbuild2/lexer.test.cxx
@@ -31,12 +31,12 @@ namespace build2
quote = true;
else
{
- if (a == "normal") m = lexer_mode::normal;
- else if (a == "variable") m = lexer_mode::variable;
- else if (a == "value") m = lexer_mode::value;
- else if (a == "attribute") m = lexer_mode::attribute;
- else if (a == "eval") m = lexer_mode::eval;
- else if (a == "buildspec") m = lexer_mode::buildspec;
+ if (a == "normal") m = lexer_mode::normal;
+ else if (a == "variable") m = lexer_mode::variable;
+ else if (a == "value") m = lexer_mode::value;
+ else if (a == "attributes") m = lexer_mode::attributes;
+ else if (a == "eval") m = lexer_mode::eval;
+ else if (a == "buildspec") m = lexer_mode::buildspec;
else assert (false);
break;
}
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 9e586e0..be1ba0b 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -287,11 +287,15 @@ namespace build2
{
tracer trace ("parser::parse_clause", &path_);
- // parse_clause() should always stop at a token that is at the beginning
- // of the line (except for eof). That is, if something is called to parse
- // a line, it should parse it until newline (or fail). This is important
- // for if-else blocks, directory scopes, etc., that assume the '}' token
- // they see is on the new line.
+ // This function should be called in the normal lexing mode with the first
+ // token of a line or alternative arrangements may have to be made to
+ // recognize the attributes.
+ //
+ // It should also always stop at a token that is at the beginning of the
+ // line (except for eof). That is, if something is called to parse a line,
+ // it should parse it until newline (or fail). This is important for
+ // if-else blocks, directory scopes, etc., that assume the '}' token they
+ // see is on the new line.
//
bool parsed (false);
@@ -302,8 +306,7 @@ namespace build2
assert (attributes_.empty ());
auto at (attributes_push (t, tt));
- // We should always start with one or more names, potentially
- // <>-grouped.
+ // We always start with one or more names, potentially <>-grouped.
//
if (!(start_names (tt) || tt == type::labrace))
{
@@ -454,6 +457,13 @@ namespace build2
{
// Parse target names inside < >.
//
+ // We "reserve" the right to have attributes inside <> though what
+ // exactly that would mean is unclear. One potentially useful
+ // semantics would be the ability to specify attributes for ad hoc
+ // members though the fact that the primary target is listed first
+ // would make it rather unintuitive.
+ //
+ enable_attributes ();
next (t, tt);
auto at (attributes_push (t, tt));
@@ -621,7 +631,10 @@ namespace build2
}
};
- if (next (t, tt) == type::newline)
+ enable_attributes (); // Recognize attributes after `:`.
+ next (t, tt);
+
+ if (tt == type::newline)
{
// See if this is a target block.
//
@@ -734,85 +747,70 @@ namespace build2
//
// This can take any of the following forms:
//
- // x = y
- // foo/ x = y (ns will have two elements)
- // foo/ [attrs] x = y (tt will be '[')
+ // x = y
+ // foo/ x = y (ns will have two elements)
+ //
+ // And in the future we may also want to support:
+ //
+ // foo/ bar/ x = y
//
- // In the future we may also want to support:
+ // Note that we don't support this:
//
- // foo/ bar/ x = y
+ // foo/ [attrs] x = y
//
- if (tt == type::assign || tt == type::prepend || tt == type::append ||
- tt == type::lsbrace)
+ // Because the meaning of `[attrs]` would be ambiguous (it could also be
+ // a name). Note that the above semantics can be easily achieved with an
+ // explicit directory scope:
+ //
+ // foo/
+ // {
+ // [attrs] x = y
+ // }
+ //
+ if (tt == type::assign || tt == type::prepend || tt == type::append)
{
// Detect and handle the directory scope. If things look off, then we
// let parse_variable_name() complain.
//
dir_path d;
-
- if ((ns.size () == 2 && ns[0].directory ()) ||
- (ns.size () == 1 && ns[0].directory () && tt == type::lsbrace))
+ if (ns.size () == 2 && ns[0].directory ())
{
if (at.first)
fail (at.second) << "attributes before scope directory";
- if (tt == type::lsbrace)
- {
- attributes_pop ();
- attributes_push (t, tt);
-
- d = move (ns[0].dir);
- nloc = get_location (t);
- ns = parse_names (t, tt, pattern_mode::ignore);
+ d = move (ns[0].dir);
+ ns.erase (ns.begin ());
- // It got to be a variable assignment.
- //
- if (tt != type::assign &&
- tt != type::prepend &&
- tt != type::append)
- fail (t) << "expected variable assignment instead of " << t;
- }
- else
- {
- d = move (ns[0].dir);
- ns.erase (ns.begin ());
- }
+ // Make sure it's not a pattern (see also the target case above and
+ // scope below).
+ //
+ if (path_pattern (d))
+ fail (nloc) << "pattern in directory " << d.representation ();
}
- // Make sure not a pattern (see also the target case above and scope
- // below).
- //
- if (path_pattern (d))
- fail (nloc) << "pattern in directory " << d.representation ();
+ const variable& var (parse_variable_name (move (ns), nloc));
+ apply_variable_attributes (var);
- if (tt != type::lsbrace)
+ if (var.visibility >= variable_visibility::target)
{
- const variable& var (parse_variable_name (move (ns), nloc));
- apply_variable_attributes (var);
+ diag_record dr (fail (nloc));
- if (var.visibility >= variable_visibility::target)
- {
- diag_record dr (fail (nloc));
+ dr << "variable " << var << " has " << var.visibility
+ << " visibility but is assigned on a scope";
- dr << "variable " << var << " has " << var.visibility
- << " visibility but is assigned on a scope";
-
- if (var.visibility == variable_visibility::target)
- dr << info << "consider changing it to '*: " << var << "'";
- }
-
- {
- enter_scope sg (d.empty ()
- ? enter_scope ()
- : enter_scope (*this, move (d)));
- parse_variable (t, tt, var, tt);
- }
+ if (var.visibility == variable_visibility::target)
+ dr << info << "consider changing it to '*: " << var << "'";
+ }
- next_after_newline (t, tt);
- continue;
+ {
+ enter_scope sg (d.empty ()
+ ? enter_scope ()
+ : enter_scope (*this, move (d)));
+ parse_variable (t, tt, var, tt);
}
- // Not "our" attribute, see if anyone else likes it.
+ next_after_newline (t, tt);
+ continue;
}
// See if this is a directory scope.
@@ -873,7 +871,7 @@ namespace build2
// Parse a target or prerequisite-specific variable block. If type is not
// NULL, then this is a target type/pattern-specific block.
//
- // enter: first token of first line in the block
+ // enter: first token of first line in the block (normal lexer mode)
// leave: rcbrace
//
// This is a more restricted variant of parse_clause() that only allows
@@ -1216,7 +1214,9 @@ namespace build2
fail (ploc) << "no prerequisites in dependency chain or prerequisite-"
<< "specific variable assignment";
+ enable_attributes (); // Recognize attributes after `:`.
next (t, tt);
+
auto at (attributes_push (t, tt));
// @@ PAT: currently we pattern-expand prerequisite-specific vars.
@@ -1670,6 +1670,7 @@ namespace build2
// manually looking for =/=+/+=.
//
mode (lexer_mode::value, '@');
+ enable_attributes (); // @@ VAL.
next (t, tt);
// Get variable attributes, if any (note that here we will go into a
@@ -1820,6 +1821,7 @@ namespace build2
// being able to type them or to return NULL.
//
mode (lexer_mode::value, '@');
+ enable_attributes (); // @@ VAL.
next (t, tt);
auto at (attributes_push (t, tt));
@@ -1971,6 +1973,8 @@ namespace build2
for (;;)
{
string k (move (t.value));
+
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
bool take (false); // Take this branch?
@@ -2131,7 +2135,9 @@ namespace build2
do
{
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
+
if (tt == type::newline || tt == type::eos)
fail (t) << "expected switch expression instead of " << t;
@@ -2237,7 +2243,7 @@ namespace build2
//
mode (lexer_mode::case_patterns); // Recognize `|` and `,`.
- auto parse_pattern = [this] (token& t, type& tt)
+ auto parse_pattern_with_attributes = [this] (token& t, type& tt)
{
return parse_value_with_attributes (
t, tt, pattern_mode::ignore, "pattern", nullptr);
@@ -2245,7 +2251,9 @@ namespace build2
for (size_t i (0);; ++i)
{
+ enable_attributes (); // Recognize attributes before pattern.
next (t, tt);
+
if (tt == type::newline || tt == type::eos)
fail (t) << "expected case pattern instead of " << t;
@@ -2254,10 +2262,10 @@ namespace build2
// Handle pattern alternatives (<pattern>|<pattern>).
//
- for (;; next (t, tt))
+ for (;;)
{
const location l (get_location (t));
- value p (parse_pattern (t, tt));
+ value p (parse_pattern_with_attributes (t, tt));
expr& e (exprs[i]); // Note: value might be modified (typified).
if (e.func)
@@ -2300,14 +2308,18 @@ namespace build2
pre_parse_ = true;
do
{
+ enable_attributes (); // Recognize attributes before pattern.
next (t, tt); // Skip `|`.
- parse_pattern (t, tt);
+ parse_pattern_with_attributes (t, tt);
}
while (tt == type::bit_or);
pre_parse_ = false;
break;
}
+
+ enable_attributes (); // Recognize attributes before pattern.
+ next (t, tt);
}
if (!take)
@@ -2421,6 +2433,7 @@ namespace build2
// First take care of the variable name. There is no reason not to
// support variable attributes.
//
+ enable_attributes ();
next (t, tt);
attributes_push (t, tt);
@@ -2445,6 +2458,7 @@ namespace build2
// value on the RHS of an assignment (expansion, attributes).
//
mode (lexer_mode::value, '@');
+ enable_attributes (); // @@ VAL
next (t, tt);
value val (parse_value_with_attributes (t, tt, pattern_mode::expand));
@@ -2573,6 +2587,7 @@ namespace build2
// condition) for the same reason as in if-else (see parse_if_else()).
//
mode (lexer_mode::value);
+ enable_attributes (); // @@ VAL
next (t, tt);
const location el (get_location (t));
@@ -2627,6 +2642,7 @@ namespace build2
// (expansion, attributes).
//
mode (lexer_mode::value, '@');
+ enable_attributes (); // @@ VAL
next (t, tt);
if (value v = parse_value_with_attributes (t, tt, pattern_mode::expand))
@@ -2660,6 +2676,7 @@ namespace build2
// (expansion, attributes).
//
mode (lexer_mode::value, '@');
+ enable_attributes (); // @@ VAL
next (t, tt);
if (value v = parse_value_with_attributes (t, tt, pattern_mode::expand))
@@ -2862,6 +2879,7 @@ namespace build2
parse_variable_value (token& t, type& tt)
{
mode (lexer_mode::value, '@');
+ enable_attributes (); // @@ VAL.
next (t, tt);
// Parse value attributes if any. Note that it's ok not to have anything
@@ -3121,6 +3139,7 @@ namespace build2
// leave: rparen
mode (lexer_mode::eval, '@'); // Auto-expires at rparen.
+ enable_attributes (); // @@ VAL (eval)
next (t, tt);
if (tt == type::rparen)
@@ -3137,7 +3156,7 @@ namespace build2
values parser::
parse_eval_comma (token& t, type& tt, pattern_mode pmode, bool first)
{
- // enter: first token of LHS
+ // enter: first token of LHS (lexed with enabled attributes)
// leave: next token after last RHS
// Left-associative: parse in a loop for as long as we can.
@@ -3150,7 +3169,9 @@ namespace build2
while (tt == type::comma)
{
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
+
value rhs (parse_eval_ternary (t, tt, pmode));
if (!pre_parse_)
@@ -3163,7 +3184,7 @@ namespace build2
value parser::
parse_eval_ternary (token& t, type& tt, pattern_mode pmode, bool first)
{
- // enter: first token of LHS
+ // enter: first token of LHS (lexed with enabled attributes)
// leave: next token after last RHS
// Right-associative (kind of): we parse what's between ?: without
@@ -3196,7 +3217,9 @@ namespace build2
if (!pp)
pre_parse_ = !q; // Short-circuit middle?
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
+
value mhs (parse_eval_ternary (t, tt, pmode));
if (tt != type::colon)
@@ -3205,7 +3228,9 @@ namespace build2
if (!pp)
pre_parse_ = q; // Short-circuit right?
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
+
value rhs (parse_eval_ternary (t, tt, pmode));
pre_parse_ = pp;
@@ -3215,7 +3240,7 @@ namespace build2
value parser::
parse_eval_or (token& t, type& tt, pattern_mode pmode, bool first)
{
- // enter: first token of LHS
+ // enter: first token of LHS (lexed with enabled attributes)
// leave: next token after last RHS
// Left-associative: parse in a loop for as long as we can.
@@ -3234,7 +3259,9 @@ namespace build2
if (!pre_parse_ && convert<bool> (move (lhs)))
pre_parse_ = true;
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
+
l = get_location (t);
value rhs (parse_eval_and (t, tt, pmode));
@@ -3255,7 +3282,7 @@ namespace build2
value parser::
parse_eval_and (token& t, type& tt, pattern_mode pmode, bool first)
{
- // enter: first token of LHS
+ // enter: first token of LHS (lexed with enabled attributes)
// leave: next token after last RHS
// Left-associative: parse in a loop for as long as we can.
@@ -3274,7 +3301,9 @@ namespace build2
if (!pre_parse_ && !convert<bool> (move (lhs)))
pre_parse_ = true;
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
+
l = get_location (t);
value rhs (parse_eval_comp (t, tt, pmode));
@@ -3295,7 +3324,7 @@ namespace build2
value parser::
parse_eval_comp (token& t, type& tt, pattern_mode pmode, bool first)
{
- // enter: first token of LHS
+ // enter: first token of LHS (lexed with enabled attributes)
// leave: next token after last RHS
// Left-associative: parse in a loop for as long as we can.
@@ -3312,7 +3341,9 @@ namespace build2
type op (tt);
location l (get_location (t));
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
+
value rhs (parse_eval_value (t, tt, pmode));
if (pre_parse_)
@@ -3329,7 +3360,7 @@ namespace build2
value parser::
parse_eval_value (token& t, type& tt, pattern_mode pmode, bool first)
{
- // enter: first token of value
+ // enter: first token of value (lexed with enabled attributes)
// leave: next token after value
// Parse value attributes if any. Note that it's ok not to have anything
@@ -3344,7 +3375,9 @@ namespace build2
{
case type::log_not:
{
+ enable_attributes (); // Recognize attributes before value.
next (t, tt);
+
v = parse_eval_value (t, tt, pmode);
if (pre_parse_)
@@ -3498,7 +3531,7 @@ namespace build2
// Using '@' for attribute key-value pairs would be just too ugly. Seeing
// that we control what goes into keys/values, let's use a much nicer '='.
//
- mode (lexer_mode::attribute, '=');
+ mode (lexer_mode::attributes, '=');
next (t, tt);
has = (tt != type::rsbrace);
@@ -5372,7 +5405,7 @@ namespace build2
// In fact, because this is only done in the buildspec mode, we can still
// use eval contexts provided that we quote them: '"cle(an)"'. Note that
// function calls also need quoting (since a separated '(' is not treated as
- // function call): '"$identity(update)"'.
+ // a function call): '"$identity(update)"'.
//
// This poses a problem, though: if it's quoted then it is a concatenated
// expansion and therefore cannot contain multiple values, for example,
diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx
index d82496d..b07936c 100644
--- a/libbuild2/parser.hxx
+++ b/libbuild2/parser.hxx
@@ -535,8 +535,8 @@ namespace build2
lexer_->mode (m, ps);
else
// As a sanity check, make sure the mode matches the next token. Note
- // that we don't check the pair separator since it can be overriden by
- // the lexer's mode() implementation.
+ // that we don't check the attributes flags or the pair separator
+ // since they can be overridden by the lexer's mode() implementation.
//
assert (replay_i_ != replay_data_.size () &&
replay_data_[replay_i_].mode == m);
@@ -555,6 +555,13 @@ namespace build2
}
void
+ enable_attributes ()
+ {
+ if (replay_ != replay::play)
+ lexer_->enable_attributes ();
+ }
+
+ void
expire_mode ()
{
if (replay_ != replay::play)
diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx
index 75c04c8..a65eb25 100644
--- a/libbuild2/test/script/lexer.cxx
+++ b/libbuild2/test/script/lexer.cxx
@@ -19,11 +19,14 @@ namespace build2
void lexer::
mode (base_mode m, char ps, optional<const char*> esc)
{
+ bool a (false); // attributes
+
const char* s1 (nullptr);
const char* s2 (nullptr);
- bool s (true);
- bool n (true);
- bool q (true);
+
+ bool s (true); // space
+ bool n (true); // newline
+ bool q (true); // quotes
if (!esc)
{
@@ -71,8 +74,8 @@ namespace build2
// Note that we don't recognize ':' since having a trailing
// variable assignment is illegal.
//
- s1 = "; $([]#\t\n";
- s2 = " ";
+ s1 = "; $(#\t\n";
+ s2 = " ";
break;
}
@@ -128,7 +131,7 @@ namespace build2
//
assert (ps == '\0' ||
m == lexer_mode::eval ||
- m == lexer_mode::attribute);
+ m == lexer_mode::attributes);
base_lexer::mode (m, ps, esc);
return;
@@ -136,7 +139,7 @@ namespace build2
}
assert (ps == '\0');
- state_.push (state {m, ps, s, n, q, *esc, s1, s2});
+ state_.push (state {m, a, ps, s, n, q, *esc, s1, s2});
}
token lexer::
@@ -177,9 +180,6 @@ namespace build2
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
- if (eos (c))
- return token (type::eos, sep, ln, cn, token_printer);
-
state st (state_.top ()); // Make copy (see first/second_token).
lexer_mode m (st.mode);
@@ -217,6 +217,22 @@ namespace build2
return make_token (t, move (v));
};
+ // Handle attributes (do it first to make sure the flag is cleared
+ // regardless of what we return).
+ //
+ if (st.attributes)
+ {
+ assert (m == lexer_mode::variable_line);
+
+ state_.top ().attributes = false;
+
+ if (c == '[')
+ return make_token (type::lsbrace);
+ }
+
+ if (eos (c))
+ return make_token (type::eos);
+
// Expire certain modes at the end of the token. Do it early in case
// we push any new mode (e.g., double quote).
//
@@ -253,18 +269,6 @@ namespace build2
}
}
-
- if (m == lexer_mode::variable_line)
- {
- switch (c)
- {
- // Attributes.
- //
- case '[': return make_token (type::lsbrace);
- case ']': return make_token (type::rsbrace);
- }
- }
-
// Line separators.
//
if (m == lexer_mode::command_line ||
diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx
index f3f6ffa..aa78437 100644
--- a/libbuild2/test/script/parser.cxx
+++ b/libbuild2/test/script/parser.cxx
@@ -1288,10 +1288,11 @@ namespace build2
// enter: assignment
// leave: newline or semi
- // We cannot reuse the value mode since it will recognize { which we
+ // We cannot reuse the value mode since it will recognize `{` which we
// want to treat as a literal.
//
mode (lexer_mode::variable_line);
+ enable_attributes (); // @@ VAL
next (t, tt);
// Parse value attributes if any. Note that it's ok not to have
@@ -3446,11 +3447,13 @@ namespace build2
path_ = &name;
istringstream is (attributes);
- lexer l (is, name, lexer_mode::attribute);
+ lexer l (is, name, lexer_mode::attributes);
set_lexer (&l);
token t;
type tt;
+
+ enable_attributes (); // Enable `[` recognition.
next (t, tt);
if (tt != type::lsbrace && tt != type::eos)
diff --git a/libbuild2/token.hxx b/libbuild2/token.hxx
index 2370f8d..e420aa8 100644
--- a/libbuild2/token.hxx
+++ b/libbuild2/token.hxx
@@ -14,7 +14,8 @@
namespace build2
{
- // Extendable/inheritable enum-like class.
+
+ // Token type.
//
// A line consists of a sequence of words separated by separators and
// terminated with the newline. If whitespace is a separator, then it is
@@ -22,6 +23,8 @@ namespace build2
//
struct token_type
{
+ // Extendable/inheritable enum-like class.
+ //
enum
{
// NOTE: remember to update token_printer()!
@@ -145,10 +148,12 @@ namespace build2
inline ostream&
operator<< (ostream& o, const token& t) {t.printer (o, t, true); return o;}
- // Extendable/inheritable enum-like class.
+ // Context-dependent lexing (see lexer_mode for details).
//
struct lexer_mode_base
{
+ // Extendable/inheritable enum-like class.
+ //
enum { value_next };
using value_type = uint16_t;