aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2020-05-25 12:12:13 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2020-05-27 15:44:40 +0200
commitba628f6f90e7412245dcebdecd9cfa7e4bbf989c (patch)
tree8f7f109c70d615bc93daa2a8f4b25ceb3842e062
parent2a9204cab666d47770bf3809d95a689088019121 (diff)
Add support for value subscript after expansions [adhoc-recipe-history]
Value subscript is only recognized in evaluation contexts (due to ambiguity with wildcard patterns; consider: $x[123].txt) and should be unseparated from the previous token. For example: x = ($y[1]) x = (($f ? $y : $z)[1]) x = ($identity($x)[$z])
-rw-r--r--libbuild2/build/script/lexer.cxx15
-rw-r--r--libbuild2/lexer.cxx59
-rw-r--r--libbuild2/lexer.hxx38
-rw-r--r--libbuild2/parser.cxx145
-rw-r--r--libbuild2/parser.hxx9
-rw-r--r--libbuild2/script/lexer.cxx5
-rw-r--r--libbuild2/test/script/lexer.cxx15
-rw-r--r--libbuild2/variable.cxx8
-rw-r--r--tests/expansion/concat.testscript2
-rw-r--r--tests/expansion/subscript.testscript97
10 files changed, 308 insertions, 85 deletions
diff --git a/libbuild2/build/script/lexer.cxx b/libbuild2/build/script/lexer.cxx
index 7b8bdd4..a58f794 100644
--- a/libbuild2/build/script/lexer.cxx
+++ b/libbuild2/build/script/lexer.cxx
@@ -27,8 +27,6 @@ namespace build2
optional<const char*> esc,
uintptr_t data)
{
- bool a (false); // attributes
-
const char* s1 (nullptr);
const char* s2 (nullptr);
@@ -88,7 +86,8 @@ namespace build2
}
assert (ps == '\0');
- state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2});
+ state_.push (
+ state {m, data, nullopt, false, false, ps, s, n, q, *esc, s1, s2});
}
token lexer::
@@ -129,16 +128,16 @@ namespace build2
return token (t, sep, ln, cn, token_printer);
};
- // Handle attributes (do it first to make sure the flag is cleared
- // regardless of what we return).
+ // Handle `[` (do it first to make sure the flag is cleared regardless
+ // of what we return).
//
- if (st.attributes)
+ if (st.lsbrace)
{
assert (m == lexer_mode::variable_line);
- state_.top ().attributes = false;
+ state_.top ().lsbrace = false; // Note: st is a copy.
- if (c == '[')
+ if (c == '[' && (!st.lsbrace_unsep || !sep))
return make_token (type::lsbrace);
}
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index 6d3504c..7149d45 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -39,7 +39,7 @@ namespace build2
void lexer::
mode (lexer_mode m, char ps, optional<const char*> esc, uintptr_t data)
{
- bool a (false); // attributes
+ bool lsb (false); // Enable `[` recognition.
const char* s1 (nullptr);
const char* s2 (nullptr);
@@ -62,9 +62,9 @@ namespace build2
// Note: `%` is only recognized at the beginning of the line so it
// should not be included here.
//
- a = true;
s1 = ":<>=+? $(){}#\t\n";
s2 = " == ";
+ lsb = true;
break;
}
case lexer_mode::value:
@@ -103,6 +103,12 @@ namespace build2
s2 = " ";
break;
}
+ case lexer_mode::subscript:
+ {
+ s1 = " $()]#\t\n";
+ s2 = " ";
+ break;
+ }
case lexer_mode::eval:
{
s1 = ":<>=!&|?, $(){}#\t\n";
@@ -147,7 +153,8 @@ namespace build2
default: assert (false); // Unhandled custom mode.
}
- state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2});
+ state_.push (
+ state {m, data, nullopt, lsb, false, ps, s, n, q, *esc, s1, s2});
}
token lexer::
@@ -168,6 +175,7 @@ namespace build2
case lexer_mode::case_patterns:
case lexer_mode::attributes:
case lexer_mode::attribute_value:
+ case lexer_mode::subscript:
case lexer_mode::variable:
case lexer_mode::buildspec: break;
case lexer_mode::eval: return next_eval ();
@@ -190,14 +198,14 @@ namespace build2
ln, cn, token_printer);
};
- // Handle attributes (do it first to make sure the flag is cleared
- // regardless of what we return).
+ // Handle `[` (do it first to make sure the flag is cleared regardless of
+ // what we return).
//
- if (st.attributes)
+ if (st.lsbrace)
{
- st.attributes = false;
+ st.lsbrace = false;
- if (c == '[')
+ if (c == '[' && (!st.lsbrace_unsep || !sep))
return make_token (type::lsbrace);
}
@@ -226,11 +234,15 @@ namespace build2
m == lexer_mode::case_patterns)
state_.pop ();
- // Re-enable attributes in the normal mode (should never be needed in
- // cmdvar).
+ // Re-enable `[` recognition (attributes) in the normal mode (should
+ // never be needed in cmdvar).
//
- if (state_.top ().mode == lexer_mode::normal)
- state_.top ().attributes = true;
+ state& st (state_.top ());
+ if (st.mode == lexer_mode::normal)
+ {
+ st.lsbrace = true;
+ st.lsbrace_unsep = false;
+ }
sep = true; // Treat newline as always separated.
return make_token (type::newline);
@@ -274,9 +286,12 @@ namespace build2
}
}
- // The following characters are special in all modes except attributes.
+ // The following characters are special in all modes except attributes
+ // and subscript.
//
- if (m != lexer_mode::attributes && m != lexer_mode::attribute_value)
+ if (m != lexer_mode::attributes &&
+ m != lexer_mode::attribute_value &&
+ m != lexer_mode::subscript)
{
switch (c)
{
@@ -295,13 +310,15 @@ namespace build2
}
}
- if (m == lexer_mode::attributes || m == lexer_mode::attribute_value)
+ if (m == lexer_mode::attributes ||
+ m == lexer_mode::attribute_value ||
+ m == lexer_mode::subscript)
{
switch (c)
{
case ']':
{
- state_.pop (); // Expire the attributes mode after closing `]`.
+ state_.pop (); // Expire the mode after closing `]`.
return make_token (type::rsbrace);
}
}
@@ -425,14 +442,14 @@ namespace build2
ln, cn, token_printer);
};
- // Handle attributes (do it first to make sure the flag is cleared
- // regardless of what we return).
+ // Handle `[` (do it first to make sure the flag is cleared regardless of
+ // what we return).
//
- if (st.attributes)
+ if (st.lsbrace)
{
- st.attributes = false;
+ st.lsbrace = false;
- if (c == '[')
+ if (c == '[' && (!st.lsbrace_unsep || !sep))
return make_token (type::lsbrace);
}
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
index 749668e..d5f1c99 100644
--- a/libbuild2/lexer.hxx
+++ b/libbuild2/lexer.hxx
@@ -31,7 +31,9 @@ namespace build2
// values). The attributes/attribute_value modes are like values where each
// value is potentially a variable assignment; they don't treat `{` and `}`
// as special (so we cannot have name groups in attributes) as well as
- // recognizes `=` and `]`. The eval mode is used in the evaluation context.
+ // recognizes `=` and `]`. The subscript mode is like value but doesn't
+ // treat `{` and `}` as special and recognizes `]`. The eval mode is used in
+ // the evaluation context.
//
// A number of modes are "derived" from the value/values mode by recognizing
// a few extra characters:
@@ -55,10 +57,10 @@ namespace build2
// mode data.
//
// The alternative modes must be set manually. The value/values and derived
- // modes automatically expires after the end of the line. The attribute mode
- // expires after the closing `]`. The variable mode expires after the word
- // token. The eval mode expires after the closing `)`. And the foreign mode
- // expires after the closing braces.
+ // modes automatically expire after the end of the line. The attribute and
+ // subscript modes expire after the closing `]`. The variable mode expires
+ // after the word token. The eval mode expires after the closing `)`. And
+ // the foreign mode expires after the closing braces.
//
// Note that normally it is only safe to switch mode when the current token
// is not quoted (or, more generally, when you are not in the double-quoted
@@ -66,13 +68,13 @@ namespace build2
// variable name mode). Failed that your mode (which now will be the top of
// the mode stack) will prevent proper recognition of the closing quote.
//
- // Finally, attributes recognition (the `[` token) cuts across most of the
- // modes and is handled with a flag. In the normal mode it is automatically
- // set at the beginning and after each newline. In all other modes it must
- // be explicitly set at points where attributes are recognized. In all the
- // cases it is automatically reset after lexing the next token (whether `[`
- // or not).
- //
+ // The `[` token is used for attributes (where it cuts across most of the
+ // modes) as well as for value subscript (where it is only recognized after
+ // expansions). It is handled with a flag. In the normal mode it is
+ // automatically set at the beginning and after each newline. In all other
+ // modes it must be explicitly set at points where attribute/subscript is
+ // recognized. In all the cases it is automatically reset after lexing the
+ // next token (whether `[` or not).
// Extendable/inheritable enum-like class.
//
@@ -91,6 +93,7 @@ namespace build2
switch_expressions,
attributes,
attribute_value,
+ subscript,
eval,
single_quoted,
double_quoted,
@@ -134,10 +137,14 @@ namespace build2
optional<const char*> escapes = nullopt,
uintptr_t data = 0);
- // Enable attributes recognition for the next token.
+ // Enable `[` recognition for the next token.
//
void
- enable_attributes () {state_.top ().attributes = true;}
+ enable_lsbrace (bool unsep = false)
+ {
+ state_.top ().lsbrace = true;
+ state_.top ().lsbrace_unsep = unsep;
+ }
// Expire the current mode early.
//
@@ -177,7 +184,8 @@ namespace build2
uintptr_t data;
optional<token> hold;
- bool attributes;
+ bool lsbrace; // Recognize `[`.
+ bool lsbrace_unsep; // Recognize it only if unseparated.
char sep_pair;
bool sep_space; // Are whitespaces separators (see skip_spaces())?
diff --git a/libbuild2/parser.cxx b/libbuild2/parser.cxx
index 94f597d..c359ce0 100644
--- a/libbuild2/parser.cxx
+++ b/libbuild2/parser.cxx
@@ -4204,8 +4204,9 @@ namespace build2
tt == type::dollar ||
tt == type::lparen ||
tt == type::lcbrace))
- fail (t) << "whitespace required after attributes" <<
- info << "use the '\\[' escape sequence if this is a wildcard pattern";
+ fail (t) << "whitespace required after attributes" <<
+ info (l) << "use the '\\[' escape sequence if this is a wildcard "
+ << "pattern";
return make_pair (has, l);
}
@@ -5520,7 +5521,7 @@ namespace build2
continue;
}
- // Variable expansion, function call, or eval context.
+ // Expansions: variable expansion, function call, or eval context.
//
if (tt == type::dollar || tt == type::lparen)
{
@@ -5533,6 +5534,11 @@ namespace build2
const char* what; // Variable, function, or evaluation context.
bool quoted (t.qtype != quote_type::unquoted);
+ // We only recognize value subscripts inside eval contexts due to the
+ // ambiguity with wildcard patterns (consider: $x[123].txt).
+ //
+ bool sub (mode () == lexer_mode::eval);
+
if (tt == type::dollar)
{
// Switch to the variable name mode. We want to use this mode for
@@ -5625,9 +5631,10 @@ namespace build2
if (!pre_parse_ && name.empty ())
fail (loc) << "empty variable/function name";
- // Figure out whether this is a variable expansion or a function
- // call.
+ // Figure out whether this is a variable expansion with potential
+ // subscript or a function call.
//
+ if (sub) enable_subscript ();
tt = peek ();
// Note that we require function call opening paren to be
@@ -5645,15 +5652,17 @@ namespace build2
// context in which to call the function? Hm, interesting...
//
values args (parse_eval (t, tt, pmode));
- tt = peek ();
- if (pre_parse_)
- continue; // As if empty result.
+ if (sub) enable_subscript ();
+ tt = peek ();
// Note that we "move" args to call().
//
- result_data = ctx.functions.call (scope_, name, args, loc);
- what = "function call";
+ if (!pre_parse_)
+ {
+ result_data = ctx.functions.call (scope_, name, args, loc);
+ what = "function call";
+ }
}
else
{
@@ -5661,42 +5670,124 @@ namespace build2
//
lookup l (lookup_variable (move (qual), move (name), loc));
- if (pre_parse_)
- continue; // As if empty value.
-
- if (l.defined ())
- result = l.value; // Otherwise leave as NULL result_data.
+ if (!pre_parse_)
+ {
+ if (l.defined ())
+ result = l.value; // Otherwise leave as NULL result_data.
- what = "variable expansion";
+ what = "variable expansion";
+ }
}
}
else
{
- // Context evaluation.
+ // Evaluation context.
//
loc = get_location (t);
mode (lexer_mode::eval, '@');
next_with_attributes (t, tt);
values vs (parse_eval (t, tt, pmode));
+
+ if (sub) enable_subscript ();
tt = peek ();
- if (pre_parse_)
- continue; // As if empty result.
+ if (!pre_parse_)
+ {
+ switch (vs.size ())
+ {
+ case 0: result_data = value (names ()); break;
+ case 1: result_data = move (vs[0]); break;
+ default: fail (loc) << "expected single value";
+ }
- switch (vs.size ())
+ what = "context evaluation";
+ }
+ }
+
+ // Handle value subscript.
+ //
+ if (tt == type::lsbrace)
+ {
+ location bl (get_location (t));
+ next (t, tt); // `[`
+ mode (lexer_mode::subscript, '\0' /* pair */);
+ next (t, tt);
+
+ location l (get_location (t));
+ value v (
+ tt != type::rsbrace
+ ? parse_value (t, tt, pattern_mode::ignore, "value subscript")
+ : value (names ()));
+
+ if (tt != type::rsbrace)
{
- case 0: result_data = value (names ()); break;
- case 1: result_data = move (vs[0]); break;
- default: fail (loc) << "expected single value";
+ // Note: wildcard pattern should have `]` as well so no escaping
+ // suggestion.
+ //
+ fail (t) << "expected ']' instead of " << t;
}
- what = "context evaluation";
+ if (!pre_parse_)
+ {
+ uint64_t j;
+ try
+ {
+ j = convert<uint64_t> (move (v));
+ }
+ catch (const invalid_argument& e)
+ {
+ fail (l) << "invalid value subscript: " << e <<
+ info (bl) << "use the '\\[' escape sequence if this is a "
+ << "wildcard pattern";
+ }
+
+ // Similar to expanding an undefined variable, we return NULL if
+ // the index is out of bounds.
+ //
+ // Note that result may or may not point to result_data.
+ //
+ if (result->type == nullptr)
+ {
+ const names& ns (result->as<names> ());
+
+ // Pair-aware subscript.
+ //
+ names r;
+ for (auto i (ns.begin ()); i != ns.end (); ++i, --j)
+ {
+ if (j == 0)
+ {
+ r.push_back (*i);
+ if (i->pair)
+ r.push_back (*++i);
+ break;
+ }
+
+ if (i->pair)
+ ++i;
+ }
+
+ result_data = r.empty () ? value () : value (move (r));
+ }
+ else
+ {
+ // @@ TODO: we would want to return a value with element type.
+ //
+ //result_data = ...
+ fail (l) << "typed value subscript not yet supported" <<
+ info (bl) << "use the '\\[' escape sequence if this is a "
+ << "wildcard pattern";
+ }
+
+ result = &result_data;
+ }
+
+ tt = peek ();
}
- // We never end up here during pre-parsing.
- //
- assert (!pre_parse_);
+ if (pre_parse_)
+ continue; // As if empty result.
// Should we accumulate? If the buffer is not empty, then we continue
// accumulating (the case where we are separated should have been
diff --git a/libbuild2/parser.hxx b/libbuild2/parser.hxx
index bc01e08..2f67c31 100644
--- a/libbuild2/parser.hxx
+++ b/libbuild2/parser.hxx
@@ -619,7 +619,14 @@ namespace build2
enable_attributes ()
{
if (replay_ != replay::play)
- lexer_->enable_attributes ();
+ lexer_->enable_lsbrace ();
+ }
+
+ void
+ enable_subscript ()
+ {
+ if (replay_ != replay::play)
+ lexer_->enable_lsbrace (true /* unseparated */);
}
void
diff --git a/libbuild2/script/lexer.cxx b/libbuild2/script/lexer.cxx
index d78e999..ce409c1 100644
--- a/libbuild2/script/lexer.cxx
+++ b/libbuild2/script/lexer.cxx
@@ -16,8 +16,6 @@ namespace build2
void lexer::
mode (base_mode m, char ps, optional<const char*> esc, uintptr_t data)
{
- bool a (false); // attributes
-
const char* s1 (nullptr);
const char* s2 (nullptr);
@@ -86,7 +84,8 @@ namespace build2
}
assert (ps == '\0');
- state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2});
+ state_.push (
+ state {m, data, nullopt, false, false, ps, s, n, q, *esc, s1, s2});
}
token lexer::
diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx
index a94109b..e895d4a 100644
--- a/libbuild2/test/script/lexer.cxx
+++ b/libbuild2/test/script/lexer.cxx
@@ -26,8 +26,6 @@ namespace build2
void lexer::
mode (base_mode m, char ps, optional<const char*> esc, uintptr_t data)
{
- bool a (false); // attributes
-
const char* s1 (nullptr);
const char* s2 (nullptr);
@@ -109,7 +107,8 @@ namespace build2
}
assert (ps == '\0');
- state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2});
+ state_.push (
+ state {m, data, nullopt, false, false, ps, s, n, q, *esc, s1, s2});
}
token lexer::
@@ -153,16 +152,16 @@ namespace build2
return token (t, sep, ln, cn, token_printer);
};
- // Handle attributes (do it first to make sure the flag is cleared
- // regardless of what we return).
+ // Handle `[` (do it first to make sure the flag is cleared regardless
+ // of what we return).
//
- if (st.attributes)
+ if (st.lsbrace)
{
assert (m == lexer_mode::variable_line);
- state_.top ().attributes = false;
+ state_.top ().lsbrace = false; // Note: st is a copy.
- if (c == '[')
+ if (c == '[' && (!st.lsbrace_unsep || !sep))
return make_token (type::lsbrace);
}
diff --git a/libbuild2/variable.cxx b/libbuild2/variable.cxx
index d16fcb4..206eb54 100644
--- a/libbuild2/variable.cxx
+++ b/libbuild2/variable.cxx
@@ -491,7 +491,13 @@ namespace build2
{
// May throw invalid_argument or out_of_range.
//
- return stoull (n.value);
+ size_t i;
+ uint64_t r (stoull (n.value, &i));
+
+ if (i == n.value.size ())
+ return r;
+
+ // Fall through.
}
catch (const std::exception&)
{
diff --git a/tests/expansion/concat.testscript b/tests/expansion/concat.testscript
index 181a738..bec48ce 100644
--- a/tests/expansion/concat.testscript
+++ b/tests/expansion/concat.testscript
@@ -1,4 +1,4 @@
-# file : tests/expansion/type.testscript
+# file : tests/expansion/concat.testscript
# license : MIT; see accompanying LICENSE file
# Test concatenated expansion.
diff --git a/tests/expansion/subscript.testscript b/tests/expansion/subscript.testscript
new file mode 100644
index 0000000..0c06394
--- /dev/null
+++ b/tests/expansion/subscript.testscript
@@ -0,0 +1,97 @@
+# file : tests/expansion/subscript.testscript
+# license : MIT; see accompanying LICENSE file
+
+# Test subscript expansion.
+
+.include ../common.testscript
+
+: basics
+:
+$* <<EOI >>EOO
+x = zero one two three
+y = zero@one two@three
+i = 2
+
+print ($x[1])
+print ($x[4])
+print (($x)[1])
+print (($x)[4])
+print ($identity($x)[1])
+print ($identity($x)[4])
+
+print
+
+print ($y[1])
+print ($y[4])
+print (($y)[1])
+print (($y)[4])
+print ($identity($y)[1])
+print ($identity($y)[4])
+
+print
+
+print ($x[$i])
+
+EOI
+one
+[null]
+one
+[null]
+one
+[null]
+
+two@three
+[null]
+two@three
+[null]
+two@three
+[null]
+
+two
+EOO
+
+: unseparated
+:
+$* <<EOI >>EOO
+x = zero one
+print ($x [1])
+EOI
+zero one
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+x = zero
+print ($x\[abc])
+EOI
+EOO
+
+: preparse
+:
+$* <<EOI >>EOO
+x = zero one two three
+print (true ? $x[1] : $x[])
+EOI
+one
+EOO
+
+: missing-rsbrace
+:
+$* <'print ($x[1)' 2>>EOE != 0
+<stdin>:1:12: error: expected ']' instead of ')'
+EOE
+
+: invalid-subscript
+:
+$* <'print ($x[1a])' 2>>EOE != 0
+<stdin>:1:11: error: invalid value subscript: invalid uint64 value: '1a'
+ <stdin>:1:9: info: use the '\[' escape sequence if this is a wildcard pattern
+EOE
+
+: empty-subscript
+:
+$* <'print ($x[])' 2>>EOE != 0
+<stdin>:1:11: error: invalid value subscript: invalid uint64 value: empty
+ <stdin>:1:9: info: use the '\[' escape sequence if this is a wildcard pattern
+EOE