about summary refs log tree commit diff
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2022-10-18 15:13:29 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2022-10-18 18:31:39 +0300
commit b408d19f614b47670cd0a0def501266f0d7689b5 (patch)
tree bb6bceee609507fb5ef5c2559934622c1737a259
parent 329aa22d9f8e72596b909d2110e11976949155fa (diff)
Fix unexpected 'unterminated double-quoted sequence' script error
-rw-r--r-- libbuild2/build/script/lexer.cxx 17
-rw-r--r-- libbuild2/build/script/lexer.hxx 2
-rw-r--r-- libbuild2/build/script/parser+diag.test.testscript 13
-rw-r--r-- libbuild2/build/script/parser+for.test.testscript 12
-rw-r--r-- libbuild2/lexer.cxx 67
-rw-r--r-- libbuild2/lexer.hxx 46
-rw-r--r-- libbuild2/script/lexer.cxx 11
-rw-r--r-- libbuild2/script/lexer.hxx 2
-rw-r--r-- libbuild2/test/script/lexer.cxx 19
-rw-r--r-- libbuild2/test/script/lexer.hxx 2
-rw-r--r-- libbuild2/test/script/parser+for.test.testscript 15
-rw-r--r-- tests/recipe/buildscript/testscript 42
-rw-r--r-- tests/test/script/runner/for.testscript 23
13 files changed, 217 insertions, 54 deletions
diff --git a/libbuild2/build/script/lexer.cxx b/libbuild2/build/script/lexer.cxx
index 5c13239..e0d87fe 100644
--- a/libbuild2/build/script/lexer.cxx
+++ b/libbuild2/build/script/lexer.cxx
@@ -35,10 +35,7 @@ namespace build2
bool q (true); // quotes
if (!esc)
- {
- assert (!state_.empty ());
- esc = state_.top ().escapes;
- }
+ esc = current_state ().escapes;
switch (m)
{
@@ -107,7 +104,7 @@ namespace build2
}
assert (ps == '\0');
- state_.push (
+ mode_impl (
state {m, data, nullopt, false, false, ps, s, n, q, *esc, s1, s2});
}
@@ -116,7 +113,7 @@ namespace build2
{
token r;
- switch (state_.top ().mode)
+ switch (mode ())
{
case lexer_mode::command_line:
case lexer_mode::first_token:
@@ -142,7 +139,7 @@ namespace build2
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
- state st (state_.top ()); // Make copy (see first/second_token).
+ state st (current_state ()); // Make copy (see first/second_token).
lexer_mode m (st.mode);
auto make_token = [&sep, ln, cn] (type t)
@@ -158,7 +155,7 @@ namespace build2
assert (m == lexer_mode::variable_line ||
m == lexer_mode::for_loop);
- state_.top ().lsbrace = false; // Note: st is a copy.
+ current_state ().lsbrace = false; // Note: st is a copy.
if (c == '[' && (!st.lsbrace_unsep || !sep))
return make_token (type::lsbrace);
@@ -171,7 +168,7 @@ namespace build2
// we push any new mode (e.g., double quote).
//
if (m == lexer_mode::first_token || m == lexer_mode::second_token)
- state_.pop ();
+ expire_mode ();
// NOTE: remember to update mode() if adding new special characters.
@@ -182,7 +179,7 @@ namespace build2
// Expire variable value mode at the end of the line.
//
if (m == lexer_mode::variable_line)
- state_.pop ();
+ expire_mode ();
sep = true; // Treat newline as always separated.
return make_token (type::newline);
diff --git a/libbuild2/build/script/lexer.hxx b/libbuild2/build/script/lexer.hxx
index 313d80a..3f51493 100644
--- a/libbuild2/build/script/lexer.hxx
+++ b/libbuild2/build/script/lexer.hxx
@@ -68,6 +68,8 @@ namespace build2
static redirect_aliases_type redirect_aliases;
private:
+ using build2::script::lexer::mode; // Getter.
+
token
next_line ();
};
diff --git a/libbuild2/build/script/parser+diag.test.testscript b/libbuild2/build/script/parser+diag.test.testscript
index 272d10c..a720fe2 100644
--- a/libbuild2/build/script/parser+diag.test.testscript
+++ b/libbuild2/build/script/parser+diag.test.testscript
@@ -45,16 +45,19 @@ $* <<EOI >>EOO
diag: copy foo
EOO
- # @@ TMP Enable when the unexpected 'unterminated double-quoted sequence'
- # error is fixed.
- #\
: quoted
:
- $* <<EOI >false
+ $* <<EOI >'diag: foo'
f = foo
diag "$f"
EOI
- #\
+
+ : quoted-eval
+ :
+ $* <<EOI >'diag: foo'
+ f = foo
+ diag "($f)"
+ EOI
: temp_dir
:
diff --git a/libbuild2/build/script/parser+for.test.testscript b/libbuild2/build/script/parser+for.test.testscript
index 880b03c..2a9f169 100644
--- a/libbuild2/build/script/parser+for.test.testscript
+++ b/libbuild2/build/script/parser+for.test.testscript
@@ -543,20 +543,20 @@
buildfile:11:1: error: for: missing variable name
EOE
- # @@ TMP Enable when the unexpected 'unterminated double-quoted sequence'
- # error is fixed.
- #\
- : quoted-ops
+ : quoted-opt
:
$* <<EOI >>EOO
- o=-w
+ o = -w
for "$o" x <'a b'
cmd $x
end
+ for "($o)" x <'a b'
+ cmd $x
+ end
EOI
for -w x <'a b'
+ for -w x <'a b'
EOO
- #\
: untyped
:
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index 76c31be..9176422 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -160,13 +160,15 @@ namespace build2
break;
}
case lexer_mode::foreign:
- assert (data > 1);
- // Fall through.
+ {
+ assert (ps == '\0' && data > 1);
+ s = false;
+ break;
+ }
case lexer_mode::single_quoted:
case lexer_mode::double_quoted:
{
- assert (ps == '\0');
- s = false;
+ assert (false); // Can only be set manually in word().
break;
}
case lexer_mode::variable:
@@ -178,8 +180,49 @@ namespace build2
default: assert (false); // Unhandled custom mode.
}
- state_.push (
- state {m, data, nullopt, lsb, false, ps, s, n, q, *esc, s1, s2});
+ mode_impl (state {m, data, nullopt, lsb, false, ps, s, n, q, *esc, s1, s2});
+ }
+
+ void lexer::
+ mode_impl (state&& s)
+ {
+ // If we are in the double-quoted mode then, unless the new mode is eval
+ // or variable, delay the state switch until the current mode is expired.
+ // Note that we delay by injecting the new state beneath the current
+ // state.
+ //
+ if (!state_.empty () &&
+ state_.top ().mode == lexer_mode::double_quoted &&
+ s.mode != lexer_mode::eval &&
+ s.mode != lexer_mode::variable)
+ {
+ state qs (move (state_.top ())); // Save quoted state.
+ state_.top () = move (s); // Overwrite quoted state with new state.
+ state_.push (move (qs)); // Restore quoted state.
+ }
+ else
+ state_.push (move (s));
+ }
+
+ void lexer::
+ expire_mode ()
+ {
+ // If we are in the double-quoted mode, then delay the state expiration
+ // until the current mode is expired. Note that we delay by overwriting
+ // the state being expired with the current (double-quoted) state.
+ //
+ assert (!state_.empty () &&
+ (state_.top ().mode != lexer_mode::double_quoted ||
+ state_.size () > 1));
+
+ if (state_.top ().mode == lexer_mode::double_quoted)
+ {
+ state qs (move (state_.top ())); // Save quoted state.
+ state_.pop (); // Pop quoted state.
+ state_.top () = move (qs); // Expire state, restoring quoted state.
+ }
+ else
+ state_.pop ();
}
token lexer::
@@ -835,6 +878,13 @@ namespace build2
//
if (st.quotes && !done)
{
+ auto quoted_mode = [this] (lexer_mode m)
+ {
+ state_.push (state {
+ m, 0, nullopt, false, false, '\0', false, true, true,
+ state_.top ().escapes, nullptr, nullptr});
+ };
+
switch (c)
{
case '\'':
@@ -842,7 +892,7 @@ namespace build2
// Enter the single-quoted mode in case the derived lexer needs
// to notice this.
//
- mode (lexer_mode::single_quoted);
+ quoted_mode (lexer_mode::single_quoted);
switch (qtype)
{
@@ -881,7 +931,8 @@ namespace build2
{
get ();
- mode (lexer_mode::double_quoted);
+ quoted_mode (lexer_mode::double_quoted);
+
st = state_.top ();
m = st.mode;
diff --git a/libbuild2/lexer.hxx b/libbuild2/lexer.hxx
index 78d35d7..4371206 100644
--- a/libbuild2/lexer.hxx
+++ b/libbuild2/lexer.hxx
@@ -133,10 +133,23 @@ namespace build2
const path_name&
name () const {return name_;}
- // Note: sets mode for the next token. The second argument can be used to
- // specify the pair separator character (if the mode supports pairs). If
- // escapes is not specified, then inherit the current mode's (though a
- // mode can also override it).
+ // Set the lexer mode for the next token or delay this until the end of a
+ // double-quoted token sequence is encountered. The second argument can be
+ // used to specify the pair separator character (if the mode supports
+ // pairs). If escapes is not specified, then inherit the current mode's
+ // (though a mode can also override it).
+ //
+ // Note that there is a common parsing pattern of sensing the language
+ // construct kind we are about to parse by reading its first token,
+ // switching to an appropriate lexing mode, and then parsing the rest. The
+ // problem here is that the first token may start the double-quoted token
+ // sequence, turning the lexer into the double-quoted mode. In this case
+ // switching the lexer mode right away would not be a good idea. Thus,
+ // this function delays the mode switch until the end of the double-quoted
+ // sequence is encountered. Note, however, that such a delay only works
+ // properly if the function is called right after the first quoted token
+ // is read (because any subsequent tokens may end up being parsed in a
+ // nested mode such as variable or eval; see mode_impl() for details).
//
virtual void
mode (lexer_mode,
@@ -153,10 +166,12 @@ namespace build2
state_.top ().lsbrace_unsep = unsep;
}
- // Expire the current mode early.
+ // Expire the current mode early or delay this until the end of a
+ // double-quoted token sequence is encountered (see mode() for details on
+ // the delay condition and reasoning).
//
void
- expire_mode () {state_.pop ();}
+ expire_mode ();
lexer_mode
mode () const {return state_.top ().mode;}
@@ -258,6 +273,20 @@ namespace build2
pair<bool, bool>
skip_spaces ();
+ // Set state for the next token or delay until the end of a double-quoted
+ // token sequence is encountered (see mode() for details on the delay
+ // condition and reasoning).
+ //
+ void
+ mode_impl (state&&);
+
+ state&
+ current_state ()
+ {
+ assert (!state_.empty ());
+ return state_.top ();
+ }
+
// Diagnostics.
//
protected:
@@ -286,11 +315,14 @@ namespace build2
}
const path_name& name_;
- std::stack<state> state_;
bool sep_; // True if we skipped spaces in peek().
private:
+ // Use current_state(), mode_impl(), and expire_mode().
+ //
+ std::stack<state> state_;
+
using base = char_scanner<butl::utf8_validator, 2>;
// Buffer for a get()/peek() potential error.
diff --git a/libbuild2/script/lexer.cxx b/libbuild2/script/lexer.cxx
index 7577149..e13bbdb 100644
--- a/libbuild2/script/lexer.cxx
+++ b/libbuild2/script/lexer.cxx
@@ -24,10 +24,7 @@ namespace build2
bool q (true); // quotes
if (!esc)
- {
- assert (!state_.empty ());
- esc = state_.top ().escapes;
- }
+ esc = current_state ().escapes;
switch (m)
{
@@ -84,7 +81,7 @@ namespace build2
}
assert (ps == '\0');
- state_.push (
+ mode_impl (
state {m, data, nullopt, false, false, ps, s, n, q, *esc, s1, s2});
}
@@ -93,7 +90,7 @@ namespace build2
{
token r;
- switch (state_.top ().mode)
+ switch (mode ())
{
case lexer_mode::command_expansion:
case lexer_mode::here_line_single:
@@ -119,7 +116,7 @@ namespace build2
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
- const state& st (state_.top ());
+ const state& st (current_state ());
lexer_mode m (st.mode);
auto make_token = [&sep, &m, ln, cn] (type t)
diff --git a/libbuild2/script/lexer.hxx b/libbuild2/script/lexer.hxx
index dbfdfcc..3cbcc03 100644
--- a/libbuild2/script/lexer.hxx
+++ b/libbuild2/script/lexer.hxx
@@ -112,6 +112,8 @@ namespace build2
const redirect_aliases_type& redirect_aliases;
protected:
+ using build2::lexer::mode; // Getter.
+
lexer (istream& is, const path_name& name, uint64_t line,
const char* escapes,
bool set_mode,
diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx
index 9475ad4..b470d25 100644
--- a/libbuild2/test/script/lexer.cxx
+++ b/libbuild2/test/script/lexer.cxx
@@ -34,10 +34,7 @@ namespace build2
bool q (true); // quotes
if (!esc)
- {
- assert (!state_.empty ());
- esc = state_.top ().escapes;
- }
+ esc = current_state ().escapes;
switch (m)
{
@@ -113,7 +110,7 @@ namespace build2
}
assert (ps == '\0');
- state_.push (
+ mode_impl (
state {m, data, nullopt, false, false, ps, s, n, q, *esc, s1, s2});
}
@@ -122,7 +119,7 @@ namespace build2
{
token r;
- switch (state_.top ().mode)
+ switch (mode ())
{
case lexer_mode::command_line:
case lexer_mode::first_token:
@@ -151,7 +148,7 @@ namespace build2
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
- state st (state_.top ()); // Make copy (see first/second_token).
+ state st (current_state ()); // Make copy (see first/second_token).
lexer_mode m (st.mode);
auto make_token = [&sep, ln, cn] (type t)
@@ -167,7 +164,7 @@ namespace build2
assert (m == lexer_mode::variable_line ||
m == lexer_mode::for_loop);
- state_.top ().lsbrace = false; // Note: st is a copy.
+ current_state ().lsbrace = false; // Note: st is a copy.
if (c == '[' && (!st.lsbrace_unsep || !sep))
return make_token (type::lsbrace);
@@ -180,7 +177,7 @@ namespace build2
// we push any new mode (e.g., double quote).
//
if (m == lexer_mode::first_token || m == lexer_mode::second_token)
- state_.pop ();
+ expire_mode ();
// NOTE: remember to update mode() if adding new special characters.
@@ -191,7 +188,7 @@ namespace build2
// Expire variable value mode at the end of the line.
//
if (m == lexer_mode::variable_line)
- state_.pop ();
+ expire_mode ();
sep = true; // Treat newline as always separated.
return make_token (type::newline);
@@ -322,7 +319,7 @@ namespace build2
if (c == '\n')
{
get ();
- state_.pop (); // Expire the description mode.
+ expire_mode (); // Expire the description mode.
return token (type::newline, true, ln, cn, token_printer);
}
diff --git a/libbuild2/test/script/lexer.hxx b/libbuild2/test/script/lexer.hxx
index def269b..993a9db 100644
--- a/libbuild2/test/script/lexer.hxx
+++ b/libbuild2/test/script/lexer.hxx
@@ -68,6 +68,8 @@ namespace build2
static redirect_aliases_type redirect_aliases;
private:
+ using build2::script::lexer::mode; // Getter.
+
token
next_line ();
diff --git a/libbuild2/test/script/parser+for.test.testscript b/libbuild2/test/script/parser+for.test.testscript
index 9d70886..5350f28 100644
--- a/libbuild2/test/script/parser+for.test.testscript
+++ b/libbuild2/test/script/parser+for.test.testscript
@@ -801,6 +801,21 @@
testscript:1:1: error: for: missing variable name
EOE
+ : quoted-opt
+ :
+ $* <<EOI >>EOO
+ o = -w
+ for "$o" x <'a b'
+ cmd $x
+ end;
+ for "($o)" x <'a b'
+ cmd $x
+ end
+ EOI
+ for -w x <'a b'
+ for -w x <'a b'
+ EOO
+
: untyped
:
$* <<EOI >>EOO
diff --git a/tests/recipe/buildscript/testscript b/tests/recipe/buildscript/testscript
index c473b0d..94eb665 100644
--- a/tests/recipe/buildscript/testscript
+++ b/tests/recipe/buildscript/testscript
@@ -1550,6 +1550,48 @@ if $posix
$* clean 2>-
}
+ : quoting
+ :
+ {
+ echo 'bar' >=bar;
+ echo 'baz' >=baz;
+
+ cat <<EOI >=buildfile;
+ foo: bar baz
+ {{
+ n = 'gen'
+ diag "($n)" ($>)
+
+ p = $path($>)
+ rm -f $p
+
+ o = -w
+ for "$o" f <<"EOF"
+ $path($<)
+ EOF
+ cat $f >>$p
+ end
+
+ o = -n
+ for "($o)" f <<"EOF"
+ $path($<)
+ EOF
+ echo $f >>$p
+ end
+ }}
+ EOI
+
+ $* 2>'gen file{foo.}';
+
+ cat <<<foo >>~%EOO%;
+ bar
+ baz
+ %.+bar .+baz%
+ EOO
+
+ $* clean 2>-
+ }
+
: special-var
:
{
diff --git a/tests/test/script/runner/for.testscript b/tests/test/script/runner/for.testscript
index 85ea765..658ff49 100644
--- a/tests/test/script/runner/for.testscript
+++ b/tests/test/script/runner/for.testscript
@@ -268,6 +268,29 @@
'b'
EOO
+ : quoted-opt
+ :
+ $c <<EOI && $b >>EOO
+ o = -n
+ for "$o" x <<EOF
+ a
+ b
+ EOF
+ echo "'$x'" >|
+ end;
+ for "($o)" x <<EOF
+ c
+ d
+ EOF
+ echo "'$x'" >|
+ end
+ EOI
+ 'a'
+ 'b'
+ 'c'
+ 'd'
+ EOO
+
: newline-split
:
$c <<EOI && $b >>EOO