aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2016-11-29 15:28:47 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2016-11-29 15:28:47 +0200
commit 4b31ef06275ad423e48a75d15fb0ee21c3127e3c (patch)
tree 7cb5b1358dcb7c1650215ad319593f485d9a0caa
parent 97d5397a8079fc0ece521e0c36e313043bc22b12 (diff)
Fix escaping issue, propagate quoting for $ and ( tokens
-rw-r--r-- build2/lexer | 6
-rw-r--r-- build2/lexer.cxx | 19
-rw-r--r-- build2/test/script/lexer | 4
-rw-r--r-- build2/test/script/lexer.cxx | 38
-rw-r--r-- build2/token | 6
-rw-r--r-- unit-tests/lexer/quoting.test | 4
6 files changed, 53 insertions, 24 deletions
diff --git a/build2/lexer b/build2/lexer
index e2cf07c..ad00e83 100644
--- a/build2/lexer
+++ b/build2/lexer
@@ -82,12 +82,14 @@ namespace build2
name () const {return name_;}
// Note: sets mode for the next token. The second argument can be used to
- // specify the pair separator character (if the mode supports pairs).
+ // specify the pair separator character (if the mode supports pairs). If
+ // escapes are not specified, then inherit the current mode's (though a mode
+ // can also override it).
//
virtual void
mode (lexer_mode,
char pair_separator = '\0',
- const char* escapes = nullptr);
+ optional<const char*> escapes = nullopt);
// Expire the current mode early.
//
diff --git a/build2/lexer.cxx b/build2/lexer.cxx
index 3c8eb5a..8918740 100644
--- a/build2/lexer.cxx
+++ b/build2/lexer.cxx
@@ -30,13 +30,19 @@ namespace build2
}
void lexer::
- mode (lexer_mode m, char ps, const char* esc)
+ mode (lexer_mode m, char ps, optional<const char*> esc)
{
const char* s1 (nullptr);
const char* s2 (nullptr);
bool s (true);
bool q (true);
+ if (!esc)
+ {
+ assert (!state_.empty ());
+ esc = state_.top ().escapes;
+ }
+
switch (m)
{
case lexer_mode::normal:
@@ -76,7 +82,7 @@ namespace build2
default: assert (false); // Unhandled custom mode.
}
- state_.push (state {m, ps, s, q, esc, s1, s2});
+ state_.push (state {m, ps, s, q, *esc, s1, s2});
}
token lexer::
@@ -272,10 +278,15 @@ namespace build2
uint64_t ln (c.line), cn (c.column);
+ auto make_token = [ln, cn] (type t)
+ {
+ return token (t, false, quote_type::double_, ln, cn, token_printer);
+ };
+
switch (c)
{
- case '$': return token (type::dollar, false, ln, cn, token_printer);
- case '(': return token (type::lparen, false, ln, cn, token_printer);
+ case '$': return make_token (type::dollar);
+ case '(': return make_token (type::lparen);
}
// Otherwise it is a word.
diff --git a/build2/test/script/lexer b/build2/test/script/lexer
index b812f84..a6a96ca 100644
--- a/build2/test/script/lexer
+++ b/build2/test/script/lexer
@@ -55,7 +55,9 @@ namespace build2
}
virtual void
- mode (base_mode, char = '\0', const char* = nullptr) override;
+ mode (base_mode,
+ char = '\0',
+ optional<const char*> = nullopt) override;
// Number of quoted (double or single) tokens since last reset.
//
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
index cdf726b..74aa02e 100644
--- a/build2/test/script/lexer.cxx
+++ b/build2/test/script/lexer.cxx
@@ -17,13 +17,19 @@ namespace build2
using type = token_type;
void lexer::
- mode (base_mode m, char ps, const char* esc)
+ mode (base_mode m, char ps, optional<const char*> esc)
{
const char* s1 (nullptr);
const char* s2 (nullptr);
bool s (true);
bool q (true);
+ if (!esc)
+ {
+ assert (!state_.empty ());
+ esc = state_.top ().escapes;
+ }
+
switch (m)
{
case lexer_mode::script_line:
@@ -129,7 +135,7 @@ namespace build2
}
assert (ps == '\0');
- state_.push (state {m, ps, s, q, esc, s1, s2});
+ state_.push (state {m, ps, s, q, *esc, s1, s2});
}
token lexer::
@@ -170,16 +176,24 @@ namespace build2
xchar c (get ());
uint64_t ln (c.line), cn (c.column);
- auto make_token = [&sep, ln, cn] (type t)
+ if (eos (c))
+ return token (type::eos, sep, ln, cn, token_printer);
+
+ state st (state_.top ()); // Make copy (see first/second_token).
+ lexer_mode m (st.mode);
+
+ auto make_token = [&sep, &m, ln, cn] (type t, string v = string ())
{
- return token (t, sep, ln, cn, token_printer);
- };
+ bool q (m == lexer_mode::here_line_double);
- if (eos (c))
- return make_token (type::eos);
+ return token (t, move (v), sep,
+ (q ? quote_type::double_ : quote_type::unquoted), q,
+ ln, cn,
+ token_printer);
+ };
auto make_token_with_modifiers =
- [&sep, ln, cn, this] (type t, const char* mods, bool exc = false)
+ [&make_token, this] (type t, const char* mods, bool exc = false)
{
string v;
if (mods != nullptr)
@@ -199,15 +213,9 @@ namespace build2
}
}
- return token (t, move (v), sep,
- quote_type::unquoted, false,
- ln, cn,
- token_printer);
+ return make_token (t, move (v));
};
- state st (state_.top ()); // Make copy (see first/second_token).
- lexer_mode m (st.mode);
-
// Expire certain modes at the end of the token. Do it early in case
// we push any new mode (e.g., double quote).
//
diff --git a/build2/token b/build2/token
index 0dc914f..3d580b9 100644
--- a/build2/token
+++ b/build2/token
@@ -98,6 +98,12 @@ namespace build2
token (token_type t, bool s, uint64_t l, uint64_t c, printer_type* p)
: token (t, string (), s, quote_type::unquoted, false, l, c, p) {}
+ token (token_type t, bool s,
+ quote_type qt,
+ uint64_t l, uint64_t c,
+ printer_type* p)
+ : token (t, string (), s, qt, qt != quote_type::unquoted, l, c, p) {}
+
token (string v, bool s,
quote_type qt, bool qc,
uint64_t l, uint64_t c)
diff --git a/unit-tests/lexer/quoting.test b/unit-tests/lexer/quoting.test
index aab02c3..efe5e05 100644
--- a/unit-tests/lexer/quoting.test
+++ b/unit-tests/lexer/quoting.test
@@ -48,7 +48,7 @@ EOO
:
$* <'"$foo"' >>EOO
'' [D/P]
-$
+$ [D/C]
'foo' [D/P]
<newline>
EOO
@@ -58,7 +58,7 @@ EOO
:
$* <'"foo$"' >>EOO
'foo' [D/P]
-$
+$ [D/C]
'' [D/P]
<newline>
EOO