aboutsummaryrefslogtreecommitdiff
path: root/build2
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2016-11-25 11:18:34 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2016-11-25 11:18:34 +0200
commit28f8338ded34f160e0083da9be4679bc778be7ca (patch)
tree7bd01311683d835f946c73d7d8220f552bae718f /build2
parentf32bb0aceb00cfa4bd04eea72f8fa2fe02b738b3 (diff)
Distinguish token quoting type and completeness
Diffstat (limited to 'build2')
-rw-r--r--build2/lexer.cxx74
-rw-r--r--build2/parser.cxx7
-rw-r--r--build2/test/script/lexer.cxx12
-rw-r--r--build2/test/script/parser.cxx21
-rw-r--r--build2/token23
5 files changed, 109 insertions, 28 deletions
diff --git a/build2/lexer.cxx b/build2/lexer.cxx
index cf8a789..b73c291 100644
--- a/build2/lexer.cxx
+++ b/build2/lexer.cxx
@@ -295,7 +295,24 @@ namespace build2
uint64_t ln (c.line), cn (c.column);
string lexeme;
- bool quoted (m == lexer_mode::double_quoted);
+ quote_type qtype (m == lexer_mode::double_quoted
+ ? quote_type::double_
+ : quote_type::unquoted);
+
+ // If we are already in the quoted mode then we didn't start with the
+ // quote character.
+ //
+ bool qcomp (false);
+
+ auto append = [&lexeme, &m, &qcomp] (char c)
+ {
+ lexeme += c;
+
+ // An unquoted character after a quoted fragment.
+ //
+ if (qcomp && m != lexer_mode::double_quoted)
+ qcomp = false;
+ };
for (; !eos (c); c = peek ())
{
@@ -321,7 +338,7 @@ namespace build2
fail (p) << "unterminated escape sequence";
if (p != '\n') // Ignore if line continuation.
- lexeme += p;
+ append (p);
continue;
}
@@ -424,6 +441,22 @@ namespace build2
//
mode (lexer_mode::single_quoted);
+ switch (qtype)
+ {
+ case quote_type::unquoted:
+ qtype = quote_type::single;
+ qcomp = lexeme.empty ();
+ break;
+ case quote_type::single:
+ qcomp = false; // Non-contiguous.
+ break;
+ case quote_type::double_:
+ qtype = quote_type::mixed;
+ case quote_type::mixed:
+ qcomp = false;
+ break;
+ }
+
get ();
for (c = get (); !eos (c) && c != '\''; c = get ())
lexeme += c;
@@ -432,8 +465,6 @@ namespace build2
fail (c) << "unterminated single-quoted sequence";
state_.pop ();
-
- quoted = true;
continue;
}
case '\"':
@@ -444,7 +475,22 @@ namespace build2
st = state_.top ();
m = st.mode;
- quoted = true;
+ switch (qtype)
+ {
+ case quote_type::unquoted:
+ qtype = quote_type::double_;
+ qcomp = lexeme.empty ();
+ break;
+ case quote_type::double_:
+ qcomp = false; // Non-contiguous.
+ break;
+ case quote_type::single:
+ qtype = quote_type::mixed;
+ case quote_type::mixed:
+ qcomp = false;
+ break;
+ }
+
continue;
}
}
@@ -455,19 +501,27 @@ namespace build2
break;
get ();
- lexeme += c;
+ append (c);
}
- if (eos (c) && m == lexer_mode::double_quoted)
- fail (c) << "unterminated double-quoted sequence";
+ if (m == lexer_mode::double_quoted)
+ {
+ if (eos (c))
+ fail (c) << "unterminated double-quoted sequence";
+
+ // If we are still in the quoted mode then we didn't end with the quote
+ // character.
+ //
+ if (qcomp)
+ qcomp = false;
+ }
// Expire variable mode at the end of the word.
//
if (m == lexer_mode::variable)
state_.pop ();
- return token (move (lexeme), sep, quoted, ln, cn);
-
+ return token (move (lexeme), sep, qtype, qcomp, ln, cn);
}
bool lexer::
diff --git a/build2/parser.cxx b/build2/parser.cxx
index 5f9850d..c2737cb 100644
--- a/build2/parser.cxx
+++ b/build2/parser.cxx
@@ -2154,7 +2154,10 @@ namespace build2
tt != type::lparen) || peeked ().separated))
{
tt = type::word;
- t = token (move (concat_str), true, false, t.line, t.column);
+ t = token (move (concat_str),
+ true,
+ quote_type::unquoted, false,
+ t.line, t.column);
concat = false;
}
else if (!first)
@@ -2792,7 +2795,7 @@ namespace build2
//
// See tests/keyword.
//
- if (!t.quoted)
+ if (t.qtype == quote_type::unquoted)
{
// We cannot peek at the whole token here since it might have to be
// lexed in a different mode. So peek at its first character.
diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx
index 5e6c66a..19e7498 100644
--- a/build2/test/script/lexer.cxx
+++ b/build2/test/script/lexer.cxx
@@ -137,7 +137,7 @@ namespace build2
break;
}
- if (r.quoted)
+ if (r.qtype != quote_type::unquoted)
++quoted_;
return r;
@@ -448,7 +448,10 @@ namespace build2
lexeme += c;
}
- return token (move (lexeme), false, false, ln, cn);
+ return token (move (lexeme),
+ false,
+ quote_type::unquoted, false,
+ ln, cn);
}
token lexer::
@@ -480,7 +483,10 @@ namespace build2
}
state_.pop (); // Expire the variable mode.
- return token (move (lexeme), sep, false, ln, cn);
+ return token (move (lexeme),
+ sep,
+ quote_type::unquoted, false,
+ ln, cn);
}
}
}
diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx
index 9afef75..a116873 100644
--- a/build2/test/script/parser.cxx
+++ b/build2/test/script/parser.cxx
@@ -321,7 +321,7 @@ namespace build2
//
lt = line_type::cmd; // Default.
- if (tt == type::word && !t.quoted)
+ if (tt == type::word && t.qtype == quote_type::unquoted)
{
const string& n (t.value);
@@ -353,7 +353,7 @@ namespace build2
//
lt = line_type::cmd; // Default.
- if (tt == type::word && !t.quoted)
+ if (tt == type::word && t.qtype == quote_type::unquoted)
{
const string& n (t.value);
@@ -719,7 +719,7 @@ namespace build2
const token& p (peeked ());
const location ll (get_location (p));
- if (pt == type::word && !p.quoted)
+ if (pt == type::word && p.qtype == quote_type::unquoted)
{
if (p.value == "elif") lt = line_type::cmd_elif;
else if (p.value == "elif!") lt = line_type::cmd_elifn;
@@ -1652,7 +1652,7 @@ namespace build2
//
next (t, tt);
- if (tt != type::word || t.quoted)
+ if (tt != type::word || t.qtype != quote_type::unquoted)
fail (l) << "expected here-document end marker";
hd.push_back (here_doc {0, 0, 0, move (t.value), nn});
@@ -1751,7 +1751,8 @@ namespace build2
// quoted (note that the current token is "next" and is not part
// of this).
//
- bool q ((quoted () - (t.quoted ? 1 : 0)) != 0);
+ bool q ((quoted () -
+ (t.qtype != quote_type::unquoted ? 1 : 0)) != 0);
for (name& n: ns)
{
@@ -2074,7 +2075,9 @@ namespace build2
// Check if this is the end marker. For starters, it should be a
// single, unquoted word followed by a newline.
//
- if (tt == type::word && !t.quoted && peek () == type::newline)
+ if (tt == type::word &&
+ t.qtype == quote_type::unquoted &&
+ peek () == type::newline)
{
const string& v (t.value);
@@ -2652,7 +2655,7 @@ namespace build2
// Examine tokens we have replayed since last reset.
//
for (size_t i (replay_quoted_); i != replay_i_; ++i)
- if (replay_data_[i].token.quoted)
+ if (replay_data_[i].token.qtype != quote_type::unquoted)
++r;
}
@@ -2663,14 +2666,14 @@ namespace build2
reset_quoted (token& cur)
{
if (replay_ != replay::play)
- lexer_->reset_quoted (cur.quoted ? 1 : 0);
+ lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0);
else
{
replay_quoted_ = replay_i_ - 1;
// Must be the same token.
//
- assert (replay_data_[replay_quoted_].token.quoted == cur.quoted);
+ assert (replay_data_[replay_quoted_].token.qtype == cur.qtype);
}
}
diff --git a/build2/token b/build2/token
index b3ebf5b..df25d4c 100644
--- a/build2/token
+++ b/build2/token
@@ -56,6 +56,11 @@ namespace build2
value_type v_;
};
+ // Token can be unquoted, single-quoted ('') or double-quoted (""). It can
+ // also be mixed.
+ //
+ enum class quote_type {unquoted, single, double_, mixed};
+
class token;
void
@@ -68,7 +73,13 @@ namespace build2
token_type type;
bool separated; // Whitespace-separated from the previous token.
- bool quoted; // Word (or some part of it) was quoted.
+
+ // Quoting can be complete, where the token starts and ends with the quote
+ // characters and quoting is contiguous or partial where only some part(s)
+ // of the token are quoted or quoting continues to the next token.
+ //
+ quote_type qtype;
+ bool qcomp;
string value; // Only valid for word.
@@ -82,12 +93,16 @@ namespace build2
: token (token_type::eos, false, 0, 0, token_printer) {}
token (token_type t, bool s, uint64_t l, uint64_t c, printer_type* p)
- : type (t), separated (s), quoted (false),
+ : type (t), separated (s), qtype (quote_type::unquoted), qcomp (false), // Unquoted, so never complete.
line (l), column (c),
printer (p) {}
- token (string v, bool s, bool q, uint64_t l, uint64_t c)
- : type (token_type::word), separated (s), quoted (q), value (move (v)),
+ token (string v, bool s,
+ quote_type qt, bool qc,
+ uint64_t l, uint64_t c)
+ : type (token_type::word), separated (s),
+ qtype (qt), qcomp (qc),
+ value (move (v)),
line (l), column (c),
printer (&token_printer) {}
};