about summary refs log tree commit diff
path: root/libbuild2/lexer.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbuild2/lexer.cxx')
-rw-r--r-- libbuild2/lexer.cxx 190
1 files changed, 154 insertions, 36 deletions
diff --git a/libbuild2/lexer.cxx b/libbuild2/lexer.cxx
index f445d4b..04c15be 100644
--- a/libbuild2/lexer.cxx
+++ b/libbuild2/lexer.cxx
@@ -42,6 +42,22 @@ namespace build2
return make_pair (make_pair (r[0], r[1]), sep_);
}
+ pair<char, bool> lexer::
+ peek_char () // Peek (without consuming) at the character following skip_spaces().
+ {
+ auto p (skip_spaces ());
+ assert (!p.second); // The second member of the skip_spaces() result must be false here.
+ sep_ = p.first; // Remember the first member in sep_ (presumably the "separated" flag -- confirm).
+
+ char r ('\0'); // Returned unchanged if the input is at eos.
+
+ xchar c (peek ()); // Only peeked: get() is never called, so c stays in the stream.
+ if (!eos (c))
+ r = c;
+
+ return make_pair (r, sep_); // The character (or '\0' at eos) paired with sep_.
+ }
+
void lexer::
mode (lexer_mode m, char ps, optional<const char*> esc, uintptr_t data)
{
@@ -144,13 +160,15 @@ namespace build2
break;
}
case lexer_mode::foreign:
- assert (data > 1);
- // Fall through.
+ {
+ assert (ps == '\0' && data > 1);
+ s = false;
+ break;
+ }
case lexer_mode::single_quoted:
case lexer_mode::double_quoted:
{
- assert (ps == '\0');
- s = false;
+ assert (false); // Can only be set manually in word().
break;
}
case lexer_mode::variable:
@@ -162,8 +180,49 @@ namespace build2
default: assert (false); // Unhandled custom mode.
}
- state_.push (
- state {m, data, nullopt, lsb, false, ps, s, n, q, *esc, s1, s2});
+ mode_impl (state {m, data, nullopt, lsb, false, ps, s, n, q, *esc, s1, s2});
+ }
+
+ void lexer::
+ mode_impl (state&& s)
+ {
+ // If we are in the double-quoted mode then, unless the new mode is eval
+ // or variable, delay the state switch until the current mode is expired.
+ // Note that we delay by injecting the new state beneath the current
+ // state.
+ //
+ if (!state_.empty () &&
+ state_.top ().mode == lexer_mode::double_quoted &&
+ s.mode != lexer_mode::eval &&
+ s.mode != lexer_mode::variable)
+ {
+ state qs (move (state_.top ())); // Save quoted state.
+ state_.top () = move (s); // Overwrite quoted state with new state.
+ state_.push (move (qs)); // Restore quoted state.
+ }
+ else
+ state_.push (move (s));
+ }
+
+ void lexer::
+ expire_mode ()
+ {
+ // If we are in the double-quoted mode, then delay the state expiration
+ // until the current mode is expired. Note that we delay by overwriting
+ // the being expired state with the current state.
+ //
+ assert (!state_.empty () &&
+ (state_.top ().mode != lexer_mode::double_quoted ||
+ state_.size () > 1));
+
+ if (state_.top ().mode == lexer_mode::double_quoted)
+ {
+ state qs (move (state_.top ())); // Save quoted state.
+ state_.pop (); // Pop quoted state.
+ state_.top () = move (qs); // Expire state, restoring quoted state.
+ }
+ else
+ state_.pop ();
}
token lexer::
@@ -654,9 +713,9 @@ namespace build2
}
token lexer::
- word (state st, bool sep)
+ word (const state& rst, bool sep)
{
- lexer_mode m (st.mode);
+ lexer_mode m (rst.mode);
xchar c (peek ());
assert (!eos (c));
@@ -687,22 +746,66 @@ namespace build2
lexeme += c;
};
- for (; !eos (c); c = peek ())
+ const state* st (&rst);
+ for (bool first (true); !eos (c); first = false, c = peek ())
{
// First handle escape sequences.
//
if (c == '\\')
{
- // In the variable mode we treat the beginning of the escape sequence
- // as a separator (think \"$foo\").
+ // In the variable mode we treat immediate `\` as the escape sequence
+ // literal and any following as a separator (think \"$foo\").
//
if (m == lexer_mode::variable)
- break;
+ {
+ if (!first)
+ break;
+
+ get ();
+ c = get ();
+
+ if (eos (c))
+ fail (c) << "unterminated escape sequence";
+
+ // For now we only support all the simple C/C++ escape sequences
+ // plus \0 (which in C/C++ is an octal escape sequence).
+ //
+ // In the future we may decide to support more elaborate sequences
+ // such as \xNN, \uNNNN, etc.
+ //
+ // Note: we return it in the literal form instead of translating for
+ // easier printing.
+ //
+ switch (c)
+ {
+ case '\'':
+ case '"':
+ case '?':
+ case '\\':
+ case '0':
+ case 'a':
+ case 'b':
+ case 'f':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'v': lexeme = c; break;
+ default:
+ fail (c) << "unknown escape sequence \\" << c;
+ }
+
+ state_.pop ();
+ return token (type::escape,
+ move (lexeme),
+ sep,
+ qtype, qcomp, qfirst,
+ ln, cn);
+ }
get ();
xchar p (peek ());
- const char* esc (st.escapes);
+ const char* esc (st->escapes);
if (esc == nullptr ||
(*esc != '\0' && !eos (p) && strchr (esc, p) != nullptr))
@@ -718,7 +821,7 @@ namespace build2
continue;
}
else
- unget (c); // Treat as a normal character.
+ unget (c); // Fall through to treat as a normal character.
}
bool done (false);
@@ -747,8 +850,8 @@ namespace build2
get ();
state_.pop ();
- st = state_.top ();
- m = st.mode;
+ st = &state_.top ();
+ m = st->mode;
continue;
}
}
@@ -757,19 +860,17 @@ namespace build2
//
else if (m == lexer_mode::variable)
{
- bool first (lexeme.empty ());
-
// Handle special variable names, if any.
//
- if (first &&
- st.data != 0 &&
- strchr (reinterpret_cast<const char*> (st.data), c) != nullptr)
+ if (first &&
+ st->data != 0 &&
+ strchr (reinterpret_cast<const char*> (st->data), c) != nullptr)
{
get ();
lexeme += c;
done = true;
}
- else if (c != '_' && !(first ? alpha (c) : alnum (c)))
+ else if (c != '_' && !(lexeme.empty () ? alpha (c) : alnum (c)))
{
if (c != '.')
done = true;
@@ -789,17 +890,17 @@ namespace build2
{
// First check if it's a pair separator.
//
- if (c == st.sep_pair)
+ if (c == st->sep_pair)
done = true;
else
{
// Then see if this character or character sequence is a separator.
//
- for (const char* p (strchr (st.sep_first, c));
+ for (const char* p (strchr (st->sep_first, c));
p != nullptr;
p = done ? nullptr : strchr (p + 1, c))
{
- char s (st.sep_second[p - st.sep_first]);
+ char s (st->sep_second[p - st->sep_first]);
// See if it has a second.
//
@@ -817,8 +918,21 @@ namespace build2
// Handle single and double quotes if enabled for this mode and unless
// they were considered separators.
//
- if (st.quotes && !done)
+ if (st->quotes && !done)
{
+ auto quoted_mode = [this] (lexer_mode m)
+ {
+ // In the double-quoted mode we only do effective escaping of the
+ // special `$("\` characters, line continuations, plus `)` for
+ // symmetry. Nothing can be escaped in single-quoted.
+ //
+ const char* esc (m == lexer_mode::double_quoted ? "$()\"\\\n" : "");
+
+ state_.push (state {
+ m, 0, nullopt, false, false, '\0', false, true, true,
+ esc, nullptr, nullptr});
+ };
+
switch (c)
{
case '\'':
@@ -826,7 +940,7 @@ namespace build2
// Enter the single-quoted mode in case the derived lexer needs
// to notice this.
//
- mode (lexer_mode::single_quoted);
+ quoted_mode (lexer_mode::single_quoted);
switch (qtype)
{
@@ -865,9 +979,10 @@ namespace build2
{
get ();
- mode (lexer_mode::double_quoted);
- st = state_.top ();
- m = st.mode;
+ quoted_mode (lexer_mode::double_quoted);
+
+ st = &state_.top ();
+ m = st->mode;
switch (qtype)
{
@@ -989,7 +1104,7 @@ namespace build2
if ((c = peek ()) == '\\')
{
get ();
- if ((c = peek ()) == '\n')
+ if ((c = peek ()) == '\n' || eos (c))
return true;
}
@@ -1000,15 +1115,16 @@ namespace build2
{
// Scan until we see the closing one.
//
- for (; !eos (c); c = peek ())
+ for (;;)
{
- get ();
if (c == '#' && ml ())
break;
- }
- if (eos (c))
- fail (c) << "unterminated multi-line comment";
+ if (eos (c = peek ()))
+ fail (c) << "unterminated multi-line comment";
+
+ get ();
+ }
}
else
{
@@ -1022,6 +1138,8 @@ namespace build2
}
case '\\':
{
+ // See if this is line continuation.
+ //
get ();
if (peek () == '\n')