aboutsummaryrefslogtreecommitdiff
path: root/build2/lexer.cxx
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2016-01-05 11:55:15 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2016-01-05 11:55:15 +0200
commit 9fb791e9fad6c63fc1dac49f4d05ae63b8a3db9b (patch)
tree d60322d4382ca5f97b676c5abe2e39524f35eab4 /build2/lexer.cxx
parent f159b1dac68c8714f7ba71ca168e3b695891aad9 (diff)
Rename build directory/namespace to build2
Diffstat (limited to 'build2/lexer.cxx')
-rw-r--r-- build2/lexer.cxx 431
1 files changed, 431 insertions, 0 deletions
diff --git a/build2/lexer.cxx b/build2/lexer.cxx
new file mode 100644
index 0000000..e4d03c4
--- /dev/null
+++ b/build2/lexer.cxx
@@ -0,0 +1,431 @@
+// file : build2/lexer.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <build2/lexer>
+
+using namespace std;
+
+namespace build2
+{
+ typedef token_type type;
+
+ // Return the next token. The token is obtained from next_impl() and, if a
+ // token processor is set, handed to it (together with this lexer) before
+ // being returned.
+ //
+ token lexer::
+ next ()
+ {
+   token r (next_impl ());
+
+   if (nullptr != processor_)
+   {
+     processor_ (r, *this);
+   }
+
+   return r;
+ }
+
+ // Peek at the next character without extracting it. Return the character
+ // ('\0' if we are at the end of the stream) paired with the separation
+ // flag, which is also saved in sep_.
+ //
+ pair<char, bool> lexer::
+ peek_char ()
+ {
+   // Spaces are only skipped outside of the quoted mode; in the quoted mode
+   // the flag is simply cleared.
+   //
+   if (mode_.top () != lexer_mode::quoted)
+     sep_ = skip_spaces ();
+   else
+     sep_ = false;
+
+   xchar pc (peek ());
+
+   if (eos (pc))
+     return make_pair ('\0', sep_);
+
+   return make_pair (char (pc), sep_);
+ }
+
+ // Extract the next token according to the mode at the top of the mode
+ // stack. Anything that is not a special character in that mode is lexed
+ // as a name.
+ //
+ token lexer::
+ next_impl ()
+ {
+   const lexer_mode mode (mode_.top ());
+
+   // For some modes we have dedicated implementations of next().
+   //
+   if (mode == lexer_mode::eval)
+     return next_eval ();
+
+   if (mode == lexer_mode::quoted)
+     return next_quoted ();
+
+   const bool sep (skip_spaces ());
+
+   xchar c (get ());
+   const uint64_t ln (c.line);
+   const uint64_t cn (c.column);
+
+   if (eos (c))
+     return token (type::eos, sep, ln, cn);
+
+   // The value and pairs modes are handled alike below: both expire at the
+   // end of the line and neither treats ':', '=', or '+' as special.
+   //
+   const bool value_like (mode == lexer_mode::value ||
+                          mode == lexer_mode::pairs);
+
+   // Characters that are special in all the modes handled here.
+   //
+   // NOTE: remember to update name(), next_eval() if adding new special
+   // characters.
+   //
+   switch (c)
+   {
+   case '{': return token (type::lcbrace, sep, ln, cn);
+   case '}': return token (type::rcbrace, sep, ln, cn);
+   case '$': return token (type::dollar, sep, ln, cn);
+   case '(': return token (type::lparen, sep, ln, cn);
+   case ')': return token (type::rparen, sep, ln, cn);
+   case '\n':
+     {
+       // Expire value/pairs mode at the end of the line.
+       //
+       if (value_like)
+         mode_.pop ();
+
+       return token (type::newline, sep, ln, cn);
+     }
+   }
+
+   // Handle pair separator.
+   //
+   if (mode == lexer_mode::pairs && c == pair_separator_)
+     return token (c, sep, ln, cn);
+
+   // The remaining special characters are only recognized outside of the
+   // value and pairs modes.
+   //
+   // NOTE: remember to update name(), next_eval() if adding new special
+   // characters.
+   //
+   if (!value_like)
+   {
+     if (c == ':')
+       return token (type::colon, sep, ln, cn);
+
+     if (c == '=')
+     {
+       // Either '=+' or a plain '='.
+       //
+       if (peek () == '+')
+       {
+         get ();
+         return token (type::equal_plus, sep, ln, cn);
+       }
+
+       return token (type::equal, sep, ln, cn);
+     }
+
+     // A '+' is only special as the first character of '+='. Otherwise we
+     // drop through and let name() deal with it.
+     //
+     if (c == '+')
+     {
+       if (peek () == '=')
+       {
+         get ();
+         return token (type::plus_equal, sep, ln, cn);
+       }
+     }
+   }
+
+   // Otherwise it is a name.
+   //
+   unget (c);
+   return name (sep);
+ }
+
+ // Extract the next token in the eval mode. The closing parenthesis expires
+ // the mode. Reaching the end of the stream or a newline is diagnosed as
+ // an error.
+ //
+ token lexer::
+ next_eval ()
+ {
+   const bool sep (skip_spaces ());
+   xchar c (get ());
+
+   if (eos (c))
+     fail (c) << "unterminated evaluation context";
+
+   const uint64_t ln (c.line);
+   const uint64_t cn (c.column);
+
+   // Make a token of the specified type at the position of c.
+   //
+   auto make = [sep, ln, cn] (type t) {return token (t, sep, ln, cn);};
+
+   // The set of special characters here is quite similar to that of the
+   // value mode.
+   //
+   // NOTE: remember to update name() if adding new special characters.
+   //
+   switch (c)
+   {
+   case '\n': fail (c) << "newline in evaluation context";
+   case '{': return make (type::lcbrace);
+   case '}': return make (type::rcbrace);
+   case '$': return make (type::dollar);
+   case '(': return make (type::lparen);
+   case ')':
+     {
+       // The closing parenthesis ends the evaluation context.
+       //
+       mode_.pop ();
+       return make (type::rparen);
+     }
+   }
+
+   // Anything else starts a name.
+   //
+   unget (c);
+   return name (sep);
+ }
+
+ // Extract the next token in the quoted mode (inside a double-quoted
+ // sequence). Spaces are not skipped and the resulting tokens are never
+ // marked as separated. Only '$' and '(' are returned as special tokens
+ // here; everything else is passed to name().
+ //
+ token lexer::
+ next_quoted ()
+ {
+   xchar c (get ());
+
+   if (eos (c))
+     fail (c) << "unterminated double-quoted sequence";
+
+   const uint64_t ln (c.line);
+   const uint64_t cn (c.column);
+
+   if (c == '$')
+     return token (type::dollar, false, ln, cn);
+
+   if (c == '(')
+     return token (type::lparen, false, ln, cn);
+
+   // Anything else starts a name.
+   //
+   unget (c);
+   return name (false);
+ }
+
+ // Extract a name token starting at the next character, which must not be
+ // the end of the stream. Characters are accumulated into the lexeme until
+ // one that ends the name in the current mode is seen; that character is
+ // left in the stream. Backslash escapes and single- and double-quoted
+ // sequences are processed along the way and the resulting token is marked
+ // as quoted if any part of the name was quoted. The sep argument is
+ // passed through to the token.
+ //
+ // Note that this function changes the mode stack: an opening double quote
+ // pushes the quoted mode, a closing one pops it, and the variable mode is
+ // popped at the end of the name.
+ //
+ token lexer::
+ name (bool sep)
+ {
+   xchar c (peek ());
+   assert (!eos (c));
+
+   uint64_t ln (c.line), cn (c.column);
+   string lexeme;
+
+   lexer_mode m (mode_.top ());
+   bool quoted (m == lexer_mode::quoted);
+
+   for (; !eos (c); c = peek ())
+   {
+     bool done (false);
+
+     // Handle pair separator.
+     //
+     if (m == lexer_mode::pairs && c == pair_separator_)
+       break;
+
+     // The following characters are not treated as special in the
+     // value/pairs, eval, and quoted modes.
+     //
+     if (m != lexer_mode::value &&
+         m != lexer_mode::pairs &&
+         m != lexer_mode::eval &&
+         m != lexer_mode::quoted)
+     {
+       switch (c)
+       {
+       case ':':
+       case '=':
+         {
+           done = true;
+           break;
+         }
+       case '+':
+         {
+           // In the variable mode '+' always ends the name. Otherwise it
+           // only does so as the first character of '+=', which is the
+           // only case where next_impl() makes a token out of it. If we
+           // were to stop on any '+', then one that is not followed by
+           // '=' would be handed back to us by next_impl() and we would
+           // return an empty name without consuming anything.
+           //
+           if (m == lexer_mode::variable)
+             done = true;
+           else
+           {
+             get ();
+             done = (peek () == '=');
+             unget (c); // Put the '+' back in either case.
+           }
+
+           break;
+         }
+       }
+
+       if (done)
+         break;
+     }
+
+     // While these extra characters are treated as the name end in
+     // the variable mode.
+     //
+     if (m == lexer_mode::variable)
+     {
+       switch (c)
+       {
+       case '/':
+       case '-':
+         {
+           done = true;
+           break;
+         }
+       }
+
+       if (done)
+         break;
+     }
+
+     // If we are quoted, these are ordinary characters.
+     //
+     if (m != lexer_mode::quoted)
+     {
+       switch (c)
+       {
+       case ' ':
+       case '\t':
+       case '\n':
+       case '#':
+       case '{':
+       case '}':
+       case ')':
+         {
+           done = true;
+           break;
+         }
+       case '\\':
+         {
+           // Consume the backslash and take the character that follows
+           // literally, except for an escaped newline which is dropped.
+           //
+           get ();
+           c = escape ();
+           if (c != '\n') // Ignore.
+             lexeme += c;
+           continue;
+         }
+       case '\'':
+         {
+           // If we are in the variable mode, then treat quote as just
+           // another separator.
+           //
+           if (m == lexer_mode::variable)
+           {
+             done = true;
+             break;
+           }
+           else
+           {
+             // Consume the opening quote and then everything up to and
+             // including the closing one, adding what is in between to
+             // the lexeme verbatim.
+             //
+             get ();
+
+             for (c = get (); !eos (c) && c != '\''; c = get ())
+               lexeme += c;
+
+             if (eos (c))
+               fail (c) << "unterminated single-quoted sequence";
+
+             quoted = true;
+             continue;
+           }
+         }
+       }
+
+       if (done)
+         break;
+     }
+
+     // These are handled in all the modes, including quoted.
+     //
+     switch (c)
+     {
+     case '$':
+     case '(':
+       {
+         done = true;
+         break;
+       }
+     case '\"':
+       {
+         // If we are in the variable mode, then treat quote as just
+         // another separator.
+         //
+         if (m == lexer_mode::variable)
+         {
+           done = true;
+           break;
+         }
+         else
+         {
+           get ();
+
+           // A double quote toggles the quoted mode: leave it if we are
+           // in it, enter it otherwise.
+           //
+           if (m == lexer_mode::quoted)
+             mode_.pop ();
+           else
+           {
+             mode_.push (lexer_mode::quoted);
+             quoted = true;
+           }
+
+           m = mode_.top ();
+           continue;
+         }
+       }
+     default:
+       {
+         // An ordinary character: make it part of the name.
+         //
+         get ();
+         lexeme += c;
+         continue;
+       }
+     }
+
+     assert (done);
+     break;
+   }
+
+   // If we ran out of input while still in the quoted mode, then the
+   // closing double quote is missing.
+   //
+   if (m == lexer_mode::quoted && eos (c))
+     fail (c) << "unterminated double-quoted sequence";
+
+   // Expire variable mode at the end of the name.
+   //
+   if (m == lexer_mode::variable)
+     mode_.pop ();
+
+   return token (lexeme, sep, quoted, ln, cn);
+ }
+
+ bool lexer:: // Skip spaces, tabs, comments, empty lines, and escaped newlines.
+ skip_spaces () // Return the separation flag (see r below).
+ {
+ bool r (sep_); // Start with the flag saved in sep_ (set by peek_char()).
+ sep_ = false;
+
+ xchar c (peek ());
+ bool start (c.column == 1); // Computed once: are we starting at column 1?
+
+ for (; !eos (c); c = peek ())
+ {
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ {
+ r = true;
+ break; // Consumed by the get() at the bottom of the loop.
+ }
+ case '\n':
+ {
+ // Skip empty lines: if we started at the beginning of a line, then
+ // the newline is consumed (below) and the separation flag is reset.
+ if (start)
+ {
+ r = false;
+ break;
+ }
+
+ return r; // Otherwise leave the newline in the stream for the caller.
+ }
+ case '#':
+ {
+ get ();
+
+ // Read until newline or eos. The newline itself is not consumed here
+ // so that it is handled by the next iteration of the outer loop.
+ for (c = peek (); !eos (c) && c != '\n'; c = peek ())
+ get ();
+
+ r = true; // A comment counts as separation.
+ continue;
+ }
+ case '\\':
+ {
+ get ();
+
+ if (peek () == '\n')
+ break; // Ignore escaped newline (consumed by the get() below).
+
+ unget (c); // Not an escaped newline: put the backslash back.
+ // Fall through.
+ }
+ default:
+ return r; // Not a space.
+ }
+
+ get ();
+ }
+
+ return r;
+ }
+
+ // Extract and return the character that follows a backslash (the backslash
+ // itself must already have been extracted by the caller). Reaching the end
+ // of the stream instead is diagnosed as an error.
+ //
+ lexer::xchar lexer::
+ escape ()
+ {
+   xchar escaped (get ());
+
+   if (eos (escaped))
+   {
+     fail (escaped) << "unterminated escape sequence";
+   }
+
+   return escaped;
+ }
+
+ // Start a failure diagnostic located at the position (line and column) of
+ // the specified character in the input called name_, by delegating to the
+ // base fail mark.
+ //
+ location_prologue lexer::fail_mark_base::
+ operator() (const xchar& c) const
+ {
+   typedef build2::fail_mark_base<failed> base;
+
+   return base::operator() (location (name_.c_str (), c.line, c.column));
+ }
+}