1 files changed, 138 insertions, 0 deletions
diff --git a/build2/lexer b/build2/lexer
new file mode 100644
index 0000000..51f3e56
--- /dev/null
+++ b/build2/lexer
@@ -0,0 +1,138 @@
+// file      : build2/lexer -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef BUILD2_LEXER
+#define BUILD2_LEXER
+
+#include <stack>
+#include <string>
+#include <iosfwd>
+#include <cstddef> // size_t
+#include <cstdint> // uint64_t
+#include <cassert>
+#include <exception>
+
+#include <butl/char-scanner>
+
+#include <build2/types>
+#include <build2/utility>
+
+#include <build2/token>
+#include <build2/diagnostics>
+
+namespace build2
+{
+  // Context-dependent lexing mode. In the value mode we don't treat
+  // certain characters (e.g., +, =) as special so that we can use
+  // them in the variable values, e.g., 'foo = g++'. In contrast, in
+  // the variable mode, we restrict certain characters (e.g., /) from
+  // appearing in the name. The pairs mode is just like value except
+  // that we split names separated by the pair character. The eval
+  // mode is used in the evaluation context.
+  //
+  // The alternative modes must be set manually. The value and pairs
+  // modes are automatically reset after the end of the line. The
+  // variable mode is reset after the name token. And the eval mode
+  // is reset after the closing ')'.
+  //
+  // Quoted is an internal mode and should not be set explicitly.
+  //
+  enum class lexer_mode
+  {
+    normal,   // Mode the lexer constructor starts in.
+    variable, // Certain characters (e.g., /) are not allowed in the name.
+    value,    // Certain characters (e.g., +, =) are not special.
+    pairs,    // Like value but names are split at the pair character.
+    eval,     // Used in the evaluation context.
+    quoted    // Internal, should not be set explicitly.
+  };
+
+  // Tokenizer layered on butl::char_scanner. The lexing modes are kept
+  // on a stack with the top entry being the mode currently in effect.
+  //
+  class lexer: protected butl::char_scanner
+  {
+  public:
+    // Create a lexer reading from is. The name is stored in the
+    // diagnostics mark and is what name() returns. The optional
+    // processor callback is only stored here; when (and if) it is
+    // called is decided by the implementation.
+    //
+    // The mode stack starts out with the normal mode. The pair separator
+    // is initialized to '=' (the mode() default) so that pair_separator()
+    // never reads an indeterminate value.
+    //
+    lexer (std::istream& is,
+           const std::string& name,
+           void (*processor) (token&, const lexer&) = nullptr)
+        : char_scanner (is),
+          fail (name),
+          processor_ (processor),
+          pair_separator_ ('='),
+          sep_ (false)
+    {
+      mode_.push (lexer_mode::normal);
+    }
+
+    // Return the name passed to the constructor.
+    //
+    const std::string&
+    name () const {return fail.name_;}
+
+    // Note: sets mode for the next token. If mode is pairs, then
+    // the second argument specifies the separator character. For any
+    // other mode the argument is ignored and the current separator is
+    // kept so that pushing, say, eval on top of pairs does not clobber
+    // the separator that pairs goes back to using once eval expires.
+    //
+    void
+    mode (lexer_mode m, char pair_separator = '=')
+    {
+      mode_.push (m);
+
+      if (m == lexer_mode::pairs)
+        pair_separator_ = pair_separator;
+    }
+
+    // Expire the current mode early. The mode stack must not be empty.
+    //
+    void
+    expire_mode ()
+    {
+      assert (!mode_.empty ());
+      mode_.pop ();
+    }
+
+    // Return the mode currently in effect (top of the mode stack).
+    //
+    lexer_mode
+    mode () const {return mode_.top ();}
+
+    // Return the separator character most recently set for the pairs
+    // mode ('=' if none has been set).
+    //
+    char
+    pair_separator () const {return pair_separator_;}
+
+    // Scanner.
+    //
+    token
+    next ();
+
+    // Peek at the first character of the next token. Return the character
+    // or 0 if the next token will be eos. Also return an indicator of
+    // whether the next token will be separated.
+    //
+    pair<char, bool>
+    peek_char ();
+
+  private:
+    // Scanning helpers; all are defined in the implementation file.
+    //
+    token
+    next_impl ();
+
+    token
+    next_eval ();
+
+    token
+    next_quoted ();
+
+    token
+    name (bool separated);
+
+    // Return true if we have seen any spaces. Skipped empty lines
+    // don't count. In other words, we are only interested in spaces
+    // that are on the same line as the following non-space character.
+    //
+    bool
+    skip_spaces ();
+
+    xchar
+    escape ();
+
+    // Diagnostics.
+    //
+  private:
+    // Failure mark that carries the name given to the lexer. Calling it
+    // with a character yields a location_prologue (defined elsewhere).
+    //
+    struct fail_mark_base: build2::fail_mark_base<failed>
+    {
+      fail_mark_base (const std::string& n): name_ (n) {}
+
+      location_prologue
+      operator() (const xchar&) const;
+
+      std::string name_;
+    };
+    typedef diag_mark<fail_mark_base> fail_mark;
+
+  private:
+    fail_mark fail;
+
+    void (*processor_) (token&, const lexer&);
+
+    std::stack<lexer_mode> mode_;
+    char pair_separator_; // Only updated when the pairs mode is pushed.
+    bool sep_; // True if we skipped spaces in peek().
+  };
+}
+
+#endif // BUILD2_LEXER