aboutsummaryrefslogtreecommitdiff
path: root/build2/lexer
diff options
context:
space:
mode:
Diffstat (limited to 'build2/lexer')
-rw-r--r--build2/lexer138
1 files changed, 138 insertions, 0 deletions
diff --git a/build2/lexer b/build2/lexer
new file mode 100644
index 0000000..51f3e56
--- /dev/null
+++ b/build2/lexer
@@ -0,0 +1,138 @@
+// file : build2/lexer -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUILD2_LEXER
+#define BUILD2_LEXER
+
+#include <stack>
+#include <string>
+#include <iosfwd>
+#include <cstddef> // size_t
+#include <cstdint> // uint64_t
+#include <cassert>
+#include <exception>
+
+#include <butl/char-scanner>
+
+#include <build2/types>
+#include <build2/utility>
+
+#include <build2/token>
+#include <build2/diagnostics>
+
+namespace build2
+{
+ // Context-dependent lexing mode. In the value mode we don't treat
+ // certain characters (e.g., +, =) as special so that we can use
+ // them in the variable values, e.g., 'foo = g++'. In contrast, in
+ // the variable mode, we restrict certain characters (e.g., /) from
+ // appearing in the name. The pairs mode is just like value except
+ // that we split names separated by the pair separator character.
+ // The eval mode is used in the evaluation context.
+ //
+ // The alternative modes must be set manually. The value and pairs
+ // modes are automatically reset after the end of the line. The
+ // variable mode is reset after the name token. And the eval mode
+ // is reset after the closing ')'.
+ //
+ // Quoted is an internal mode and should not be set explicitly.
+ //
+ enum class lexer_mode {normal, variable, value, pairs, eval, quoted};
+
+ class lexer: protected butl::char_scanner
+ { // Mode-driven token scanner; see lexer_mode for the available modes.
+ public:
+ lexer (std::istream& is,
+ const std::string& name, // Stored in the fail mark; see name().
+ void (*processor) (token&, const lexer&) = nullptr) // Optional.
+ : char_scanner (is), fail (name), processor_ (processor), sep_ (false)
+ {
+ mode_.push (lexer_mode::normal); // Start out in the normal mode.
+ }
+
+ const std::string&
+ name () const {return fail.name_;} // Name passed to the constructor.
+
+ // Note: sets mode for the next token (modes nest). The second argument
+ // is the pairs mode separator; it is overwritten on every call.
+ //
+ void
+ mode (lexer_mode m, char pair_separator = '=')
+ {
+ mode_.push (m);
+ pair_separator_ = pair_separator;
+ }
+
+ // Expire the current mode early. The mode stack must not be empty.
+ //
+ void
+ expire_mode () {mode_.pop ();}
+
+ lexer_mode
+ mode () const {return mode_.top ();} // Innermost (current) mode.
+
+ char
+ pair_separator () const {return pair_separator_;} // '=' until set.
+
+ // Scanner.
+ //
+ token
+ next ();
+
+ // Peek at the first character of the next token. Return the character
+ // or 0 if the next token will be eos. Also return an indicator of
+ // whether the next token will be separated.
+ //
+ pair<char, bool>
+ peek_char ();
+
+ private:
+ token
+ next_impl ();
+
+ token
+ next_eval ();
+
+ token
+ next_quoted ();
+
+ token
+ name (bool separated);
+
+ // Return true if we have seen any spaces. Skipped empty lines
+ // don't count. In other words, we are only interested in spaces
+ // that are on the same line as the following non-space character.
+ //
+ bool
+ skip_spaces ();
+
+ xchar
+ escape ();
+
+ // Diagnostics.
+ //
+ private:
+ struct fail_mark_base: build2::fail_mark_base<failed>
+ {
+ fail_mark_base (const std::string& n): name_ (n) {}
+
+ location_prologue
+ operator() (const xchar&) const;
+
+ std::string name_;
+ };
+ typedef diag_mark<fail_mark_base> fail_mark;
+
+ private:
+ fail_mark fail;
+
+ void (*processor_) (token&, const lexer&);
+
+ std::stack<lexer_mode> mode_;
+ char pair_separator_ = '='; // Same default as in mode().
+ bool sep_; // True if we skipped spaces in peek().
+ };
+}
+
+#endif // BUILD2_LEXER