aboutsummaryrefslogtreecommitdiff
path: root/build2/lexer.hxx
diff options
context:
space:
mode:
Diffstat (limited to 'build2/lexer.hxx')
-rw-r--r--build2/lexer.hxx205
1 files changed, 0 insertions, 205 deletions
diff --git a/build2/lexer.hxx b/build2/lexer.hxx
deleted file mode 100644
index b71167a..0000000
--- a/build2/lexer.hxx
+++ /dev/null
@@ -1,205 +0,0 @@
-// file : build2/lexer.hxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#ifndef BUILD2_LEXER_HXX
-#define BUILD2_LEXER_HXX
-
-#include <stack>
-
-#include <libbutl/char-scanner.mxx>
-
-#include <build2/types.hxx>
-#include <build2/utility.hxx>
-
-#include <build2/token.hxx>
-#include <build2/diagnostics.hxx>
-
-namespace build2
-{
- // Context-dependent lexing mode. In the value mode we don't treat certain
- // characters (e.g., '+', '=') as special so that we can use them in the
- // variable values, e.g., 'foo = g++'. In contrast, in the variable mode, we
- // restrict certain characters (e.g., '/') from appearing in the name. The
- // attribute mode is like value except it doesn't treat '{' and '}' as
- // special (so we cannot have name groups in attributes). The eval mode is
- // used in the evaluation context. Quoted modes are internal and should not
- // be set explicitly.
- //
- // Note that the normal, value, and eval modes split words separated by the
- // pair character (to disable pairs one can pass '\0' as a pair character).
- //
- // The alternative modes must be set manually. The value mode automatically
- // expires after the end of the line. The attribute mode expires after the
- // closing ']'. The variable mode expires after the word token. And the eval
- // mode expires after the closing ')'.
- //
- // Note that normally it is only safe to switch mode when the current token
- // is not quoted (or, more generally, when you are not in the double-quoted
- // mode) unless the mode treats the double-quote as a separator (e.g.,
- // variable name mode). Failing that, your mode (which now will be the top of
- // the mode stack) will prevent proper recognition of the closing quote.
- //
-
- // Extendable/inheritable enum-like class built on top of lexer_mode_base.
- //
- struct lexer_mode: lexer_mode_base
- {
- using base_type = lexer_mode_base;
-
- enum
- {
- normal = base_type::value_next, // Continue numbering after the base values.
- variable,
- value,
- attribute,
- eval,
- single_quoted,
- double_quoted,
- buildspec,
-
- value_next // Presumably the first value free for a further-derived mode type.
- };
-
- lexer_mode () = default;
- lexer_mode (value_type v): base_type (v) {}
- lexer_mode (base_type v): base_type (v) {}
- };
-
- class lexer: public butl::char_scanner
- {
- public:
- // If escapes is not NULL then only escape sequences with characters from
- // this string are considered "effective escapes" with all others passed
- // through as is. Note that the escape string is not copied.
- //
- lexer (istream& is,
- const path& name,
- uint64_t line = 1, // Start line in the stream.
- const char* escapes = nullptr)
- : lexer (is, name, line, escapes, true /* set_mode */) {}
-
- const path&
- name () const {return name_;}
-
- // Note: sets mode for the next token. The second argument can be used to
- // specify the pair separator character (if the mode supports pairs). If
- // escapes is not specified, then inherit the current mode's (though a mode
- // can also override it).
- //
- virtual void
- mode (lexer_mode,
- char pair_separator = '\0',
- optional<const char*> escapes = nullopt);
-
- // Expire the current mode early (pops the top of the mode stack).
- //
- void
- expire_mode () {state_.pop ();}
-
- lexer_mode
- mode () const {return state_.top ().mode;}
-
- char
- pair_separator () const {return state_.top ().sep_pair;}
-
- // Scanner. Note that it is ok to call next() again after getting eos.
- //
- // If you extend the lexer and add a custom lexer mode, then you must
- // override next() and handle the custom mode there.
- //
- virtual token
- next ();
-
- // Peek at the first character of the next token. Return the character
- // or '\0' if the next token will be eos. Also return an indicator of
- // whether the next token will be separated.
- //
- pair<char, bool>
- peek_char ();
-
- protected:
- struct state
- {
- lexer_mode mode;
-
- char sep_pair; // Pair separator character (see pair_separator()).
- bool sep_space; // Are whitespaces separators (see skip_spaces())?
- bool sep_newline; // Is newline special (see skip_spaces())?
- bool quotes; // Recognize quoted fragments.
-
- const char* escapes; // Effective escape sequences to recognize.
-
- // Word separator characters. For two-character sequence put the first
- // one in sep_first and the second one in the corresponding position of
- // sep_second. If it's a single-character sequence, then put space in
- // sep_second. If there are multiple sequences that start with the same
- // character, then repeat the first character in sep_first.
- //
- const char* sep_first;
- const char* sep_second;
- };
-
- token
- next_eval ();
-
- token
- next_quoted ();
-
- // Lex a word assuming current is the top state (which may already have
- // been "expired" from the top).
- //
- virtual token
- word (state current, bool separated);
-
- // Return true if we have seen any spaces. Skipped empty lines
- // don't count. In other words, we are only interested in spaces
- // that are on the same line as the following non-space character.
- //
- bool
- skip_spaces ();
-
- // Diagnostics.
- //
- protected:
- fail_mark fail;
-
- // Lexer state.
- //
- protected:
- lexer (istream& is,
- const path& name,
- uint64_t line,
- const char* escapes,
- bool set_mode) // If false, the initial mode() call below is skipped.
- : char_scanner (is, true /* crlf */, line),
- fail ("error", &name_),
- name_ (name),
- sep_ (false)
- {
- // A virtual call made here cannot reach a derived override; presumably
- // that is why derived lexers can pass set_mode=false (TODO confirm).
- if (set_mode)
- mode (lexer_mode::normal, '@', escapes);
- }
-
- const path name_;
- std::stack<state> state_; // Mode stack; top() is the current mode.
-
- bool sep_; // True if we skipped spaces in peek().
- };
-}
-
- // Diagnostics plumbing: build a build2::location from a scanned character.
- //
- namespace butl // ADL: found via the butl::char_scanner::xchar argument.
- {
- inline build2::location
- get_location (const butl::char_scanner::xchar& c, const void* data)
- {
- using namespace build2;
-
- assert (data != nullptr); // E.g., must be &lexer::name_.
- return location (static_cast<const path*> (data), c.line, c.column);
- }
- }
-
-#endif // BUILD2_LEXER_HXX