aboutsummaryrefslogtreecommitdiff
path: root/build2/cc/lexer.hxx
diff options
context:
space:
mode:
Diffstat (limited to 'build2/cc/lexer.hxx')
-rw-r--r--build2/cc/lexer.hxx190
1 files changed, 0 insertions, 190 deletions
diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx
deleted file mode 100644
index 5d5fa60..0000000
--- a/build2/cc/lexer.hxx
+++ /dev/null
@@ -1,190 +0,0 @@
-// file : build2/cc/lexer.hxx -*- C++ -*-
-// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
-// license : MIT; see accompanying LICENSE file
-
-#ifndef BUILD2_CC_LEXER_HXX
-#define BUILD2_CC_LEXER_HXX
-
-#include <libbutl/sha256.mxx>
-#include <libbutl/char-scanner.mxx>
-
-#include <libbuild2/types.hxx>
-#include <libbuild2/utility.hxx>
-
-#include <libbuild2/diagnostics.hxx>
-
-namespace build2
-{
- namespace cc
- {
- // Preprocessor-level tokenization of C/C++ source. In other words, the
- // sequence of tokens returned is similar to what a real C/C++ compiler
- // would see from its preprocessor.
- //
- // The input is a (partially-)preprocessed translation unit that may still
- // contain comments, line continuations, and preprocessor directives such
- // as #line, #pragma, but not #include (which is diagnosed). Currently,
- // all preprocessor directives except #line are ignored and no values are
- // saved from literals. The #line directive (and its shorthand notation)
- // is recognized to provide the logical token location.
- //
- // While at it we also calculate the checksum of the input ignoring
- // comments, whitespaces, etc. This is used to detect changes that do not
- // alter the resulting token stream.
- //
- enum class token_type
- {
- // NOTE: remember to update operator<<() if changing anything here!
- //
- eos,
-
- dot, // .
- semi, // ;
- less, // <
- greater, // >
- lcbrace, // {
- rcbrace, // }
-
- punctuation, // Other punctuation.
-
- identifier,
-
- number, // Number literal.
- character, // Char literal.
- string, // String literal.
-
- other // Other token.
- };
-
- struct token
- {
- token_type type = token_type::eos;
- string value;
-
- // Logical position.
- //
- path file;
- uint64_t line = 0;
- uint64_t column = 0;
-
- // Physical position in the stream, currently only for identifiers.
- //
- uint64_t position = 0;
- };
-
- // Output the token value in a format suitable for diagnostics.
- //
- ostream&
- operator<< (ostream&, const token&);
-
- class lexer: protected butl::char_scanner
- {
- public:
- lexer (ifdstream& is, const path& name)
- : char_scanner (is, false),
- name_ (name),
- fail ("error", &name_),
- log_file_ (name) {}
-
- const path&
- name () const {return name_;}
-
- string
- checksum () const {return cs_.string ();}
-
- // Note that it is ok to call next() again after getting eos.
- //
- token
- next ()
- {
- token t;
- next (t, skip_spaces (), true);
- return t;
- }
-
- // As above but reuse the token to avoid a (potential) memory
- // allocation. Typical usage:
- //
- // for (token t; l.next (t) != token_type::eos; )
- // ...
- //
- token_type
- next (token& t)
- {
- next (t, skip_spaces (), true);
- return t.type;
- }
-
- private:
- void
- next (token&, xchar, bool);
-
- void
- number_literal (token&, xchar);
-
- void
- char_literal (token&, xchar);
-
- void
- string_literal (token&, xchar);
-
- void
- raw_string_literal (token&, xchar);
-
- void
- literal_suffix (xchar);
-
- void
- line_directive (token&, xchar);
-
- xchar
- skip_spaces (bool newline = true);
-
- // The char_scanner adaptation for newline escape sequence processing.
- // Enabled by default and is only disabled in the raw string literals.
- //
- private:
- using base = char_scanner;
-
- xchar
- peek (bool escape = true);
-
- xchar
- get (bool escape = true);
-
- void
- get (const xchar& peeked);
-
- // Hashing versions.
- //
- xchar
- geth (bool escape = true);
-
- void
- geth (const xchar& peeked);
-
- private:
- const path name_;
- const fail_mark fail;
-
- // Logical file and line as set by the #line directives. Note that the
- // lexer diagnostics still uses the physical file/lines.
- //
- path log_file_;
- optional<uint64_t> log_line_;
-
- string tmp_file_;
- sha256 cs_;
- };
-
- // Diagnostics plumbing.
- //
- inline location
- get_location (const token& t, const void* = nullptr)
- {
- return location (&t.file, t.line, t.column);
- }
- }
-}
-
-#endif // BUILD2_CC_LEXER_HXX