From 4bdf53837e010073de802070d4e6087410662d3e Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Sat, 24 Aug 2019 17:41:30 +0300 Subject: Move cc build system module to separate library --- build2/cc/lexer.hxx | 190 ---------------------------------------------------- 1 file changed, 190 deletions(-) delete mode 100644 build2/cc/lexer.hxx (limited to 'build2/cc/lexer.hxx') diff --git a/build2/cc/lexer.hxx b/build2/cc/lexer.hxx deleted file mode 100644 index 5d5fa60..0000000 --- a/build2/cc/lexer.hxx +++ /dev/null @@ -1,190 +0,0 @@ -// file : build2/cc/lexer.hxx -*- C++ -*- -// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd -// license : MIT; see accompanying LICENSE file - -#ifndef BUILD2_CC_LEXER_HXX -#define BUILD2_CC_LEXER_HXX - -#include -#include - -#include -#include - -#include - -namespace build2 -{ - namespace cc - { - // Preprocessor-level tokenization of C/C++ source. In other words, the - // sequence of tokens returned is similar to what a real C/C++ compiler - // would see from its preprocessor. - // - // The input is a (partially-)preprocessed translation unit that may still - // contain comments, line continuations, and preprocessor directives such - // as #line, #pragma, but not #include (which is diagnosed). Currently, - // all preprocessor directives except #line are ignored and no values are - // saved from literals. The #line directive (and its shorthand notation) - // is recognized to provide the logical token location. - // - // While at it we also calculate the checksum of the input ignoring - // comments, whitespaces, etc. This is used to detect changes that do not - // alter the resulting token stream. - // - enum class token_type - { - // NOTE: remember to update operator<<() if changing anything here! - // - eos, - - dot, // . - semi, // ; - less, // < - greater, // > - lcbrace, // { - rcbrace, // } - - punctuation, // Other punctuation. - - identifier, - - number, // Number literal. - character, // Char literal. - string, // String literal. - - other // Other token. - }; - - struct token - { - token_type type = token_type::eos; - string value; - - // Logical position. - // - path file; - uint64_t line = 0; - uint64_t column = 0; - - // Physical position in the stream, currently only for identifiers. - // - uint64_t position = 0; - }; - - // Output the token value in a format suitable for diagnostics. - // - ostream& - operator<< (ostream&, const token&); - - class lexer: protected butl::char_scanner - { - public: - lexer (ifdstream& is, const path& name) - : char_scanner (is, false), - name_ (name), - fail ("error", &name_), - log_file_ (name) {} - - const path& - name () const {return name_;} - - string - checksum () const {return cs_.string ();} - - // Note that it is ok to call next() again after getting eos. - // - token - next () - { - token t; - next (t, skip_spaces (), true); - return t; - } - - // As above but reuse the token to avoid a (potential) memory - // allocation. Typical usage: - // - // for (token t; l.next (t) != token_type::eos; ) - // ... - // - token_type - next (token& t) - { - next (t, skip_spaces (), true); - return t.type; - } - - private: - void - next (token&, xchar, bool); - - void - number_literal (token&, xchar); - - void - char_literal (token&, xchar); - - void - string_literal (token&, xchar); - - void - raw_string_literal (token&, xchar); - - void - literal_suffix (xchar); - - void - line_directive (token&, xchar); - - xchar - skip_spaces (bool newline = true); - - // The char_scanner adaptation for newline escape sequence processing. - // Enabled by default and is only disabled in the raw string literals. - // - private: - using base = char_scanner; - - xchar - peek (bool escape = true); - - xchar - get (bool escape = true); - - void - get (const xchar& peeked); - - // Hashing versions. - // - xchar - geth (bool escape = true); - - void - geth (const xchar& peeked); - - private: - const path name_; - const fail_mark fail; - - // Logical file and line as set by the #line directives. Note that the - // lexer diagnostics still uses the physical file/lines. - // - path log_file_; - optional log_line_; - - string tmp_file_; - sha256 cs_; - }; - - // Diagnostics plumbing. - // - inline location - get_location (const token& t, const void* = nullptr) - { - return location (&t.file, t.line, t.column); - } - } -} - -#endif // BUILD2_CC_LEXER_HXX -- cgit v1.1