From 501ce5993f3d52208696c81248829247da7b46b5 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 18 Jun 2015 15:25:36 +0200 Subject: Add char-scanner to libbutl, use in libbpkg and build2 --- build/lexer | 59 ++++------------------------------------ build/lexer.cxx | 84 +++++++-------------------------------------------------- 2 files changed, 14 insertions(+), 129 deletions(-) diff --git a/build/lexer b/build/lexer index 1106ed0..5205ae9 100644 --- a/build/lexer +++ b/build/lexer @@ -10,6 +10,8 @@ #include // uint64_t #include +#include + #include #include @@ -25,10 +27,11 @@ namespace build // enum class lexer_mode {normal, value, variable, pairs}; - class lexer + class lexer: protected butl::char_scanner { public: - lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {} + lexer (std::istream& is, const std::string& name) + : char_scanner (is), fail (name) {} const std::string& name () const {return fail.name_;} @@ -52,30 +55,6 @@ namespace build next (); private: - class xchar - { - public: - typedef std::char_traits traits_type; - typedef traits_type::int_type int_type; - typedef traits_type::char_type char_type; - - xchar (int_type v, std::uint64_t l, std::uint64_t c) - : v_ (v), l_ (l), c_ (c) {} - - operator char_type () const {return static_cast (v_);} - - int_type - value () const {return v_;} - - std::uint64_t line () const {return l_;} - std::uint64_t column () const {return c_;} - - private: - int_type v_; - std::uint64_t l_; - std::uint64_t c_; - }; - token name (xchar, bool separated); @@ -89,26 +68,6 @@ namespace build xchar escape (); - // Character interface. - // - private: - xchar - peek (); - - xchar - get (); - - void - unget (const xchar&); - - // Tests. - // - bool - is_eos (const xchar& c) const - { - return c.value () == xchar::traits_type::eof (); - } - // Diagnostics. // private: @@ -124,16 +83,8 @@ namespace build typedef diag_mark fail_mark; private: - std::istream& is_; fail_mark fail; - std::uint64_t l_ {1}; - std::uint64_t c_ {1}; - - bool unget_ {false}; - xchar buf_ {0, 0, 0}; - - bool eos_ {false}; lexer_mode mode_ {lexer_mode::normal}; char pair_separator_; lexer_mode next_mode_ {lexer_mode::normal}; // Switch to for next token. diff --git a/build/lexer.cxx b/build/lexer.cxx index 43c0690..9dce949 100644 --- a/build/lexer.cxx +++ b/build/lexer.cxx @@ -20,9 +20,9 @@ namespace build bool sep (skip_spaces ()); xchar c (get ()); - uint64_t ln (c.line ()), cn (c.column ()); + uint64_t ln (c.line), cn (c.column); - if (is_eos (c)) + if (eos (c)) return token (token_type::eos, sep, ln, cn); switch (c) @@ -106,11 +106,11 @@ namespace build token lexer:: name (xchar c, bool sep) { - uint64_t ln (c.line ()), cn (c.column ()); + uint64_t ln (c.line), cn (c.column); string lexeme; lexeme += (c != '\\' ? c : escape ()); - for (c = peek (); !is_eos (c); c = peek ()) + for (c = peek (); !eos (c); c = peek ()) { bool done (false); @@ -202,9 +202,9 @@ namespace build bool r (false); xchar c (peek ()); - bool start (c.column () == 1); + bool start (c.column == 1); - for (; !is_eos (c); c = peek ()) + for (; !eos (c); c = peek ()) { switch (c) { @@ -232,7 +232,7 @@ namespace build // Read until newline or eos. // - for (c = peek (); !is_eos (c) && c != '\n'; c = peek ()) + for (c = peek (); !eos (c) && c != '\n'; c = peek ()) get (); r = true; @@ -266,82 +266,16 @@ namespace build { xchar c (get ()); - if (is_eos (c)) + if (eos (c)) fail (c) << "unterminated escape sequence"; return c; } - lexer::xchar lexer:: - peek () - { - if (unget_) - return buf_; - else - { - if (eos_) - return xchar (xchar::traits_type::eof (), l_, c_); - else - { - xchar::int_type v (is_.peek ()); - - if (v == xchar::traits_type::eof ()) - eos_ = true; - - return xchar (v, l_, c_); - } - } - } - - lexer::xchar lexer:: - get () - { - if (unget_) - { - unget_ = false; - return buf_; - } - else - { - // When is_.get () returns eof, the failbit is also set (stupid, - // isn't?) which may trigger an exception. To work around this - // we will call peek() first and only call get() if it is not - // eof. But we can only call peek() on eof once; any subsequent - // calls will spoil the failbit (even more stupid). - // - xchar c (peek ()); - - if (!is_eos (c)) - { - is_.get (); - - if (c == '\n') - { - l_++; - c_ = 1; - } - else - c_++; - } - - return c; - } - } - - void lexer:: - unget (const xchar& c) - { - // Because iostream::unget cannot work once eos is reached, - // we have to provide our own implementation. - // - buf_ = c; - unget_ = true; - } - location_prologue lexer::fail_mark_base:: operator() (const xchar& c) const { return build::fail_mark_base::operator() ( - location (name_.c_str (), c.line (), c.column ())); + location (name_.c_str (), c.line, c.column)); } } -- cgit v1.1