aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2015-06-18 15:25:36 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2015-06-18 15:25:36 +0200
commit 501ce5993f3d52208696c81248829247da7b46b5 (patch)
tree b47c473e6f157d8fc53f87ce9d0feb49268a371f
parent 7817d08a8a3c9dee2a8c8da7aee1ad369ee1f12e (diff)
Add char-scanner to libbutl, use in libbpkg and build2
-rw-r--r-- build/lexer 59
-rw-r--r-- build/lexer.cxx 84
2 files changed, 14 insertions, 129 deletions
diff --git a/build/lexer b/build/lexer
index 1106ed0..5205ae9 100644
--- a/build/lexer
+++ b/build/lexer
@@ -10,6 +10,8 @@
#include <cstdint> // uint64_t
#include <exception>
+#include <butl/char-scanner>
+
#include <build/token>
#include <build/diagnostics>
@@ -25,10 +27,11 @@ namespace build
//
enum class lexer_mode {normal, value, variable, pairs};
- class lexer
+ class lexer: protected butl::char_scanner
{
public:
- lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {}
+ lexer (std::istream& is, const std::string& name)
+ : char_scanner (is), fail (name) {}
const std::string&
name () const {return fail.name_;}
@@ -52,30 +55,6 @@ namespace build
next ();
private:
- class xchar
- {
- public:
- typedef std::char_traits<char> traits_type;
- typedef traits_type::int_type int_type;
- typedef traits_type::char_type char_type;
-
- xchar (int_type v, std::uint64_t l, std::uint64_t c)
- : v_ (v), l_ (l), c_ (c) {}
-
- operator char_type () const {return static_cast<char_type> (v_);}
-
- int_type
- value () const {return v_;}
-
- std::uint64_t line () const {return l_;}
- std::uint64_t column () const {return c_;}
-
- private:
- int_type v_;
- std::uint64_t l_;
- std::uint64_t c_;
- };
-
token
name (xchar, bool separated);
@@ -89,26 +68,6 @@ namespace build
xchar
escape ();
- // Character interface.
- //
- private:
- xchar
- peek ();
-
- xchar
- get ();
-
- void
- unget (const xchar&);
-
- // Tests.
- //
- bool
- is_eos (const xchar& c) const
- {
- return c.value () == xchar::traits_type::eof ();
- }
-
// Diagnostics.
//
private:
@@ -124,16 +83,8 @@ namespace build
typedef diag_mark<fail_mark_base> fail_mark;
private:
- std::istream& is_;
fail_mark fail;
- std::uint64_t l_ {1};
- std::uint64_t c_ {1};
-
- bool unget_ {false};
- xchar buf_ {0, 0, 0};
-
- bool eos_ {false};
lexer_mode mode_ {lexer_mode::normal};
char pair_separator_;
lexer_mode next_mode_ {lexer_mode::normal}; // Switch to for next token.
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 43c0690..9dce949 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -20,9 +20,9 @@ namespace build
bool sep (skip_spaces ());
xchar c (get ());
- uint64_t ln (c.line ()), cn (c.column ());
+ uint64_t ln (c.line), cn (c.column);
- if (is_eos (c))
+ if (eos (c))
return token (token_type::eos, sep, ln, cn);
switch (c)
@@ -106,11 +106,11 @@ namespace build
token lexer::
name (xchar c, bool sep)
{
- uint64_t ln (c.line ()), cn (c.column ());
+ uint64_t ln (c.line), cn (c.column);
string lexeme;
lexeme += (c != '\\' ? c : escape ());
- for (c = peek (); !is_eos (c); c = peek ())
+ for (c = peek (); !eos (c); c = peek ())
{
bool done (false);
@@ -202,9 +202,9 @@ namespace build
bool r (false);
xchar c (peek ());
- bool start (c.column () == 1);
+ bool start (c.column == 1);
- for (; !is_eos (c); c = peek ())
+ for (; !eos (c); c = peek ())
{
switch (c)
{
@@ -232,7 +232,7 @@ namespace build
// Read until newline or eos.
//
- for (c = peek (); !is_eos (c) && c != '\n'; c = peek ())
+ for (c = peek (); !eos (c) && c != '\n'; c = peek ())
get ();
r = true;
@@ -266,82 +266,16 @@ namespace build
{
xchar c (get ());
- if (is_eos (c))
+ if (eos (c))
fail (c) << "unterminated escape sequence";
return c;
}
- lexer::xchar lexer::
- peek ()
- {
- if (unget_)
- return buf_;
- else
- {
- if (eos_)
- return xchar (xchar::traits_type::eof (), l_, c_);
- else
- {
- xchar::int_type v (is_.peek ());
-
- if (v == xchar::traits_type::eof ())
- eos_ = true;
-
- return xchar (v, l_, c_);
- }
- }
- }
-
- lexer::xchar lexer::
- get ()
- {
- if (unget_)
- {
- unget_ = false;
- return buf_;
- }
- else
- {
- // When is_.get () returns eof, the failbit is also set (stupid,
- // isn't?) which may trigger an exception. To work around this
- // we will call peek() first and only call get() if it is not
- // eof. But we can only call peek() on eof once; any subsequent
- // calls will spoil the failbit (even more stupid).
- //
- xchar c (peek ());
-
- if (!is_eos (c))
- {
- is_.get ();
-
- if (c == '\n')
- {
- l_++;
- c_ = 1;
- }
- else
- c_++;
- }
-
- return c;
- }
- }
-
- void lexer::
- unget (const xchar& c)
- {
- // Because iostream::unget cannot work once eos is reached,
- // we have to provide our own implementation.
- //
- buf_ = c;
- unget_ = true;
- }
-
location_prologue lexer::fail_mark_base::
operator() (const xchar& c) const
{
return build::fail_mark_base<failed>::operator() (
- location (name_.c_str (), c.line (), c.column ()));
+ location (name_.c_str (), c.line, c.column));
}
}