aboutsummaryrefslogtreecommitdiff
path: root/butl
diff options
context:
space:
mode:
Diffstat (limited to 'butl')
-rw-r--r--butl/buildfile2
-rw-r--r--butl/char-scanner82
-rw-r--r--butl/char-scanner.cxx78
3 files changed, 161 insertions, 1 deletions
diff --git a/butl/buildfile b/butl/buildfile
index e9fc900..a1b5a32 100644
--- a/butl/buildfile
+++ b/butl/buildfile
@@ -2,6 +2,6 @@
# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
# license : MIT; see accompanying LICENSE file
-lib{butl}: cxx{fdstream filesystem path process timestamp}
+lib{butl}: cxx{char-scanner fdstream filesystem path process timestamp}
cxx.poptions += -I$src_root
lib{butl}: cxx.export.poptions = -I$src_root
diff --git a/butl/char-scanner b/butl/char-scanner
new file mode 100644
index 0000000..3c8cdbe
--- /dev/null
+++ b/butl/char-scanner
@@ -0,0 +1,82 @@
+// file : butl/char-scanner -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUTL_CHAR_SCANNER
+#define BUTL_CHAR_SCANNER
+
+#include <string> // char_traits
+#include <iosfwd>
+#include <cstdint> // uint64_t
+
+namespace butl
+{
+ // Low-level character stream scanner. Normally used as a base for
+ // higher-level lexers.
+ //
+ class char_scanner
+ {
+ public:
+ char_scanner (std::istream& is): is_ (is) {} // Only a reference to is is kept.
+
+ char_scanner (const char_scanner&) = delete; // Non-copyable.
+ char_scanner& operator= (const char_scanner&) = delete;
+
+ // Scanner interface.
+ //
+ public:
+
+ // Extended character. It includes line/column information
+ // and is capable of representing EOF (value is an int_type).
+ //
+ class xchar
+ {
+ public:
+ typedef std::char_traits<char> traits_type;
+ typedef traits_type::int_type int_type;
+ typedef traits_type::char_type char_type;
+
+ int_type value; // Character or traits_type::eof ().
+ std::uint64_t line;
+ std::uint64_t column;
+
+ operator char_type () const {return static_cast<char_type> (value);} // Lossy for eof; test with eos() first.
+
+ xchar (int_type v, std::uint64_t l = 0, std::uint64_t c = 0) // Implicit; relied upon by buf_ = '\0' below.
+ : value (v), line (l), column (c) {}
+ };
+
+ xchar
+ get (); // Consume and return the next character (or eof).
+
+ void
+ unget (const xchar&); // Make the next get()/peek() return this character.
+
+ // Note that if there is an "ungot" character, peek() will return
+ // that.
+ //
+ xchar
+ peek ();
+
+ // Tests. In the future we can add tests like alpha(), alnum(),
+ // etc.
+ //
+ static bool
+ eos (const xchar& c) {return c.value == xchar::traits_type::eof ();}
+
+ // Line and column of the next character to be read from the stream
+ // (get() advances them; peek() and unget() leave them unchanged).
+ //
+ std::uint64_t line {1};
+ std::uint64_t column {1};
+
+ protected:
+ std::istream& is_;
+
+ bool unget_ {false}; // True if buf_ holds an ungot character.
+ xchar buf_ = '\0'; // The ungot character; meaningful only if unget_.
+ bool eos_ {false}; // Set once is_.peek () has returned eof.
+ };
+}
+
+#endif // BUTL_CHAR_SCANNER
diff --git a/butl/char-scanner.cxx b/butl/char-scanner.cxx
new file mode 100644
index 0000000..1561ad1
--- /dev/null
+++ b/butl/char-scanner.cxx
@@ -0,0 +1,78 @@
+// file : butl/char-scanner.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <butl/char-scanner>
+
+#include <istream>
+
+using namespace std;
+
+namespace butl
+{
+ auto char_scanner::
+ peek () -> xchar
+ {
+ if (unget_) // An ungot character takes precedence and is not consumed.
+ return buf_;
+ else
+ {
+ if (eos_) // The stream has already reported eof; do not touch it again.
+ return xchar (xchar::traits_type::eof (), line, column);
+ else
+ {
+ xchar::int_type v (is_.peek ());
+
+ if (v == xchar::traits_type::eof ())
+ eos_ = true; // Remember eof so that later calls skip is_.peek ().
+
+ return xchar (v, line, column); // Stamp with the current position.
+ }
+ }
+ }
+
+ auto char_scanner::
+ get () -> xchar
+ {
+ if (unget_) // Hand back the ungot character first.
+ {
+ unget_ = false;
+ return buf_;
+ }
+ else
+ {
+ // When is_.get () returns eof, the failbit is also set (stupid,
+ // isn't it?) which may trigger an exception. To work around this
+ // we call peek() first and only call is_.get () if it is not eof.
+ // But we can only call is_.peek () on eof once; any subsequent
+ // calls will set the failbit (even more stupid), hence eos_.
+ //
+ xchar c (peek ()); // unget_ is false here, so this peeks the stream.
+
+ if (!eos (c))
+ {
+ is_.get (); // Consume the character just peeked.
+
+ if (c == '\n') // Advance the position to the next character.
+ {
+ line++;
+ column = 1;
+ }
+ else
+ column++;
+ }
+
+ return c; // Carries the position it was read at.
+ }
+ }
+
+ void char_scanner::
+ unget (const xchar& c)
+ {
+ // Because istream::unget cannot work once eos is reached,
+ // we have to provide our own (single-character) implementation.
+ //
+ buf_ = c; // Single slot: a second unget() before get() overwrites it.
+ unget_ = true; // line/column are not rewound; c carries its own position.
+ }
+}