aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2014-12-11 13:57:42 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2014-12-11 13:57:42 +0200
commit e6d92a1fb21232ab09886431d39ccb8a95c7c68d (patch)
tree 0d543e1e3c1b22e88f22f02e2dae75ae9eba2db5
parent fdc21950905d64b2ca1df5a0b2622022beffe922 (diff)
Initial lexer implementation for buildfiles
-rw-r--r-- build/bd.cxx 62
-rw-r--r-- build/buildfile 1
-rw-r--r-- build/lexer 98
-rw-r--r-- build/lexer.cxx 220
-rw-r--r-- build/target 3
-rw-r--r-- build/token 55
6 files changed, 438 insertions, 1 deletions
diff --git a/build/bd.cxx b/build/bd.cxx
index c592d64..33ee02f 100644
--- a/build/bd.cxx
+++ b/build/bd.cxx
@@ -6,6 +6,7 @@
#include <vector>
#include <cassert>
+#include <fstream>
#include <iostream>
#include <typeinfo>
#include <system_error>
@@ -15,6 +16,9 @@
#include <build/process>
#include <build/diagnostics>
+#include <build/token>
+#include <build/lexer>
+
using namespace std;
namespace build
@@ -132,6 +136,64 @@ main (int argc, char* argv[])
//
tzset ();
+ // Parse buildfile.
+ //
+ path bf ("buildfile");
+
+ ifstream ifs (bf.string ().c_str ());
+ if (!ifs.is_open ())
+ {
+ cerr << "error: unable to open " << bf << " in read mode" << endl;
+ return 1;
+ }
+
+ ifs.exceptions (ifstream::failbit | ifstream::badbit);
+ lexer l (ifs, bf.string ());
+
+ try
+ {
+ for (token t (l.next ());; t = l.next ())
+ {
+ cout << t.line () << ':' << t.column () << ": ";
+
+ switch (t.type ())
+ {
+ case token_type::eos: cout << "<eos>"; break;
+ case token_type::punctuation:
+ {
+ switch (t.punctuation ())
+ {
+ case token_punctuation::newline: cout << "\\n"; break;
+ case token_punctuation::colon: cout << ':'; break;
+ case token_punctuation::lcbrace: cout << '{'; break;
+ case token_punctuation::rcbrace: cout << '}'; break;
+ }
+ break;
+ }
+ case token_type::name: cout << '\'' << t.name () << '\''; break;
+ }
+
+ cout << endl;
+
+ if (t.type () == token_type::eos)
+ break;
+ }
+ }
+ catch (const lexer_error&)
+ {
+ return 1; // Diagnostics has already been issued.
+ }
+ catch (const std::ios_base::failure&)
+ {
+ cerr << "error: failed to read from " << bf << endl;
+ return 1;
+ }
+
+ return 0;
+
+
+ // Register rules.
+ //
cxx::link cxx_link;
rules.emplace (typeid (exe), cxx_link);
diff --git a/build/buildfile b/build/buildfile
new file mode 100644
index 0000000..ceed236
--- /dev/null
+++ b/build/buildfile
@@ -0,0 +1 @@
+exe{bd}: obj{bd target}
diff --git a/build/lexer b/build/lexer
new file mode 100644
index 0000000..987efab
--- /dev/null
+++ b/build/lexer
@@ -0,0 +1,98 @@
+// file : build/lexer -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUILD_LEXER
+#define BUILD_LEXER
+
+#include <string>
+#include <iosfwd>
+#include <cstdint> // uint64_t
+#include <exception>
+
+#include <build/token>
+
+namespace build
+{
+ // Thrown by the lexer after the diagnostics have already been issued;
+ // a handler must not report the error again.
+ struct lexer_error: std::exception {};
+
+  // Buildfile lexer. Reads characters from the input stream and returns
+  // one token per call to next().
+  //
+  class lexer
+  {
+  public:
+    // The name is used to prefix the diagnostics.
+    //
+    lexer (std::istream& is, const std::string& name)
+        : is_ (is),
+          name_ (name)
+    {
+    }
+
+    // Return the next token; an eos token is returned at end of stream.
+    //
+    token
+    next ();
+
+    // Character interface.
+    //
+  private:
+    // A character value (possibly eof) together with the line and column
+    // it was read at.
+    //
+    class xchar
+    {
+    public:
+      using traits_type = std::char_traits<char>;
+      using int_type = traits_type::int_type;
+      using char_type = traits_type::char_type;
+
+      xchar (int_type val, std::uint64_t ln, std::uint64_t col)
+          : value_ (val),
+            line_ (ln),
+            column_ (col)
+      {
+      }
+
+      // Implicit on purpose: lets an xchar be used in a switch and be
+      // compared with or appended as a plain char.
+      //
+      operator char_type () const
+      {
+        return static_cast<char_type> (value_);
+      }
+
+      int_type
+      value () const
+      {
+        return value_;
+      }
+
+      std::uint64_t
+      line () const
+      {
+        return line_;
+      }
+
+      std::uint64_t
+      column () const
+      {
+        return column_;
+      }
+
+    private:
+      int_type value_;
+      std::uint64_t line_;
+      std::uint64_t column_;
+    };
+
+    // Return the next character without extracting it.
+    //
+    xchar
+    peek ();
+
+    // Extract and return the next character.
+    //
+    xchar
+    get ();
+
+    // Push a character back so that it is returned by the next peek()
+    // or get().
+    //
+    void
+    unget (const xchar&);
+
+    // Tests.
+    //
+    bool
+    is_eos (const xchar& c) const
+    {
+      const xchar::int_type eof (xchar::traits_type::eof ());
+      return c.value () == eof;
+    }
+
+  private:
+    xchar
+    escape ();
+
+    void
+    skip_spaces ();
+
+    token
+    name (xchar);
+
+  private:
+    std::istream& is_;
+    std::string name_;
+
+    // Line and column of the next character to be extracted (both start
+    // at 1).
+    //
+    std::uint64_t l_ = 1;
+    std::uint64_t c_ = 1;
+
+    // Set once the underlying stream has reported eof from peek().
+    //
+    bool eos_ = false;
+
+    // Single-character pushback buffer filled by unget().
+    //
+    bool unget_ = false;
+    xchar buf_ {0, 0, 0};
+  };
+}
+
+#endif // BUILD_LEXER
diff --git a/build/lexer.cxx b/build/lexer.cxx
new file mode 100644
index 0000000..101227e
--- /dev/null
+++ b/build/lexer.cxx
@@ -0,0 +1,220 @@
+// file : build/lexer.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#include <build/lexer>
+
+#include <iostream>
+
+using namespace std;
+
+namespace build
+{
+  // Return the next token: skip whitespace and comments, then classify
+  // the first remaining character as eos, punctuation, or the start of
+  // a name. The token carries the position of that first character.
+  //
+  token lexer::
+  next ()
+  {
+    skip_spaces ();
+
+    const xchar first (get ());
+    const uint64_t line (first.line ());
+    const uint64_t column (first.column ());
+
+    if (is_eos (first))
+      return token (line, column);
+
+    // NOTE: remember to update name() if adding new punctuations.
+    //
+    const char ch (first);
+
+    if (ch == '\n')
+      return token (token_punctuation::newline, line, column);
+
+    if (ch == ':')
+      return token (token_punctuation::colon, line, column);
+
+    if (ch == '{')
+      return token (token_punctuation::lcbrace, line, column);
+
+    if (ch == '}')
+      return token (token_punctuation::rcbrace, line, column);
+
+    // Anything else starts a name.
+    //
+    return name (first);
+  }
+
+  // Extract and return the character that follows a backslash. If the
+  // stream ends instead, issue the diagnostics and throw lexer_error.
+  //
+  lexer::xchar lexer::
+  escape ()
+  {
+    const xchar escaped (get ());
+
+    if (is_eos (escaped))
+    {
+      cerr << name_ << ':' << escaped.line () << ':' << escaped.column ()
+           << ": error: " << "unterminated escape sequence" << endl;
+      throw lexer_error ();
+    }
+
+    return escaped;
+  }
+
+  // Skip spaces, tabs, comments (from '#' up to but not including the
+  // newline) and line continuations (a backslash immediately followed
+  // by a newline). Newlines themselves are skipped only if the scan
+  // started in column 1, that is, empty lines are ignored. Stop at the
+  // first character that is not skipped, leaving it to be read next.
+  //
+  void lexer::
+  skip_spaces ()
+  {
+    xchar c (peek ());
+    const bool at_line_start (c.column () == 1);
+
+    while (!is_eos (c))
+    {
+      const char ch (c);
+
+      if (ch == '#')
+      {
+        get ();
+
+        // Read until newline or eos; the newline is handled by the next
+        // iteration.
+        //
+        for (xchar n (peek ()); !is_eos (n) && n != '\n'; n = peek ())
+          get ();
+      }
+      else
+      {
+        if (ch == '\\')
+        {
+          get ();
+
+          // Not a line continuation: put the backslash back so that it
+          // is seen as the start of a name.
+          //
+          if (peek () != '\n')
+          {
+            unget (c);
+            return;
+          }
+        }
+        else if (ch == '\n')
+        {
+          // Skip empty lines only.
+          //
+          if (!at_line_start)
+            return;
+        }
+        else if (ch != ' ' && ch != '\t')
+          return; // Not a space.
+
+        // Consume the space, tab, skipped newline, or the newline of a
+        // line continuation.
+        //
+        get ();
+      }
+
+      c = peek ();
+    }
+  }
+
+  // Read a name token. The first character of the name, c, has already
+  // been extracted by the caller and determines the token's position.
+  // The name extends up to (but not including) the next space, tab,
+  // newline, ':', '{', '}', '#', or the end of stream. A backslash
+  // escapes the character that follows it, which is then added to the
+  // name verbatim; escape() throws lexer_error if there is none.
+  //
+  token lexer::
+  name (xchar c)
+  {
+    uint64_t ln (c.line ()), cn (c.column ());
+    string lexeme;
+    lexeme += (c != '\\' ? c : escape ());
+
+    for (c = peek (); !is_eos (c); c = peek ())
+    {
+      switch (c)
+      {
+      case ' ':
+      case '\t':
+      case '\n':
+      case ':':
+      case '{':
+      case '}':
+      case '#':
+        {
+          // Separator: leave it in the stream for the next token.
+          //
+          break;
+        }
+      case '\\':
+        {
+          get ();
+          lexeme += escape ();
+          continue;
+        }
+      default:
+        {
+          get ();
+          lexeme += c;
+          continue;
+        }
+      }
+
+      break;
+    }
+
+    // The lexeme is not used past this point so move it into the token
+    // instead of copying it.
+    //
+    return token (std::move (lexeme), ln, cn);
+  }
+
+  // Return the next character without extracting it. A character that
+  // was pushed back with unget() takes precedence. Once the stream has
+  // reported eof it is not touched again and eof is returned directly.
+  //
+  lexer::xchar lexer::
+  peek ()
+  {
+    if (unget_)
+      return buf_;
+
+    const xchar::int_type eof (xchar::traits_type::eof ());
+    xchar::int_type v (eof);
+
+    if (!eos_)
+    {
+      v = is_.peek ();
+
+      if (v == eof)
+        eos_ = true;
+    }
+
+    return xchar (v, l_, c_);
+  }
+
+  // Extract and return the next character, advancing the line/column
+  // position. A character that was pushed back with unget() is returned
+  // first. At the end of stream eof is returned and nothing is extracted.
+  //
+  lexer::xchar lexer::
+  get ()
+  {
+    if (unget_)
+    {
+      unget_ = false;
+      return buf_;
+    }
+
+    // When is_.get () returns eof, the failbit is also set, which may
+    // trigger an exception. To work around this we call peek() first
+    // and only call is_.get () if the result is not eof. Note also that
+    // the stream's peek() can only be called on eof once since any
+    // subsequent call would set the failbit as well.
+    //
+    const xchar c (peek ());
+
+    if (is_eos (c))
+      return c;
+
+    is_.get ();
+
+    if (c == '\n')
+    {
+      ++l_;
+      c_ = 1;
+    }
+    else
+      ++c_;
+
+    return c;
+  }
+
+  // Push a character back so that it is returned by the next peek() or
+  // get(). Only one character can be pushed back at a time.
+  //
+  void lexer::
+  unget (const xchar& c)
+  {
+    // We keep our own one-character buffer because iostream::unget
+    // cannot work once eos is reached.
+    //
+    unget_ = true;
+    buf_ = c;
+  }
+}
diff --git a/build/target b/build/target
index 3ef3192..01cddc4 100644
--- a/build/target
+++ b/build/target
@@ -11,6 +11,7 @@
#include <typeindex>
#include <iosfwd>
#include <cassert>
+#include <utility> // move
#include <build/path>
#include <build/timestamp>
@@ -27,7 +28,7 @@ namespace build
class target
{
public:
- target (std::string n): name_ (n) {}
+ target (std::string n): name_ (std::move (n)) {}
const std::string&
name () const {return name_;}
diff --git a/build/token b/build/token
new file mode 100644
index 0000000..bade45c
--- /dev/null
+++ b/build/token
@@ -0,0 +1,55 @@
+// file : build/token -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef BUILD_TOKEN
+#define BUILD_TOKEN
+
+#include <string>
+#include <cstddef> // size_t
+#include <cstdint> // uint64_t
+#include <cassert>
+#include <utility> // move
+
+namespace build
+{
+  // Token kinds produced by the lexer and, for punctuation tokens, the
+  // specific punctuation character.
+  //
+  enum class token_type {eos, name, punctuation};
+  enum class token_punctuation {newline, colon, lcbrace, rcbrace};
+
+  // A lexed token: its type, the name or punctuation value (depending
+  // on the type), and the line/column position it was found at.
+  //
+  class token
+  {
+  public:
+    token_type
+    type () const {return t_;}
+
+    // Only valid for name tokens.
+    //
+    std::string const&
+    name () const {assert (t_ == token_type::name); return n_;}
+
+    // Only valid for punctuation tokens.
+    //
+    token_punctuation
+    punctuation () const {assert (t_ == token_type::punctuation); return p_;}
+
+    std::uint64_t line () const {return l_;}
+    std::uint64_t column () const {return c_;}
+
+  public:
+    // End of stream token.
+    //
+    token (std::uint64_t l, std::uint64_t c)
+        : t_ (token_type::eos), l_ (l), c_ (c) {}
+
+    // Name token.
+    //
+    token (std::string n, std::uint64_t l, std::uint64_t c)
+        : t_ (token_type::name), n_ (std::move (n)), l_ (l), c_ (c) {}
+
+    // Punctuation token.
+    //
+    token (token_punctuation p, std::uint64_t l, std::uint64_t c)
+        : t_ (token_type::punctuation), p_ (p), l_ (l), c_ (c) {}
+
+  private:
+    token_type t_;
+
+    // Only meaningful for punctuation tokens. It is nevertheless always
+    // initialized because the implicit copy/move operations read every
+    // member and must not read an indeterminate value for eos and name
+    // tokens.
+    //
+    token_punctuation p_ {token_punctuation::newline};
+    std::string n_;
+
+    std::uint64_t l_;
+    std::uint64_t c_;
+  };
+}
+
+#endif // BUILD_TOKEN