From 0dcf07989b4b942f6ff872023b2886b7f698d711 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov <boris@codesynthesis.com>
Date: Fri, 12 Dec 2014 08:02:14 +0200
Subject: Add test for lexer

g++-4.9 -std=c++14 -g -I../../.. -o driver driver.cxx ../../../build/lexer.cxx && ./driver
---
 build/lexer                  |   3 ++
 build/lexer.cxx              |   6 ++-
 tests/build/lexer/driver.cxx | 124 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 2 deletions(-)
 create mode 100644 tests/build/lexer/driver.cxx
diff --git a/build/lexer b/build/lexer
index 987efab..34b6fcc 100644
--- a/build/lexer
+++ b/build/lexer
@@ -21,6 +21,9 @@ namespace build
   class lexer
   {
   public:
+    // If name is empty, then no diagnostics are issued and only lexer_error
+    // is thrown (useful for testing).
+    //
     lexer (std::istream& is, const std::string& name)
         : is_ (is), name_ (name) {}
 
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 101227e..8566788 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -56,8 +56,10 @@ namespace build
     if (!is_eos (c))
       return c;
 
-    cerr << name_ << ':' << c.line () << ':' << c.column () << ": error: " <<
-      "unterminated escape sequence" << endl;
+    if (!name_.empty ())
+      cerr << name_ << ':' << c.line () << ':' << c.column () << ": error: " <<
+        "unterminated escape sequence" << endl;
+
     throw lexer_error ();
   }
 
diff --git a/tests/build/lexer/driver.cxx b/tests/build/lexer/driver.cxx
new file mode 100644
index 0000000..b1af9d9
--- /dev/null
+++ b/tests/build/lexer/driver.cxx
@@ -0,0 +1,124 @@
+// file      : tests/build/lexer/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#include <string>
+#include <vector>
+#include <cassert>
+#include <sstream>
+#include <iostream>
+
+#include <build/token>
+#include <build/lexer>
+
+using namespace std;
+using namespace build;
+
+typedef vector<string> tokens;
+
+static tokens
+lex (const char*);
+
+int
+main ()
+{
+  // Whitespace and comments: only the end-of-stream token ("") is produced.
+  //
+  assert (lex ("") == tokens ({""}));
+  assert (lex ("\n") == tokens ({""}));
+  assert (lex ("\n\n") == tokens ({""}));
+  assert (lex (" \t  \n") == tokens ({""}));
+  assert (lex ("#comment") == tokens ({""}));
+  assert (lex ("  #comment") == tokens ({""}));
+  assert (lex ("#comment\n") == tokens ({""}));
+  assert (lex ("#comment\\\n") == tokens ({""}));
+  assert (lex ("#comment 1\n#comment2") == tokens ({""}));
+
+  // Punctuation: colon, newline, and curly braces are separate tokens.
+  //
+  assert (lex (": \n { }") == tokens ({":", "\n", "{", "}", ""}));
+
+  // Names: a dot does not split a name.
+  //
+  assert (lex ("foo") == tokens ({"foo", ""}));
+  assert (lex ("foo.bar") == tokens ({"foo.bar", ""}));
+
+  // Escaping (a trailing backslash with nothing after it is a lexer error).
+  //
+  assert (lex ("  \\\n") == tokens ({""}));
+  assert (lex ("\\\nfoo") == tokens ({"foo", ""}));
+  assert (lex (" \\ foo") == tokens ({" foo", ""}));
+  assert (lex ("fo\\ o\\:") == tokens ({"fo o:", ""}));
+  assert (lex ("foo\\\nbar") == tokens ({"foo\nbar", ""}));
+  assert (lex ("foo \\\nbar") == tokens ({"foo", "bar", ""}));
+
+  assert (lex ("  \\") == tokens ({"<lexer error>"}));
+  assert (lex ("  foo\\") == tokens ({"<lexer error>"}));
+
+  // Combinations of names, punctuation, comments, and escapes.
+  //
+  assert (lex ("foo: bar") == tokens ({"foo", ":", "bar", ""}));
+  assert (lex ("\n \nfoo: bar") == tokens ({"foo", ":", "bar", ""}));
+  assert (lex ("foo: bar\n") == tokens ({"foo", ":", "bar", "\n", ""}));
+  assert (lex ("foo: bar#comment") == tokens ({"foo", ":", "bar", ""}));
+  assert (lex ("exe{foo}: obj{bar}") ==
+          tokens ({"exe", "{", "foo", "}", ":", "obj", "{", "bar", "}", ""}));
+  assert (lex ("foo: bar\nbaz: biz") ==
+          tokens ({"foo", ":", "bar", "\n", "baz", ":", "biz", ""}));
+  assert (lex ("foo: bar#comment\nbaz: biz") ==
+          tokens ({"foo", ":", "bar", "\n", "baz", ":", "biz", ""}));
+  assert (lex ("foo:#comment \\\nbar") ==
+          tokens ({"foo", ":", "\n", "bar", ""}));
+}
+
+static tokens
+lex (const char* s) // Lex s into token strings; "" marks the end of stream.
+{
+  tokens r;
+  istringstream is (s);
+
+  is.exceptions (istream::failbit | istream::badbit); // Throw on failure.
+  lexer l (is, ""); // Empty name: no diagnostics, only lexer_error is thrown.
+
+  try
+  {
+    for (token t (l.next ());; t = l.next ())
+    {
+      const char* v (nullptr);
+
+      switch (t.type ())
+      {
+      case token_type::eos: v= ""; break;
+      case token_type::punctuation:
+        {
+          switch (t.punctuation ())
+          {
+          case token_punctuation::newline: v = "\n"; break;
+          case token_punctuation::colon:   v = ":"; break;
+          case token_punctuation::lcbrace: v = "{"; break;
+          case token_punctuation::rcbrace: v = "}"; break;
+          }
+          break;
+        }
+      case token_type::name: v = t.name ().c_str (); break;
+      }
+
+      // cerr << t.line () << ':' << t.column () << ':' << v << endl;
+
+      r.push_back (v); // NOTE(review): v is null if no case above matched.
+
+      if (t.type () == token_type::eos)
+        break;
+    }
+  }
+  catch (const lexer_error&) // Tokens already in r are kept.
+  {
+    r.push_back ("<lexer error>");
+  }
+  catch (const std::ios_base::failure&)
+  {
+    r.push_back ("<io error>");
+  }
+
+  return r; // Ends with "", "<lexer error>", or "<io error>".
+}
-- 
cgit v1.1