diff options
-rw-r--r-- | build/lexer | 3 | ||||
-rw-r--r-- | build/lexer.cxx | 6 | ||||
-rw-r--r-- | tests/build/lexer/driver.cxx | 124 |
3 files changed, 131 insertions, 2 deletions
diff --git a/build/lexer b/build/lexer
index 987efab..34b6fcc 100644
--- a/build/lexer
+++ b/build/lexer
@@ -21,6 +21,9 @@ namespace build
   class lexer
   {
   public:
+    // If name is empty, then no diagnostics are issued and just lexer_error
+    // is thrown (use for testing).
+    //
     lexer (std::istream& is, const std::string& name)
       : is_ (is), name_ (name) {}
 
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 101227e..8566788 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -56,8 +56,10 @@ namespace build
       if (!is_eos (c))
         return c;
 
-      cerr << name_ << ':' << c.line () << ':' << c.column () << ": error: " <<
-        "unterminated escape sequence" << endl;
+      if (!name_.empty ()) // An empty name suppresses the diagnostic.
+        cerr << name_ << ':' << c.line () << ':' << c.column () << ": error: " <<
+          "unterminated escape sequence" << endl;
+
       throw lexer_error ();
     }
 
diff --git a/tests/build/lexer/driver.cxx b/tests/build/lexer/driver.cxx
new file mode 100644
index 0000000..b1af9d9
--- /dev/null
+++ b/tests/build/lexer/driver.cxx
@@ -0,0 +1,124 @@
+// file      : tests/build/lexer/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2015 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#include <string>
+#include <vector>
+#include <cassert>
+#include <sstream>
+#include <iostream>
+
+#include <build/token>
+#include <build/lexer>
+
+using namespace std;
+using namespace build;
+
+typedef vector<string> tokens; // Token strings in lexing order (see lex ()).
+
+static tokens
+lex (const char*);
+
+int
+main () // Lexer test cases, each checked with assert ().
+{
+  // Whitespace and comments: only the end-of-stream token ("") is expected.
+  //
+  assert (lex ("") == tokens ({""}));
+  assert (lex ("\n") == tokens ({""}));
+  assert (lex ("\n\n") == tokens ({""}));
+  assert (lex (" \t \n") == tokens ({""}));
+  assert (lex ("#comment") == tokens ({""}));
+  assert (lex (" #comment") == tokens ({""}));
+  assert (lex ("#comment\n") == tokens ({""}));
+  assert (lex ("#comment\\\n") == tokens ({""}));
+  assert (lex ("#comment 1\n#comment2") == tokens ({""}));
+
+  // Punctuation.
+  //
+  assert (lex (": \n { }") == tokens ({":", "\n", "{", "}", ""}));
+
+  // Names.
+  //
+  assert (lex ("foo") == tokens ({"foo", ""}));
+  assert (lex ("foo.bar") == tokens ({"foo.bar", ""}));
+
+  // Escaping (a backslash at the very end of the input is a lexer error).
+  //
+  assert (lex (" \\\n") == tokens ({""}));
+  assert (lex ("\\\nfoo") == tokens ({"foo", ""}));
+  assert (lex (" \\ foo") == tokens ({" foo", ""}));
+  assert (lex ("fo\\ o\\:") == tokens ({"fo o:", ""}));
+  assert (lex ("foo\\\nbar") == tokens ({"foo\nbar", ""}));
+  assert (lex ("foo \\\nbar") == tokens ({"foo", "bar", ""}));
+
+  assert (lex (" \\") == tokens ({"<lexer error>"}));
+  assert (lex (" foo\\") == tokens ({"<lexer error>"}));
+
+  // Combinations.
+  //
+  assert (lex ("foo: bar") == tokens ({"foo", ":", "bar", ""}));
+  assert (lex ("\n \nfoo: bar") == tokens ({"foo", ":", "bar", ""}));
+  assert (lex ("foo: bar\n") == tokens ({"foo", ":", "bar", "\n", ""}));
+  assert (lex ("foo: bar#comment") == tokens ({"foo", ":", "bar", ""}));
+  assert (lex ("exe{foo}: obj{bar}") ==
+          tokens ({"exe", "{", "foo", "}", ":", "obj", "{", "bar", "}", ""}));
+  assert (lex ("foo: bar\nbaz: biz") ==
+          tokens ({"foo", ":", "bar", "\n", "baz", ":", "biz", ""}));
+  assert (lex ("foo: bar#comment\nbaz: biz") ==
+          tokens ({"foo", ":", "bar", "\n", "baz", ":", "biz", ""}));
+  assert (lex ("foo:#comment \\\nbar") ==
+          tokens ({"foo", ":", "\n", "bar", ""}));
+}
+
+static tokens
+lex (const char* s)
+{
+  tokens r; // One string per token: name text, punctuation, or "" for eos.
+  istringstream is (s);
+
+  is.exceptions (istream::failbit | istream::badbit); // Failures throw; caught below.
+  lexer l (is, ""); // Empty name: no diagnostics, lexer_error is just thrown.
+
+  try
+  {
+    for (token t (l.next ());; t = l.next ())
+    {
+      const char* v (nullptr);
+
+      switch (t.type ())
+      {
+      case token_type::eos: v= ""; break;
+      case token_type::punctuation:
+        {
+          switch (t.punctuation ())
+          {
+          case token_punctuation::newline: v = "\n"; break;
+          case token_punctuation::colon: v = ":"; break;
+          case token_punctuation::lcbrace: v = "{"; break;
+          case token_punctuation::rcbrace: v = "}"; break;
+          }
+          break;
+        }
+      case token_type::name: v = t.name ().c_str (); break; // NOTE(review): assumes name () returns a reference into t -- confirm.
+      }
+
+      // cerr << t.line () << ':' << t.column () << ':' << v << endl;
+
+      r.push_back (v); // NOTE(review): v is still null if no case above matched.
+
+      if (t.type () == token_type::eos)
+        break;
+    }
+  }
+  catch (const lexer_error&)
+  {
+    r.push_back ("<lexer error>"); // Marker string compared against in main ().
+  }
+  catch (const std::ios_base::failure&)
+  {
+    r.push_back ("<io error>");
+  }
+
+  return r;
+}
|