about summary refs log tree commit diff
path: root/unit-tests/cc/lexer
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2017-05-24 13:24:31 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2017-05-24 13:24:31 +0200
commit 0cef93b4e2e9bf39b0ca542876f9ab1af6d0f01d (patch)
tree 187b83b65f28cdf4f8a2b0feadf392b49554fbf3 /unit-tests/cc/lexer
parent b3526a5c925169b3be00a5dd4d8c8222f3a475cd (diff)
Implement support for tokenization of preprocessed C/C++ source
Diffstat (limited to 'unit-tests/cc/lexer')
-rw-r--r-- unit-tests/cc/lexer/buildfile 17
-rw-r--r-- unit-tests/cc/lexer/char-literal.test 67
-rw-r--r-- unit-tests/cc/lexer/comment.test 88
-rw-r--r-- unit-tests/cc/lexer/driver.cxx 66
-rw-r--r-- unit-tests/cc/lexer/line.test 67
-rw-r--r-- unit-tests/cc/lexer/number.test 48
-rw-r--r-- unit-tests/cc/lexer/preprocessor.test 38
-rw-r--r-- unit-tests/cc/lexer/raw-string-literal.test 90
-rw-r--r-- unit-tests/cc/lexer/string-literal.test 65
9 files changed, 546 insertions, 0 deletions
diff --git a/unit-tests/cc/lexer/buildfile b/unit-tests/cc/lexer/buildfile
new file mode 100644
index 0000000..ff4e0b3
--- /dev/null
+++ b/unit-tests/cc/lexer/buildfile
@@ -0,0 +1,17 @@
+# file : unit-tests/cc/lexer/buildfile
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+#@@ Temporary until we get utility library support (meanwhile the build2 sources listed in src are prerequisites of the driver itself).
+#
+import libs = libbutl%lib{butl}
+src = cc/lexer token lexer diagnostics utility variable name b-options types-parsers \
+context scope parser target operation rule prerequisite file module function \
+functions-builtin functions-path functions-process-path functions-string \
+functions-target-triplet algorithm search dump filesystem scheduler \
+config/{utility init operation module} spec
+
+exe{driver}: cxx{driver} ../../../build2/cxx{$src} ../../../build2/liba{b} \
+$libs test{*}
+
+include ../../../build2/
diff --git a/unit-tests/cc/lexer/char-literal.test b/unit-tests/cc/lexer/char-literal.test
new file mode 100644
index 0000000..f256785
--- /dev/null
+++ b/unit-tests/cc/lexer/char-literal.test
@@ -0,0 +1,67 @@
+# file : unit-tests/cc/lexer/char-literal.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test character literals.
+#
+
+: normal
+:
+$* <<EOI >>EOO
+'a'
+'aa'
+'"'
+EOI
+<char literal>
+<char literal>
+<char literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+L'a'
+U'a'
+u'a'
+u8'a'
+u8R'a'
+EOI
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+'u8R'
+<char literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+'a'x
+'a'_X123
+EOI
+<char literal>
+<char literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+'\''
+'\\'
+'\\\''
+'\n'
+U'\U0001f34c'
+EOI
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+<char literal>
+EOO
+
+: unterminated
+:
+$* <"'a" 2>>EOE != 0
+stdin:1:1: error: unterminated literal
+EOE
diff --git a/unit-tests/cc/lexer/comment.test b/unit-tests/cc/lexer/comment.test
new file mode 100644
index 0000000..e90d8e0
--- /dev/null
+++ b/unit-tests/cc/lexer/comment.test
@@ -0,0 +1,88 @@
+# file : unit-tests/cc/lexer/comment.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test C and C++ comments.
+#
+
+: c-comment
+:
+$* <<EOI
+/* 'one' */
+/* "two" // three
+*/
+/**
+four
+// five */
+/**
+six /*
+*/
+EOI
+
+: cxx-comment
+:
+$* <<EOI
+// 'one'
+// "two" // three
+// four /* five */
+EOI
+
+: commented-out
+:
+$* <<EOI >"';'"
+// /*
+;
+// */
+EOI
+
+: c-unterminated
+:
+$* <<EOI 2>>EOE != 0
+/*
+comment
+EOI
+stdin:1:2: error: unterminated comment
+EOE
+
+: cxx-unterminated
+:
+$* <<:EOI
+// comment
+EOI
+
+: in-char-literal
+:
+$* <<EOI >>EOO
+'//'
+'/*'*/
+EOI
+<char literal>
+<char literal>
+<punctuation>
+<punctuation>
+EOO
+
+: in-string-literal
+:
+$* <<EOI >>EOO
+"//foo"
+"/*"*/
+EOI
+<string literal>
+<string literal>
+<punctuation>
+<punctuation>
+EOO
+
+: in-raw-string-literal
+:
+$* <<EOI >>EOO
+R"X(
+// foo
+/* bar
+)X"*/
+EOI
+<string literal>
+<punctuation>
+<punctuation>
+EOO
diff --git a/unit-tests/cc/lexer/driver.cxx b/unit-tests/cc/lexer/driver.cxx
new file mode 100644
index 0000000..db3f516
--- /dev/null
+++ b/unit-tests/cc/lexer/driver.cxx
@@ -0,0 +1,66 @@
+// file : unit-tests/cc/lexer/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <build2/types.hxx>
+#include <build2/utility.hxx>
+
+#include <build2/cc/lexer.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace cc
+ {
+ // Usage: argv[0] [<file>] -- print the tokens of <file> (or of stdin if omitted), one per line.
+ //
+ int
+ main (int argc, char* argv[])
+ {
+ try
+ {
+ istream* is;
+ const char* in; // Input name; passed to the lexer as a path.
+
+ // Read from <file> if specified, otherwise from stdin (reading from file is several times faster).
+ //
+ ifdstream ifs;
+ if (argc > 1)
+ {
+ in = argv[1];
+ ifs.open (in);
+ is = &ifs;
+ }
+ else
+ {
+ in = "stdin";
+ cin.exceptions (istream::failbit | istream::badbit); // Make stdin errors throw.
+ is = &cin;
+ }
+
+ lexer l (*is, path (in));
+
+ // Print one token per line. No use printing eos since we will either get it or loop forever.
+ //
+ for (token t; l.next (t) != token_type::eos; )
+ cout << t << endl;
+ }
+ catch (const failed&)
+ {
+ return 1; // Exit code 1 if failed is thrown.
+ }
+
+ return 0;
+ }
+ }
+}
+
+int
+main (int argc, char* argv[])
+{
+ return build2::cc::main (argc, argv); // Forward to the driver implementation in build2::cc.
+}
diff --git a/unit-tests/cc/lexer/line.test b/unit-tests/cc/lexer/line.test
new file mode 100644
index 0000000..9eda9c3
--- /dev/null
+++ b/unit-tests/cc/lexer/line.test
@@ -0,0 +1,67 @@
+# file : unit-tests/cc/lexer/line.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test line continuations.
+#
+
+: identifier
+:
+$* <<EOI >"'foo123'"
+fo\
+o\
+1\
+2\
+3
+EOI
+
+: punctuation
+:
+$* <<EOI >'<punctuation>'
+.\
+.\
+.
+EOI
+
+: c-comment
+:
+$* <<EOI
+/\
+*
+comment
+*\
+/\
+
+EOI
+
+: cxx-comment
+:
+$* <<EOI
+/\
+/ comment\
+more\
+more
+EOI
+
+: other
+:
+$* <<EOI >>EOO
+\abc
+EOI
+<punctuation>
+'abc'
+EOO
+
+: multiple
+:
+$* <<EOI >>EOO
+\\
+EOI
+<punctuation>
+EOO
+
+: unterminated
+:
+$* <<:EOI >'<punctuation>'
+\
+EOI
diff --git a/unit-tests/cc/lexer/number.test b/unit-tests/cc/lexer/number.test
new file mode 100644
index 0000000..1d9b9c5
--- /dev/null
+++ b/unit-tests/cc/lexer/number.test
@@ -0,0 +1,48 @@
+# file : unit-tests/cc/lexer/number.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test number literals (integer, floating-point, digit separators, suffixes).
+#
+
+$* <'1' >'<number literal>'
+$* <'.1' >'<number literal>'
+$* <'1.' >'<number literal>'
+
+$* <'0b101' >'<number literal>'
+$* <'0123' >'<number literal>'
+$* <'0X12AB' >'<number literal>'
+
+$* <'1e10' >'<number literal>'
+$* <'1E+10' >'<number literal>'
+$* <'0x1.p10' >'<number literal>'
+$* <'0x1.P-10' >'<number literal>'
+
+$* <"123'456" >'<number literal>'
+$* <"0xff00'00ff" >'<number literal>'
+
+$* <'123f' >'<number literal>'
+$* <'123UL' >'<number literal>'
+$* <'123_X' >'<number literal>'
+
+: separate-punctuation
+:
+$* <'123;' >>EOO
+<number literal>
+';'
+EOO
+
+: separate-plus-minus
+:
+$* <'1.0_a+2.0' >>EOO
+<number literal>
+<punctuation>
+<number literal>
+EOO
+
+: separate-whitespace
+:
+$* <'123 abc' >>EOO
+<number literal>
+'abc'
+EOO
diff --git a/unit-tests/cc/lexer/preprocessor.test b/unit-tests/cc/lexer/preprocessor.test
new file mode 100644
index 0000000..2917649
--- /dev/null
+++ b/unit-tests/cc/lexer/preprocessor.test
@@ -0,0 +1,38 @@
+# file : unit-tests/cc/lexer/preprocessor.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test preprocessor lines.
+#
+
+: normal
+:
+$* <<EOI
+#pragma message("abc")
+EOI
+
+: multiline
+:
+$* <<EOI
+#pragma message \
+( \
+"abc" \
+)
+EOI
+
+: comment
+:
+$* <<EOI
+#pragma foo /*
+bar
+baz
+*/
+#pragma foo // bar baz
+EOI
+
+: line
+:
+$* <<EOI
+# 1 "test.cxx" 2
+#line 8 "z:\\tmp\\test.hxx"
+EOI
diff --git a/unit-tests/cc/lexer/raw-string-literal.test b/unit-tests/cc/lexer/raw-string-literal.test
new file mode 100644
index 0000000..e8e8b6b
--- /dev/null
+++ b/unit-tests/cc/lexer/raw-string-literal.test
@@ -0,0 +1,90 @@
+# file : unit-tests/cc/lexer/raw-string-literal.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test raw string literals.
+#
+
+: normal
+:
+$* <<EOI >>EOO
+R"()"
+R"(ab)"
+R"(a"b)"
+R"(a)b)"
+R"%(a%)b)%"
+R"X(a
+ b)X"
+R"X(a\
+ b)X"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+LR"(ab)"
+UR"(ab)"
+uR"(ab)"
+u8R"(ab)"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+R"(ab)"x
+R"(ab)"_X123
+EOI
+<string literal>
+<string literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+R"(\)"
+EOI
+<string literal>
+EOO
+
+: invalid-no-paren
+:
+$* <'R"a"' 2>>EOE != 0
+stdin:1:2: error: invalid raw literal
+EOE
+
+: invalid-paren
+:
+$* <'R")()("' 2>>EOE != 0
+stdin:1:2: error: invalid raw literal
+EOE
+
+: invalid-unterminated-paren
+:
+$* <'R"(abc"' 2>>EOE != 0
+stdin:1:2: error: invalid raw literal
+EOE
+
+: invalid-unterminated-delimiter
+:
+$* <'R"X(abc)"' 2>>EOE != 0
+stdin:1:2: error: invalid raw literal
+EOE
+
+: invalid-unterminated-quote
+:
+$* <'R"X(abc)X' 2>>EOE != 0
+stdin:1:2: error: invalid raw literal
+EOE
diff --git a/unit-tests/cc/lexer/string-literal.test b/unit-tests/cc/lexer/string-literal.test
new file mode 100644
index 0000000..062d290
--- /dev/null
+++ b/unit-tests/cc/lexer/string-literal.test
@@ -0,0 +1,65 @@
+# file : unit-tests/cc/lexer/string-literal.test
+# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test string literals (except raw).
+#
+
+: normal
+:
+$* <<EOI >>EOO
+"aa"
+"'"
+"a""b"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: prefix
+:
+$* <<EOI >>EOO
+L"ab"
+U"ab"
+u"ab"
+u8"ab"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: suffix
+:
+$* <<EOI >>EOO
+"ab"x
+"ab"_X123
+EOI
+<string literal>
+<string literal>
+EOO
+
+: escape
+:
+$* <<EOI >>EOO
+"\"\""
+"\\\\"
+"\\\"\\"
+"\n\t"
+U"a\U0001f34c"
+EOI
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+<string literal>
+EOO
+
+: unterminated
+:
+$* <'"ab' 2>>EOE != 0
+stdin:1:1: error: unterminated literal
+EOE