From 0cef93b4e2e9bf39b0ca542876f9ab1af6d0f01d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 24 May 2017 13:24:31 +0200 Subject: Implement support for tokenization of preprocessed C/C++ source --- unit-tests/cc/lexer/buildfile | 17 ++++++ unit-tests/cc/lexer/char-literal.test | 67 +++++++++++++++++++++ unit-tests/cc/lexer/comment.test | 88 ++++++++++++++++++++++++++++ unit-tests/cc/lexer/driver.cxx | 66 +++++++++++++++++++++ unit-tests/cc/lexer/line.test | 67 +++++++++++++++++++++ unit-tests/cc/lexer/number.test | 48 +++++++++++++++ unit-tests/cc/lexer/preprocessor.test | 38 ++++++++++++ unit-tests/cc/lexer/raw-string-literal.test | 90 +++++++++++++++++++++++++++++ unit-tests/cc/lexer/string-literal.test | 65 +++++++++++++++++++++ 9 files changed, 546 insertions(+) create mode 100644 unit-tests/cc/lexer/buildfile create mode 100644 unit-tests/cc/lexer/char-literal.test create mode 100644 unit-tests/cc/lexer/comment.test create mode 100644 unit-tests/cc/lexer/driver.cxx create mode 100644 unit-tests/cc/lexer/line.test create mode 100644 unit-tests/cc/lexer/number.test create mode 100644 unit-tests/cc/lexer/preprocessor.test create mode 100644 unit-tests/cc/lexer/raw-string-literal.test create mode 100644 unit-tests/cc/lexer/string-literal.test (limited to 'unit-tests/cc') diff --git a/unit-tests/cc/lexer/buildfile b/unit-tests/cc/lexer/buildfile new file mode 100644 index 0000000..ff4e0b3 --- /dev/null +++ b/unit-tests/cc/lexer/buildfile @@ -0,0 +1,17 @@ +# file : unit-tests/cc/lexer/buildfile +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +#@@ Temporary until we get utility library support. +# +import libs = libbutl%lib{butl} +src = cc/lexer token lexer diagnostics utility variable name b-options types-parsers \ +context scope parser target operation rule prerequisite file module function \ +functions-builtin functions-path functions-process-path functions-string \ +functions-target-triplet algorithm search dump filesystem scheduler \ +config/{utility init operation module} spec + +exe{driver}: cxx{driver} ../../../build2/cxx{$src} ../../../build2/liba{b} \ +$libs test{*} + +include ../../../build2/ diff --git a/unit-tests/cc/lexer/char-literal.test b/unit-tests/cc/lexer/char-literal.test new file mode 100644 index 0000000..f256785 --- /dev/null +++ b/unit-tests/cc/lexer/char-literal.test @@ -0,0 +1,67 @@ +# file : unit-tests/cc/lexer/char-literal.test +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test character literals. +# + +: normal +: +$* <>EOO +'a' +'aa' +'"' +EOI + + + +EOO + +: prefix +: +$* <>EOO +L'a' +U'a' +u'a' +u8'a' +u8R'a' +EOI + + + + +'u8R' + +EOO + +: suffix +: +$* <>EOO +'a'x +'a'_X123 +EOI + + +EOO + +: escape +: +$* <>EOO +'\'' +'\\' +'\\\'' +'\n' +U'\U0001f34c' +EOI + + + + + +EOO + +: unterminated +: +$* <"'a" 2>>EOE != 0 +stdin:1:1: error: unterminated literal +EOE diff --git a/unit-tests/cc/lexer/comment.test b/unit-tests/cc/lexer/comment.test new file mode 100644 index 0000000..e90d8e0 --- /dev/null +++ b/unit-tests/cc/lexer/comment.test @@ -0,0 +1,88 @@ +# file : unit-tests/cc/lexer/comment.test +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test C and C++ comments. +# + +: c-comment +: +$* <"';'" +// /* +; +// */ +EOI + +: c-unterminated +: +$* <>EOE != 0 +/* +comment +EOI +stdin:1:2: error: unterminated comment +EOE + +: cxx-unterminated +: +$* <<:EOI +// comment +EOI + +: in-char-literal +: +$* <>EOO +'//' +'/*'*/ +EOI + + + + +EOO + +: in-string-literal +: +$* <>EOO +"//foo" +"/*"*/ +EOI + + + + +EOO + +: in-raw-string-literal +: +$* <>EOO +R"X( +// foo +/* bar +)X"*/ +EOI + + + +EOO diff --git a/unit-tests/cc/lexer/driver.cxx b/unit-tests/cc/lexer/driver.cxx new file mode 100644 index 0000000..db3f516 --- /dev/null +++ b/unit-tests/cc/lexer/driver.cxx @@ -0,0 +1,66 @@ +// file : unit-tests/cc/lexer/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include + +#include +#include + +#include + +using namespace std; + +namespace build2 +{ + namespace cc + { + // Usage: argv[0] [] + // + int + main (int argc, char* argv[]) + { + try + { + istream* is; + const char* in; + + // Reading from file is several times faster. + // + ifdstream ifs; + if (argc > 1) + { + in = argv[1]; + ifs.open (in); + is = &ifs; + } + else + { + in = "stdin"; + cin.exceptions (istream::failbit | istream::badbit); + is = &cin; + } + + lexer l (*is, path (in)); + + // No use printing eos since we will either get it or loop forever. + // + for (token t; l.next (t) != token_type::eos; ) + cout << t << endl; + } + catch (const failed&) + { + return 1; + } + + return 0; + } + } +} + +int +main (int argc, char* argv[]) +{ + return build2::cc::main (argc, argv); +} diff --git a/unit-tests/cc/lexer/line.test b/unit-tests/cc/lexer/line.test new file mode 100644 index 0000000..9eda9c3 --- /dev/null +++ b/unit-tests/cc/lexer/line.test @@ -0,0 +1,67 @@ +# file : unit-tests/cc/lexer/line.test +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test line continuations. +# + +: identifier +: +$* <"'foo123'" +fo\ +o\ +1\ +2\ +3 +EOI + +: punctuation +: +$* <'' +.\ +.\ +. +EOI + +: c-comment +: +$* <>EOO +\abc +EOI + +'abc' +EOO + +: multiple +: +$* <>EOO +\\ +EOI + +EOO + +: unterminated +: +$* <<:EOI >'' +\ +EOI diff --git a/unit-tests/cc/lexer/number.test b/unit-tests/cc/lexer/number.test new file mode 100644 index 0000000..1d9b9c5 --- /dev/null +++ b/unit-tests/cc/lexer/number.test @@ -0,0 +1,48 @@ +# file : unit-tests/cc/lexer/number.test +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test numbers. +# + +$* <'1' >'' +$* <'.1' >'' +$* <'1.' >'' + +$* <'0b101' >'' +$* <'0123' >'' +$* <'0X12AB' >'' + +$* <'1e10' >'' +$* <'1E+10' >'' +$* <'0x1.p10' >'' +$* <'0x1.P-10' >'' + +$* <"123'456" >'' +$* <"0xff00'00ff" >'' + +$* <'123f' >'' +$* <'123UL' >'' +$* <'123_X' >'' + +: separate-punctuation +: +$* <'123;' >>EOO + +';' +EOO + +: separate-plus-minus +: +$* <'1.0_a+2.0' >>EOO + + + +EOO + +: separate-whitespace +: +$* <'123 abc' >>EOO + +'abc' +EOO diff --git a/unit-tests/cc/lexer/preprocessor.test b/unit-tests/cc/lexer/preprocessor.test new file mode 100644 index 0000000..2917649 --- /dev/null +++ b/unit-tests/cc/lexer/preprocessor.test @@ -0,0 +1,38 @@ +# file : unit-tests/cc/lexer/preprocessor.test +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test preprocessor lines. +# + +: normal +: +$* <>EOO +R"()" +R"(ab)" +R"(a"b)" +R"(a)b)" +R"%(a%)b)%" +R"X(a + b)X" +R"X(a\ + b)X" +EOI + + + + + + + +EOO + +: prefix +: +$* <>EOO +LR"(ab)" +UR"(ab)" +uR"(ab)" +u8R"(ab)" +EOI + + + + +EOO + +: suffix +: +$* <>EOO +R"(ab)"x +R"(ab)"_X123 +EOI + + +EOO + +: escape +: +$* <>EOO +R"(\)" +EOI + +EOO + +: invalid-no-paren +: +$* <'R"a"' 2>>EOE != 0 +stdin:1:2: error: invalid raw literal +EOE + +: invalid-paren +: +$* <'R")()("' 2>>EOE != 0 +stdin:1:2: error: invalid raw literal +EOE + +: invalid-unterminated-paren +: +$* <'R"(abc"' 2>>EOE != 0 +stdin:1:2: error: invalid raw literal +EOE + +: invalid-unterminated-delimiter +: +$* <'R"X(abc)"' 2>>EOE != 0 +stdin:1:2: error: invalid raw literal +EOE + +: invalid-unterminated-quote +: +$* <'R"X(abc)X' 2>>EOE != 0 +stdin:1:2: error: invalid raw literal +EOE diff --git a/unit-tests/cc/lexer/string-literal.test b/unit-tests/cc/lexer/string-literal.test new file mode 100644 index 0000000..062d290 --- /dev/null +++ b/unit-tests/cc/lexer/string-literal.test @@ -0,0 +1,65 @@ +# file : unit-tests/cc/lexer/string-literal.test +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +# Test string literals (except raw). +# + +: normal +: +$* <>EOO +"aa" +"'" +"a""b" +EOI + + + + +EOO + +: prefix +: +$* <>EOO +L"ab" +U"ab" +u"ab" +u8"ab" +EOI + + + + +EOO + +: suffix +: +$* <>EOO +"ab"x +"ab"_X123 +EOI + + +EOO + +: escape +: +$* <>EOO +"\"\"" +"\\\\" +"\\\"\\" +"\n\t" +U"a\U0001f34c" +EOI + + + + + +EOO + +: unterminated +: +$* <'"ab' 2>>EOE != 0 +stdin:1:1: error: unterminated literal +EOE -- cgit v1.1