From 74918fea2ae0ce55fb6541726ec2ab607d7e2d6e Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Wed, 4 Jan 2017 17:00:15 +0200 Subject: Change lexer modes to be semantically accurate --- build2/test/script/lexer | 4 ++-- build2/test/script/lexer.cxx | 32 ++++++++++++++++---------------- build2/test/script/parser.cxx | 10 ++++++---- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/build2/test/script/lexer b/build2/test/script/lexer index a6a96ca..4a6cd14 100644 --- a/build2/test/script/lexer +++ b/build2/test/script/lexer @@ -24,11 +24,11 @@ namespace build2 enum { - script_line = base_type::value_next, + command_line = base_type::value_next, first_token, // Expires at the end of the token. second_token, // Expires at the end of the token. variable_line, // Expires at the end of the line. - command_line, + command_expansion, here_line_single, here_line_double, description_line // Expires at the end of the line. diff --git a/build2/test/script/lexer.cxx b/build2/test/script/lexer.cxx index 8bd3484..ce6ed5d 100644 --- a/build2/test/script/lexer.cxx +++ b/build2/test/script/lexer.cxx @@ -32,7 +32,7 @@ namespace build2 switch (m) { - case lexer_mode::script_line: + case lexer_mode::command_line: { s1 = ":;=!|&<> $(#\t\n"; s2 = " == "; @@ -40,9 +40,9 @@ namespace build2 } case lexer_mode::first_token: { - // First token on the script line. Like script_line but recognizes - // leading '+-{}' as tokens as well as variable assignments as - // separators. + // First token on the script line. Like command_line but + // recognizes leading '+-{}' as tokens as well as variable + // assignments as separators. // // Note that to recognize only leading '+-{}' we shouldn't add // them to the separator strings. @@ -53,12 +53,12 @@ namespace build2 } case lexer_mode::second_token: { - // Second token on the script line. Like script_line but + // Second token on the script line. Like command_line but // recognizes leading variable assignments. // // Note that to recognize only leading assignments we shouldn't // add them to the separator strings (so this is identical to - // script_line). + // command_line). // s1 = ":;=!|&<> $(#\t\n"; s2 = " == "; @@ -75,7 +75,7 @@ namespace build2 break; } - case lexer_mode::command_line: + case lexer_mode::command_expansion: { // Note that whitespaces are not word separators in this mode. // @@ -145,11 +145,11 @@ namespace build2 switch (state_.top ().mode) { - case lexer_mode::script_line: + case lexer_mode::command_line: case lexer_mode::first_token: case lexer_mode::second_token: case lexer_mode::variable_line: - case lexer_mode::command_line: + case lexer_mode::command_expansion: case lexer_mode::here_line_single: case lexer_mode::here_line_double: r = next_line (); @@ -224,7 +224,7 @@ namespace build2 // NOTE: remember to update mode() if adding new special characters. - if (m != lexer_mode::command_line) + if (m != lexer_mode::command_expansion) { switch (c) { @@ -266,7 +266,7 @@ namespace build2 // Line separators. // - if (m == lexer_mode::script_line || + if (m == lexer_mode::command_line || m == lexer_mode::first_token || m == lexer_mode::second_token || m == lexer_mode::variable_line) @@ -277,7 +277,7 @@ namespace build2 } } - if (m == lexer_mode::script_line || + if (m == lexer_mode::command_line || m == lexer_mode::first_token || m == lexer_mode::second_token) { @@ -289,8 +289,8 @@ namespace build2 // Command line operator/separators. // - if (m == lexer_mode::script_line || - m == lexer_mode::first_token || + if (m == lexer_mode::command_line || + m == lexer_mode::first_token || m == lexer_mode::second_token) { switch (c) @@ -311,10 +311,10 @@ namespace build2 // Command operators/separators. // - if (m == lexer_mode::script_line || + if (m == lexer_mode::command_line || m == lexer_mode::first_token || m == lexer_mode::second_token || - m == lexer_mode::command_line) + m == lexer_mode::command_expansion) { switch (c) { diff --git a/build2/test/script/parser.cxx b/build2/test/script/parser.cxx index da61c64..99e4a66 100644 --- a/build2/test/script/parser.cxx +++ b/build2/test/script/parser.cxx @@ -57,7 +57,7 @@ namespace build2 pre_parse_ = true; - lexer l (is, *path_, lexer_mode::script_line); + lexer l (is, *path_, lexer_mode::command_line); set_lexer (&l); id_prefix_.clear (); @@ -989,7 +989,7 @@ namespace build2 try { ifdstream ifs (p); - lexer l (ifs, p, lexer_mode::script_line); + lexer l (ifs, p, lexer_mode::command_line); const path* op (path_); path_ = &p; @@ -1908,7 +1908,7 @@ namespace build2 name = path (move (n)); } - // When re-parsing we do "effective escaping" and only for + // When re-lexing we do "effective escaping" and only for // ['"\] (quotes plus the backslash itself). In particular, // there is no way to escape redirects, operators, etc. The // idea is to prefer quoting except for passing literal @@ -1921,7 +1921,9 @@ namespace build2 // cmd $args # cmd x="foo bar" // istringstream is (s); - lexer lex (is, name, lexer_mode::command_line, "\'\"\\"); + lexer lex (is, name, + lexer_mode::command_expansion, + "\'\"\\"); // Treat the first "sub-token" as always separated from what // we saw earlier. -- cgit v1.1