From 1b86963946082e10e879283fad51ba7ce4e942e2 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Thu, 10 Sep 2015 09:12:47 +0200 Subject: Add support for chunking name parsing --- build/parser | 19 ++++--- build/parser.cxx | 22 ++++++-- tests/function/call/buildfile | 2 + tests/function/call/test.out | 2 + tests/parser/buildfile | 7 ++- tests/parser/driver.cxx | 121 +++++++++++++++++++++++++++++++++++++----- 6 files changed, 146 insertions(+), 27 deletions(-) diff --git a/build/parser b/build/parser index 0678a62..7787568 100644 --- a/build/parser +++ b/build/parser @@ -48,7 +48,7 @@ namespace build // Recursive descent parser. // - private: + protected: void clause (token&, token_type&); @@ -79,17 +79,22 @@ namespace build names_type eval (token&, token_type&); + // If chunk is true, then parse the smallest but complete, name-wise, + // chunk of input. Note that in this case you may still end up with + // multiple names, for example, {foo bar}. + // names_type - names (token& t, token_type& tt) + names (token& t, token_type& tt, bool chunk = false) { names_type ns; - names (t, tt, ns, 0, nullptr, nullptr, nullptr); + names (t, tt, ns, chunk, 0, nullptr, nullptr, nullptr); return ns; } void names (token&, token_type&, names_type&, + bool chunk, std::size_t pair, const std::string* prj, const dir_path* dir, @@ -102,7 +107,7 @@ namespace build // Utilities. // - private: + protected: // Switch to a new current scope. Note that this function might // also have to switch to a new root scope if the new current @@ -122,7 +127,7 @@ namespace build // Lexer. // - private: + protected: token_type next (token&, token_type&); @@ -138,10 +143,10 @@ namespace build // Diagnostics. // - private: + protected: const fail_mark fail; - private: + protected: const std::string* path_; // Path processed by diag_relative(). lexer* lexer_; target* target_; // Current target, if any. diff --git a/build/parser.cxx b/build/parser.cxx index 1ceb193..092ae3d 100644 --- a/build/parser.cxx +++ b/build/parser.cxx @@ -825,6 +825,7 @@ namespace build names (token& t, type& tt, names_type& ns, + bool chunk, size_t pair, const std::string* pp, const dir_path* dp, @@ -837,7 +838,7 @@ namespace build // Buffer that is used to collect the complete name in case of // an unseparated variable expansion or eval context, e.g., - // 'foo$bar$(baz)fox'. The idea is to concatenate all the + // 'foo$bar($baz)fox'. The idea is to concatenate all the // individual parts in this buffer and then re-inject it into // the loop as a single token. // @@ -865,7 +866,18 @@ namespace build concat.clear (); } else if (!first) + { + // If we are chunking, stop at the next separated token. Unless + // current or next token is a pair separator, since we want the + // "x = y" pair to be parsed as a single chunk. + // + if (chunk && + peeked ().separated && + (tt != type::pair_separator && t.type != type::pair_separator)) + break; + next (t, tt); + } // Name. // @@ -969,6 +981,7 @@ namespace build count = ns.size (); names (t, tt, ns, + false, (pair != 0 ? pair : (ns.empty () || ns.back ().pair == '\0' ? 0 : ns.size ())), @@ -1266,6 +1279,7 @@ namespace build count = ns.size (); names (t, tt, ns, + false, (pair != 0 ? pair : (ns.empty () || ns.back ().pair == '\0' ? 0 : ns.size ())), @@ -1279,7 +1293,7 @@ namespace build continue; } - // A pair separator (only in the pair mode). + // A pair separator (only in the pairs mode). // if (tt == type::pair_separator) { @@ -1308,8 +1322,6 @@ namespace build if (!first) break; - // Our caller expected this to be a name. - // if (tt == type::rcbrace) // Empty name, e.g., dir{}. { // If we are a second half of a pair, add another first half @@ -1325,6 +1337,8 @@ namespace build break; } else + // Our caller expected this to be a name. + // fail (t) << "expected name instead of " << t; } diff --git a/tests/function/call/buildfile b/tests/function/call/buildfile index 93eba14..136f652 100644 --- a/tests/function/call/buildfile +++ b/tests/function/call/buildfile @@ -3,6 +3,8 @@ $identity (a) $identity (a b c) $identity(sub/dir{x y z}) +print a$identity (b)c + # Verify we can inhibit function call with quoting. # foo = FOO diff --git a/tests/function/call/test.out b/tests/function/call/test.out index abc5974..88a852b 100644 --- a/tests/function/call/test.out +++ b/tests/function/call/test.out @@ -2,6 +2,8 @@ identity() identity(a) identity(a b c) identity(sub/dir{x} sub/dir{y} sub/dir{z}) +identity(b) +ac FOOBAR FOOBAR FOOBAR diff --git a/tests/parser/buildfile b/tests/parser/buildfile index 5cbcaca..c3eda28 100644 --- a/tests/parser/buildfile +++ b/tests/parser/buildfile @@ -2,5 +2,8 @@ # copyright : Copyright (c) 2014-2015 Code Synthesis Ltd # license : MIT; see accompanying LICENSE file -exe{driver}: cxx{driver ../../../build/{lexer parser scope target native \ - prerequisite context diagnostics utility path timestamp}} +import libs = libbutl%lib{butl} + +exe{driver}: cxx{driver ../../build/{token lexer parser scope target \ + prerequisite variable operation rule search algorithm file module dump \ + context diagnostics name path-io utility}} $libs diff --git a/tests/parser/driver.cxx b/tests/parser/driver.cxx index fef2f41..ffafcea 100644 --- a/tests/parser/driver.cxx +++ b/tests/parser/driver.cxx @@ -9,7 +9,8 @@ #include #include #include -#include +#include +#include #include #include @@ -20,16 +21,64 @@ using namespace build; static bool parse (const char*); +static names +parse_names (const char* s, lexer_mode m, bool chunk); + +static names +chunk_names (const char* s) +{ + return parse_names (s, lexer_mode::pairs, true); +} + int main () { ostream cnull (nullptr); diag_stream = &cnull; - target_types.insert (file::static_type); - target_types.insert (exe::static_type); - target_types.insert (obj::static_type); + reset (); + + global_scope->assign ("foo") = "FOO"; + global_scope->assign ("bar") = "BAR"; + + // names() in chunking mode. + // + assert (chunk_names ("{}") == names ({name ()})); + assert (chunk_names ("foo") == names ({name ("foo")})); + assert (chunk_names ("foo bar") == names ({name ("foo")})); + assert (chunk_names ("{foo bar}") == names ({name ("foo"), name ("bar")})); + assert (chunk_names ("dir{foo bar}") == names ({name ("dir", "foo"), + name ("dir", "bar")})); + assert (chunk_names ("dir{foo bar} baz") == names ({name ("dir", "foo"), + name ("dir", "bar")})); + assert (chunk_names ("dir {foo bar}") == names ({name ("dir", "foo"), + name ("dir", "bar")})); + assert (chunk_names ("dir {foo bar} baz") == names ({name ("dir", "foo"), + name ("dir", "bar")})); + assert (chunk_names ("{} foo") == names ({name ()})); + + // Expansion. + // + assert (chunk_names ("$foo $bar baz") == names ({name ("FOO")})); + assert (chunk_names ("$foo$bar baz") == names ({name ("FOOBAR")})); + + assert (chunk_names ("foo(bar)") == names ({name ("foobar")})); + assert (chunk_names ("foo (bar)") == names ({name ("foo")})); + + assert (chunk_names ("\"$foo\"(bar)") == names ({name ("FOObar")})); + assert (chunk_names ("\"$foo\" (bar)") == names ({name ("FOO")})); + + // Quoting. + // + assert (chunk_names ("\"$foo $bar\" baz") == names ({name ("FOO BAR")})); + + // Pairs. + // + assert (chunk_names ("foo=bar") == names ({name ("foo"), name ("bar")})); + assert (chunk_names ("foo = bar x") == names ({name ("foo"), name ("bar")})); + // General. + // assert (parse ("")); assert (parse ("foo:")); assert (parse ("foo bar:")); @@ -43,25 +92,25 @@ main () assert (parse ("{{foo bar}}:")); assert (parse ("{{foo bar} {baz} {biz fox} fix}:")); - assert (parse ("exe{foo}:")); - assert (parse ("exe{foo bar}:")); - assert (parse ("{exe{foo bar}}:")); - assert (parse ("exe{{foo bar} fox}:")); - assert (parse ("exe{foo}: obj{bar baz} biz.o file{fox}")); + assert (parse ("file{foo}:")); + assert (parse ("file{foo bar}:")); + assert (parse ("{file{foo bar}}:")); + assert (parse ("file{{foo bar} fox}:")); + assert (parse ("file{foo}: file{bar baz} biz.o file{fox}")); - assert (!parse (":")); + //assert (!parse (":")); assert (!parse ("foo")); assert (!parse ("{")); assert (!parse ("{foo:")); assert (!parse ("{foo{:")); assert (!parse ("foo: bar:")); - assert (!parse ("exe{foo:")); + assert (!parse ("file{foo:")); // Directory prefix. // assert (parse ("../{foo}: ../{bar}")); - assert (parse ("../exe{foo}: ../obj{bar}")); - assert (!parse ("../exe{exe{foo}}:")); + assert (parse ("../file{foo}: ../file{bar}")); + assert (!parse ("../file{file{foo}}:")); // Directory scope. // @@ -81,9 +130,23 @@ main () assert (!parse ("test/ foo/:\n{\n}")); } +struct test_parser: parser +{ + names_type + test_names (const char*, lexer_mode, bool chunk); +}; + static bool parse (const char* s) { + reset (); // Clear the state. + + // Create a minimal root scope. + // + auto i (scopes.insert (path::current (), nullptr, true, true)); + scope& root (*i->second); + root.src_path_ = root.out_path_ = &i->first; + istringstream is (s); is.exceptions (istream::failbit | istream::badbit); @@ -91,7 +154,7 @@ parse (const char* s) try { - p.parse (is, path (), scopes[path::current ()]); + p.parse_buildfile (is, path (), root, root); } catch (const failed&) { @@ -100,3 +163,33 @@ parse (const char* s) return true; } + +// parser::names() +// +names test_parser:: +test_names (const char* s, lexer_mode m, bool chunk) +{ + istringstream is (s); + is.exceptions (istream::failbit | istream::badbit); + lexer l (is, ""); + + if (m != lexer_mode::normal) + l.mode (m, '='); + + path_ = &l.name (); + lexer_ = &l; + target_ = nullptr; + scope_ = root_ = global_scope; + + token t (token_type::eos, false, 0, 0); + token_type tt; + next (t, tt); + return names (t, tt, chunk); +} + +static names +parse_names (const char* s, lexer_mode m, bool chunk) +{ + test_parser p; + return p.test_names (s, m, chunk); +} -- cgit v1.1