aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/cc
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2020-11-17 11:23:36 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2020-11-17 11:23:36 +0200
commit 818dd4a4e743bc8c93d1be67685b1f2e5db6dcf5 (patch)
tree 57c35b478818ab69fd784263cce193e32bd58479 /libbuild2/cc
parent 3ac5998ee4d5e30a35ce7c043b9389598d7f147f (diff)
Implement modules pseudo-directive parsing (p1703, p1857)
Diffstat (limited to 'libbuild2/cc')
-rw-r--r-- libbuild2/cc/lexer+first.test.testscript 25
-rw-r--r-- libbuild2/cc/lexer.cxx 34
-rw-r--r-- libbuild2/cc/lexer.hxx 8
-rw-r--r-- libbuild2/cc/lexer.test.cxx 14
-rw-r--r-- libbuild2/cc/parser+module.test.testscript 23
-rw-r--r-- libbuild2/cc/parser.cxx 96
-rw-r--r-- libbuild2/cc/parser.hxx 2
7 files changed, 148 insertions, 54 deletions
diff --git a/libbuild2/cc/lexer+first.test.testscript b/libbuild2/cc/lexer+first.test.testscript
new file mode 100644
index 0000000..5c55030
--- /dev/null
+++ b/libbuild2/cc/lexer+first.test.testscript
@@ -0,0 +1,25 @@
+# file : libbuild2/cc/lexer+first.test.testscript
+# license : MIT; see accompanying LICENSE file
+
+# Test the logic that detects the first token of a logical line.
+#
+
+: basics
+:
+$* -f <<EOI >>EOO
+; .
+ ; .
+; // Hello
+;
+; /* Hello
+World */ .
+EOI
+';' t
+'.' f
+';' t
+'.' f
+';' t
+';' t
+';' t
+'.' f
+EOO
diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx
index d2be3d8..123a41e 100644
--- a/libbuild2/cc/lexer.cxx
+++ b/libbuild2/cc/lexer.cxx
@@ -138,10 +138,13 @@ namespace build2
using type = token_type;
void lexer::
- next (token& t, xchar c, bool ignore_pp)
+ next (token& t, pair<xchar, bool> cf, bool ignore_pp)
{
- for (;; c = skip_spaces ())
+ for (;; cf = skip_spaces ())
{
+ xchar c (cf.first);
+
+ t.first = cf.second;
t.file = &log_file_;
t.line = log_line_ ? *log_line_ : c.line;
t.column = c.column;
@@ -197,7 +200,7 @@ namespace build2
{
// Note that we keep using the passed token for buffers.
//
- c = skip_spaces (false); // Stop at newline.
+ c = skip_spaces (false).first; // Stop at newline.
if (eos (c) || c == '\n')
break;
@@ -215,7 +218,7 @@ namespace build2
//
if (!(c >= '0' && c <= '9'))
{
- next (t, c, false);
+ next (t, make_pair (c, false), false);
if (t.type == type::identifier)
{
@@ -230,7 +233,7 @@ namespace build2
if (t.type != type::identifier || t.value != "line")
continue;
- c = skip_spaces (false);
+ c = skip_spaces (false).first;
if (!(c >= '0' && c <= '9'))
fail (c) << "line number expected after #line directive";
@@ -242,7 +245,7 @@ namespace build2
continue; // Parse the tail, if any.
}
- next (t, c, false);
+ next (t, make_pair (c, false), false);
}
break;
}
@@ -823,7 +826,7 @@ namespace build2
// See if we have the file.
//
- c = skip_spaces (false);
+ c = skip_spaces (false).first;
if (c == '\"')
{
@@ -1007,16 +1010,24 @@ namespace build2
}
auto lexer::
- skip_spaces (bool nl) -> xchar
+ skip_spaces (bool nl) -> pair<xchar, bool>
{
xchar c (get ());
+ // Besides the first character, we also need to take into account any
+ // newlines that we are skipping. For example, the first character may
+ // be a space at the end of the line which we will skip along with the
+ // following newline.
+ //
+ bool first (c.column == 1);
+
for (; !eos (c); c = get ())
{
switch (c)
{
case '\n':
if (!nl) break;
+ first = true;
// Fall through.
case ' ':
case '\t':
@@ -1072,11 +1083,16 @@ namespace build2
if (!nl)
break;
+ first = true;
continue;
}
// C comment.
//
+ // Note that for the purposes of the first flag we consider a C
+ // comment to be entirely part of the same logical line even if
+ // there are newlines inside.
+ //
if (p == '*')
{
get (p);
@@ -1132,7 +1148,7 @@ namespace build2
break;
}
- return c;
+ return make_pair (c, first);
}
ostream&
diff --git a/libbuild2/cc/lexer.hxx b/libbuild2/cc/lexer.hxx
index d3fe807..b4e1045 100644
--- a/libbuild2/cc/lexer.hxx
+++ b/libbuild2/cc/lexer.hxx
@@ -25,7 +25,8 @@ namespace build2
// as #line, #pragma, but not #include (which is diagnosed). Currently,
// all preprocessor directives except #line are ignored and no values are
// saved from literals. The #line directive (and its shorthand notation)
- // is recognized to provide the logical token location.
+ // is recognized to provide the logical token location. Note that the
+ // modules-related pseudo-directives are not recognized or handled.
//
// While at it we also calculate the checksum of the input ignoring
// comments, whitespaces, etc. This is used to detect changes that do not
@@ -58,6 +59,7 @@ namespace build2
struct token
{
token_type type = token_type::eos;
+ bool first = false; // First token of a logical line.
string value;
// Logical position.
@@ -121,7 +123,7 @@ namespace build2
private:
void
- next (token&, xchar, bool);
+ next (token&, pair<xchar, bool /* first */>, bool);
void
number_literal (token&, xchar);
@@ -141,7 +143,7 @@ namespace build2
void
line_directive (token&, xchar);
- xchar
+ pair<xchar, bool /* first */>
skip_spaces (bool newline = true);
// The char_scanner adaptation for newline escape sequence processing.
diff --git a/libbuild2/cc/lexer.test.cxx b/libbuild2/cc/lexer.test.cxx
index 852d8b2..284d592 100644
--- a/libbuild2/cc/lexer.test.cxx
+++ b/libbuild2/cc/lexer.test.cxx
@@ -16,12 +16,19 @@ namespace build2
{
namespace cc
{
- // Usage: argv[0] [-l] [<file>]
+ // Usage: argv[0] [-l] [-f] [<file>]
+ //
+ // -l
+ // Print location.
+ //
+ // -f
+ // Print first flag.
//
int
main (int argc, char* argv[])
{
bool loc (false);
+ bool first (false);
path file;
for (int i (1); i != argc; ++i)
@@ -30,6 +37,8 @@ namespace build2
if (a == "-l")
loc = true;
+ else if (a == "-f")
+ first = true;
else
{
file = path (argv[i]);
@@ -61,6 +70,9 @@ namespace build2
{
cout << t;
+ if (first)
+ cout << ' ' << (t.first ? 't' : 'f');
+
if (loc)
cout << ' ' << *t.file << ':' << t.line << ':' << t.column;
diff --git a/libbuild2/cc/parser+module.test.testscript b/libbuild2/cc/parser+module.test.testscript
index b92f80b..e4ec139 100644
--- a/libbuild2/cc/parser+module.test.testscript
+++ b/libbuild2/cc/parser+module.test.testscript
@@ -45,6 +45,17 @@ EOI
export import foo;
EOO
+: non-import
+:
+$* <<EOI
+import
+foo;
+export import(*a);
+import::inner xi = {};
+::import <a>;
+class import<int>;
+EOI
+
: non-module
:
$* <<EOI
@@ -52,6 +63,10 @@ $* <<EOI
#pragma export module foo;
#pragma module foo;
export namespace bar {int fox ();}
+module
+foo;
+foo::module();
+module::inner yi = {};
EOI
: attribute
@@ -105,14 +120,6 @@ EOI
<stdin>:6:1: warning: extraneous '}'
EOE
-: import-missing-name
-:
-$* <<EOI 2>>EOE != 0
-import ;
-EOI
-<stdin>:1:8: error: module or header name expected instead of ';'
-EOE
-
: module-missing-name
:
$* <<EOI 2>>EOE != 0
diff --git a/libbuild2/cc/parser.cxx b/libbuild2/cc/parser.cxx
index 55be8b7..fbf076c 100644
--- a/libbuild2/cc/parser.cxx
+++ b/libbuild2/cc/parser.cxx
@@ -43,8 +43,8 @@ namespace build2
token t;
for (bool n (true); (n ? l_->next (t) : t.type) != type::eos; )
{
- // Break to stop, continue to continue, set n to false if the
- // next token already extracted.
+ // Break to stop, continue to continue, and set n to false if the
+ // next token is already extracted.
//
n = true;
@@ -71,37 +71,63 @@ namespace build2
// [export] import <module-name> [<attributes>] ;
// [export] import <header-name> [<attributes>] ;
//
+ // The leading module/export/import keyword must be the first
+ // token of a logical line. Such a line is only recognized as a
+ // pseudo-directive if certain characters appear after
+ // module/import and all the tokens are on the same line; see
+ // p1857 for details.
+ //
// Additionally, when include is translated to an import, it's
// normally replaced with the special __import keyword since it
// may appear in C context.
//
- const string& id (t.value);
-
- if (bb == 0)
+ if (bb == 0 && t.first)
{
- if (id == "import" || id == "__import")
+ const string& id (t.value); // Note: tracks t.
+
+ // Handle the export prefix which can appear for both module
+ // and import.
+ //
+ bool ex (false);
+ if (id == "export")
{
- parse_import (t, false);
+ if (l_->next (t) != type::identifier || t.first)
+ {
+ n = false; // Could be module/import on next line.
+ continue;
+ }
+
+ ex = true;
+ // Fall through.
}
- else if (id == "module")
+
+ if (id == "module")
{
- parse_module (t, false);
+ location_value l (get_location (t));
+ l_->next (t);
+
+ if ((t.type == type::semi ||
+ t.type == type::identifier) && !t.first)
+ parse_module (t, ex, move (l));
+ else
+ n = false;
}
- else if (id == "export")
+ else if (id == "import" || id == "__import")
{
- if (l_->next (t) == type::identifier)
- {
- if (id == "module") parse_module (t, true);
- else if (id == "import") parse_import (t, true);
- else n = false; // Something else (e.g., export namespace).
- }
+ l_->next (t);
+
+ if ((t.type == type::less ||
+ t.type == type::string ||
+ t.type == type::identifier) && !t.first)
+ parse_import (t, ex);
else
n = false;
}
}
continue;
}
- default: continue;
+ default:
+ continue;
}
break;
@@ -120,6 +146,8 @@ namespace build2
// if anything in between fails (probably by having it sitting in a
// diag_frame). So let's keep it simple for now.
//
+ // @@ We now do that for missing include, so could do here as well.
+ //
if (bb != 0)
warn (t) << (bb > 0 ? "missing '}'" : "extraneous '}'");
@@ -134,12 +162,12 @@ namespace build2
void parser::
parse_import (token& t, bool ex)
{
- // enter: import keyword
+ // enter: token after import keyword
// leave: semi
string un;
unit_type ut;
- switch (l_->next (t)) // Start of module/header name.
+ switch (t.type) // Start of module/header name.
{
case type::less:
case type::string:
@@ -155,15 +183,19 @@ namespace build2
break;
}
default:
- fail (t) << "module or header name expected instead of " << t << endf;
+ assert (false);
}
// Should be {}-balanced.
//
- for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+ for (;
+ t.type != type::eos && t.type != type::semi && !t.first;
+ l_->next (t)) ;
if (t.type != type::semi)
fail (t) << "';' expected instead of " << t;
+ else if (t.first)
+ fail (t) << "';' must be on the same line";
// For now we skip header units (see a comment on module type/info
// string serialization in compile rule for details). Note that
@@ -191,21 +223,17 @@ namespace build2
}
void parser::
- parse_module (token& t, bool ex)
+ parse_module (token& t, bool ex, location_value l)
{
- // enter: module keyword
+ // enter: token after module keyword (l is the module keyword location)
// leave: semi
- location_value l (get_location (t));
-
- l_->next (t);
-
// Handle the leading 'module;' marker (p0713).
//
// Note that we don't bother diagnosing invalid/duplicate markers
// leaving that to the compiler.
//
- if (!ex && t.type == type::semi)
+ if (!ex && t.type == type::semi && !t.first)
{
module_marker_ = move (l);
return;
@@ -217,10 +245,14 @@ namespace build2
// Should be {}-balanced.
//
- for (; t.type != type::eos && t.type != type::semi; l_->next (t)) ;
+ for (;
+ t.type != type::eos && t.type != type::semi && !t.first;
+ l_->next (t)) ;
if (t.type != type::semi)
fail (t) << "';' expected instead of " << t;
+ else if (t.first)
+ fail (t) << "';' must be on the same line";
if (!u_->module_info.name.empty ())
fail (l) << "multiple module declarations";
@@ -241,12 +273,12 @@ namespace build2
//
for (;; l_->next (t))
{
- if (t.type != type::identifier)
+ if (t.type != type::identifier || t.first)
fail (t) << "module name expected instead of " << t;
n += t.value;
- if (l_->next (t) != type::dot)
+ if (l_->next (t) != type::dot || t.first)
break;
n += '.';
@@ -271,7 +303,7 @@ namespace build2
{
while (l_->next (t) != type::greater)
{
- if (t.type == type::eos)
+ if (t.type == type::eos || t.first)
fail (t) << "closing '>' expected after header name" << endf;
}
}
diff --git a/libbuild2/cc/parser.hxx b/libbuild2/cc/parser.hxx
index 7b33ef9..7c893b5 100644
--- a/libbuild2/cc/parser.hxx
+++ b/libbuild2/cc/parser.hxx
@@ -31,7 +31,7 @@ namespace build2
parse_import (token&, bool);
void
- parse_module (token&, bool);
+ parse_module (token&, bool, location_value);
string
parse_module_name (token&);