aboutsummaryrefslogtreecommitdiff
path: root/build
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2015-03-02 09:52:59 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2015-03-02 09:52:59 +0200
commit4a9ee48613cf5c59e071400280b62358eb79987e (patch)
tree5a5546a3b090f88db00d21caccced186f5765786 /build
parentfbd3c230d3861084b7316a6d5a8597cb00c9510b (diff)
Indicate whether token is separated from previous one by whitespaces
Diffstat (limited to 'build')
-rw-r--r--build/lexer32
-rw-r--r--build/lexer.cxx159
-rw-r--r--build/parser2
-rw-r--r--build/parser.cxx2
-rw-r--r--build/token14
5 files changed, 117 insertions, 92 deletions
diff --git a/build/lexer b/build/lexer
index 1723ae0..67a94c8 100644
--- a/build/lexer
+++ b/build/lexer
@@ -20,11 +20,11 @@ namespace build
public:
lexer (std::istream& is, const std::string& name): is_ (is), fail (name) {}
+ // Scanner.
+ //
token
next ();
- // Character interface.
- //
private:
class xchar
{
@@ -50,6 +50,22 @@ namespace build
std::uint64_t c_;
};
+ token
+ name (xchar, bool separated);
+
+ // Return true if we have seen any spaces. Skipped empty lines don't
+ // count. In other words, we are only interested in spaces that
+ // are on the same line as the following non-space character.
+ //
+ bool
+ skip_spaces ();
+
+ xchar
+ escape ();
+
+ // Character interface.
+ //
+ private:
xchar
peek ();
@@ -67,18 +83,6 @@ namespace build
return c.value () == xchar::traits_type::eof ();
}
- // Scanner.
- //
- private:
- xchar
- escape ();
-
- void
- skip_spaces ();
-
- token
- name (xchar);
-
// Diagnostics.
//
private:
diff --git a/build/lexer.cxx b/build/lexer.cxx
index 6836322..4e29b43 100644
--- a/build/lexer.cxx
+++ b/build/lexer.cxx
@@ -11,13 +11,13 @@ namespace build
token lexer::
next ()
{
- skip_spaces ();
+ bool sep (skip_spaces ());
xchar c (get ());
uint64_t ln (c.line ()), cn (c.column ());
if (is_eos (c))
- return token (token_type::eos, ln, cn);
+ return token (token_type::eos, sep, ln, cn);
switch (c)
{
@@ -30,15 +30,15 @@ namespace build
if (mode_ == mode::value)
mode_ = mode::normal;
- return token (token_type::newline, ln, cn);
+ return token (token_type::newline, sep, ln, cn);
}
case '{':
{
- return token (token_type::lcbrace, ln, cn);
+ return token (token_type::lcbrace, sep, ln, cn);
}
case '}':
{
- return token (token_type::rcbrace, ln, cn);
+ return token (token_type::rcbrace, sep, ln, cn);
}
}
@@ -53,12 +53,12 @@ namespace build
{
case ':':
{
- return token (token_type::colon, ln, cn);
+ return token (token_type::colon, sep, ln, cn);
}
case '=':
{
mode_ = mode::value;
- return token (token_type::equal, ln, cn);
+ return token (token_type::equal, sep, ln, cn);
}
case '+':
{
@@ -66,79 +66,18 @@ namespace build
fail (c) << "expected = after +";
mode_ = mode::value;
- return token (token_type::plus_equal, ln, cn);
+ return token (token_type::plus_equal, sep, ln, cn);
}
}
}
// Otherwise it is a name.
//
- return name (c);
- }
-
- lexer::xchar lexer::
- escape ()
- {
- xchar c (get ());
-
- if (is_eos (c))
- fail (c) << "unterminated escape sequence";
-
- return c;
- }
-
- void lexer::
- skip_spaces ()
- {
- xchar c (peek ());
- bool start (c.column () == 1);
-
- for (; !is_eos (c); c = peek ())
- {
- switch (c)
- {
- case ' ':
- case '\t':
- break;
- case '\n':
- {
- // Skip empty lines.
- //
- if (start)
- break;
-
- return;
- }
- case '#':
- {
- get ();
-
- // Read until newline or eos.
- //
- for (c = peek (); !is_eos (c) && c != '\n'; c = peek ())
- get ();
- continue;
- }
- case '\\':
- {
- get ();
-
- if (peek () == '\n')
- break;
-
- unget (c);
- // Fall through.
- }
- default:
- return; // Not a space.
- }
-
- get ();
- }
+ return name (c, sep);
}
token lexer::
- name (xchar c)
+ name (xchar c, bool sep)
{
uint64_t ln (c.line ()), cn (c.column ());
string lexeme;
@@ -198,7 +137,83 @@ namespace build
break;
}
- return token (lexeme, ln, cn);
+ return token (lexeme, sep, ln, cn);
+ }
+
+ bool lexer::
+ skip_spaces ()
+ {
+ bool r (false); // Result: becomes true once a space that counts is seen.
+
+ xchar c (peek ());
+ bool start (c.column () == 1); // True if scanning begins at column 1.
+
+ for (; !is_eos (c); c = peek ())
+ {
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ {
+ r = true;
+ break;
+ }
+ case '\n':
+ {
+ // Skip empty lines.
+ //
+ if (start)
+ {
+ r = false; // Spaces seen on a skipped empty line don't count.
+ break;
+ }
+
+ return r; // Leave the newline unread for the caller.
+ }
+ case '#':
+ {
+ get ();
+
+ // Read until newline or eos.
+ //
+ for (c = peek (); !is_eos (c) && c != '\n'; c = peek ())
+ get ();
+
+ r = true; // A comment is treated as a separator.
+ continue; // Skip the get() below; the newline/eos is re-examined.
+ }
+ case '\\':
+ {
+ get ();
+
+ if (peek () == '\n')
+ {
+ r = true; // A backslash-newline pair is treated as a separator.
+ break;
+ }
+
+ unget (c); // Not followed by a newline: put the backslash back.
+ // Fall through.
+ }
+ default:
+ return r; // Not a space.
+ }
+
+ get (); // Consume the character that was peeked and handled above.
+ }
+
+ return r; // Reached end of stream.
+ }
+
+ lexer::xchar lexer::
+ escape ()
+ {
+ xchar c (get ()); // Read the next character from the stream.
+
+ if (is_eos (c)) // End of stream here is diagnosed via fail.
+ fail (c) << "unterminated escape sequence";
+
+ return c;
+ }
lexer::xchar lexer::
diff --git a/build/parser b/build/parser
index 4f099fe..53fc83c 100644
--- a/build/parser
+++ b/build/parser
@@ -77,7 +77,7 @@ namespace build
lexer* lexer_;
scope* scope_;
- token peek_ {token_type::eos, 0, 0};
+ token peek_ {token_type::eos, false, 0, 0};
bool peeked_ {false};
std::unordered_set<path> include_;
diff --git a/build/parser.cxx b/build/parser.cxx
index d887d5f..6eca0e6 100644
--- a/build/parser.cxx
+++ b/build/parser.cxx
@@ -62,7 +62,7 @@ namespace build
lexer_ = &l;
scope_ = &s;
- token t (type::eos, 0, 0);
+ token t (type::eos, false, 0, 0);
type tt;
next (t, tt);
diff --git a/build/token b/build/token
index a071987..e6930ff 100644
--- a/build/token
+++ b/build/token
@@ -31,6 +31,11 @@ namespace build
token_type
type () const {return t_;}
+ // Token is whitespace-separated from the previous token.
+ //
+ bool
+ separated () const {return s_;}
+
std::string const&
name () const {assert (t_ == token_type::name); return n_;}
@@ -38,14 +43,15 @@ namespace build
std::uint64_t column () const {return c_;}
public:
- token (token_type t, std::uint64_t l, std::uint64_t c)
- : t_ (t), l_ (l), c_ (c) {}
+ token (token_type t, bool s, std::uint64_t l, std::uint64_t c)
+ : t_ (t), s_ (s), l_ (l), c_ (c) {}
- token (std::string n, std::uint64_t l, std::uint64_t c)
- : t_ (token_type::name), n_ (std::move (n)), l_ (l), c_ (c) {}
+ token (std::string n, bool s, std::uint64_t l, std::uint64_t c)
+ : t_ (token_type::name), s_ (s), n_ (std::move (n)), l_ (l), c_ (c) {}
private:
token_type t_;
+ bool s_;
std::string n_;
std::uint64_t l_;