1 files changed, 49 insertions, 27 deletions
diff --git a/libbuild2/cc/lexer.cxx b/libbuild2/cc/lexer.cxx
index d2be3d8..d20e0dc 100644
--- a/libbuild2/cc/lexer.cxx
+++ b/libbuild2/cc/lexer.cxx
@@ -138,10 +138,13 @@ namespace build2
     using type = token_type;
 
     void lexer::
-    next (token& t, xchar c, bool ignore_pp)
+    next (token& t, pair<xchar, bool> cf, bool ignore_pp)
     {
-      for (;; c = skip_spaces ())
+      for (;; cf = skip_spaces ())
       {
+        xchar c (cf.first);
+
+        t.first = cf.second;
         t.file = &log_file_;
         t.line = log_line_ ? *log_line_ : c.line;
         t.column = c.column;
@@ -197,7 +200,7 @@ namespace build2
               {
                 // Note that we keep using the passed token for buffers.
                 //
-                c = skip_spaces (false); // Stop at newline.
+                c = skip_spaces (false).first; // Stop at newline.
 
                 if (eos (c) || c == '\n')
                   break;
@@ -211,18 +214,21 @@ namespace build2
                   // #line <integer> [<string literal>] ...
                   // #     <integer> [<string literal>] ...
                   //
-                  // Also diagnose #include while at it.
+                  // Also diagnose #include while at it if preprocessed.
                   //
                   if (!(c >= '0' && c <= '9'))
                   {
-                    next (t, c, false);
+                    next (t, make_pair (c, false), false);
 
                     if (t.type == type::identifier)
                     {
-                      if (t.value == "include")
-                        fail (l) << "unexpected #include directive";
-                      else if (t.value != "line")
+                      if (t.value != "line")
+                      {
+                        if (preprocessed_ && t.value == "include")
+                          fail (l) << "unexpected #include directive";
+
                         continue;
+                      }
                     }
                     else
                       continue;
@@ -230,7 +236,7 @@ namespace build2
                     if (t.type != type::identifier || t.value != "line")
                       continue;
 
-                    c = skip_spaces (false);
+                    c = skip_spaces (false).first;
 
                     if (!(c >= '0' && c <= '9'))
                       fail (c) << "line number expected after #line directive";
@@ -242,7 +248,7 @@ namespace build2
                   continue; // Parse the tail, if any.
                 }
 
-                next (t, c, false);
+                next (t, make_pair (c, false), false);
               }
               break;
             }
@@ -374,9 +380,13 @@ namespace build2
             xchar p (peek ());
 
             if (p == ':')
+            {
               geth (p);
+              t.type = type::scope;
+            }
+            else
+              t.type = type::colon;
 
-            t.type = type::punctuation;
             return;
           }
           // Number (and also .<N> above).
@@ -727,8 +737,8 @@ namespace build2
       // R"<delimiter>(<raw_characters>)<delimiter>"
       //
       // Where <delimiter> is a potentially-empty character sequence made of
-      // any source character but parentheses, backslash and spaces. It can be
-      // at most 16 characters long.
+      // any source character but parentheses, backslash, and spaces (in
+      // particular, it can be `"`). It can be at most 16 characters long.
       //
       // Note that the <raw_characters> are not processed in any way, not even
       // for line continuations.
@@ -743,7 +753,7 @@ namespace build2
       {
         c = geth ();
 
-        if (eos (c) || c == '\"' || c == ')' || c == '\\' || c == ' ')
+        if (eos (c) || c == ')' || c == '\\' || c == ' ')
           fail (l) << "invalid raw string literal";
 
         if (c == '(')
@@ -823,7 +833,7 @@ namespace build2
 
       // See if we have the file.
       //
-      c = skip_spaces (false);
+      c = skip_spaces (false).first;
 
       if (c == '\"')
       {
@@ -1007,16 +1017,24 @@ namespace build2
     }
 
     auto lexer::
-    skip_spaces (bool nl) -> xchar
+    skip_spaces (bool nl) -> pair<xchar, bool>
     {
       xchar c (get ());
 
+      // Besides the first character, we also need to take into account any
+      // newlines that we are skipping. For example, the first character may
+      // be a space at the end of the line which we will skip along with the
+      // following newline.
+      //
+      bool first (c.column == 1);
+
       for (; !eos (c); c = get ())
       {
         switch (c)
         {
         case '\n':
           if (!nl) break;
+          first = true;
           // Fall through.
         case ' ':
         case '\t':
@@ -1072,11 +1090,16 @@ namespace build2
               if (!nl)
                 break;
 
+              first = true;
               continue;
             }
 
             // C comment.
             //
+            // Note that for the purposes of the first flag we treat a C
+            // comment as being entirely part of the same logical line, even
+            // if there are newlines inside it.
+            //
             if (p == '*')
             {
               get (p);
@@ -1088,21 +1111,18 @@ namespace build2
                 if (eos (c))
                   fail (p) << "unterminated comment";
 
-                if (c == '*' && (c = peek ()) == '/')
+                if (c == '*')
                 {
-                  get (c);
-                  break;
+                  if ((c = peek ()) == '/')
+                  {
+                    get (c);
+                    break;
+                  }
                 }
-
-                if (c != '*' && c != '\\')
+                else
                 {
                   // Direct buffer scan.
                   //
-                  // Note that we should call get() prior to the direct buffer
-                  // scan (see butl::char_scanner for details).
-                  //
-                  get (c);
-
                   const char* b (gptr_);
                   const char* e (egptr_);
                   const char* p (b);
@@ -1132,7 +1152,7 @@ namespace build2
         break;
       }
 
-      return c;
+      return make_pair (c, first);
     }
 
     ostream&
@@ -1142,6 +1162,8 @@ namespace build2
       {
       case type::dot:         o << "'.'";                   break;
       case type::semi:        o << "';'";                   break;
+      case type::colon:       o << "':'";                   break;
+      case type::scope:       o << "'::'";                  break;
       case type::less:        o << "'<'";                   break;
       case type::greater:     o << "'>'";                   break;
       case type::lcbrace:     o << "'{'";                   break;