aboutsummaryrefslogtreecommitdiff
path: root/build/lexer
blob: 4a50e2a1d0757852b16364bbf13d2dd4b293caa4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
// file      : build/lexer -*- C++ -*-
// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#ifndef BUILD_LEXER
#define BUILD_LEXER

#include <stack>
#include <string>
#include <iosfwd>
#include <cstddef> // size_t
#include <cstdint> // uint64_t
#include <cassert>
#include <exception>

#include <butl/char-scanner>

#include <build/types>
#include <build/utility>

#include <build/token>
#include <build/diagnostics>

namespace build
{
  // Context-dependent lexing mode. In the value mode we don't treat
  // certain characters (e.g., +, =) as special so that we can use
  // them in the variable values, e.g., 'foo = g++'. In contrast, in
  // the variable mode, we restrict certain characters (e.g., /) from
  // appearing in the name. The pairs mode is just like value except
  // that we split names separated by the pair character. The eval
  // mode is used in the evaluation context.
  //
  // The alternative modes must be set manually. The value and pairs
  // modes are automatically reset after the end of the line. The
  // variable mode is reset after the name token. And the eval mode
  // is reset after the closing ')'.
  //
  // Quoted is an internal mode and should not be set explicitly.
  //
  enum class lexer_mode
  {
    normal,   // Mode the lexer starts in.
    variable, // Restricts certain characters in names; reset after the name.
    value,    // Certain characters (e.g., +, =) are not special; reset at end of line.
    pairs,    // Like value but names are split at the pair character.
    eval,     // Evaluation context; reset after the closing ')'.
    quoted    // Internal, should not be set explicitly.
  };

  // Token scanner built on top of butl::char_scanner. What is treated
  // as special depends on the lexing mode at the top of the mode stack
  // (see lexer_mode).
  //
  class lexer: protected butl::char_scanner
  {
  public:
    // Create a lexer that reads from is. The name is stored in the
    // diagnostics mark and is returned by name(). The optional processor
    // is a callback taking a token and this lexer; it is only stored
    // here (its invocation is in the implementation).
    //
    // The mode stack starts out with the normal mode. The pair separator
    // is initialized to '=' (the same default as in mode()) so that
    // pair_separator() never returns an indeterminate value, even if no
    // mode has been set yet.
    //
    lexer (std::istream& is,
           const std::string& name,
           void (*processor) (token&, const lexer&) = nullptr)
        : char_scanner (is),
          fail (name),
          processor_ (processor),
          pair_separator_ ('='),
          sep_ (false)
    {
      mode_.push (lexer_mode::normal);
    }

    // Return the name this lexer was created with.
    //
    const std::string&
    name () const {return fail.name_;}

    // Note: sets mode for the next token. If mode is pairs, then
    // the second argument specifies the separator character.
    //
    // Note also that the separator is overwritten on every call,
    // regardless of the mode being pushed.
    //
    void
    mode (lexer_mode m, char pair_separator = '=')
    {
      mode_.push (m);
      pair_separator_ = pair_separator;
    }

    // Expire the current mode early.
    //
    void
    expire_mode ()
    {
      // Popping an empty stack is undefined behavior so catch unbalanced
      // calls in debug builds.
      //
      assert (!mode_.empty ());
      mode_.pop ();
    }

    // Return the current mode (top of the mode stack).
    //
    lexer_mode
    mode () const {return mode_.top ();}

    // Return the separator character passed to the most recent mode()
    // call or '=' if mode() has not been called yet.
    //
    char
    pair_separator () const {return pair_separator_;}

    // Scanner.
    //
    token
    next ();

    // Peek at the first character of the next token. Return the character
    // or 0 if the next token will be eos. Also return an indicator of
    // whether the next token will be separated.
    //
    pair<char, bool>
    peek_char ();

  private:
    token
    next_impl ();

    token
    next_eval ();

    token
    next_quoted ();

    token
    name (bool separated);

    // Return true if we have seen any spaces. Skipped empty lines
    // don't count. In other words, we are only interested in spaces
    // that are on the same line as the following non-space character.
    //
    bool
    skip_spaces ();

    xchar
    escape ();

    // Diagnostics.
    //
  private:
    // Fail mark base that carries the lexer's name.
    //
    struct fail_mark_base: build::fail_mark_base<failed>
    {
      fail_mark_base (const std::string& n): name_ (n) {}

      location_prologue
      operator() (const xchar&) const;

      std::string name_;
    };
    typedef diag_mark<fail_mark_base> fail_mark;

  private:
    fail_mark fail;

    void (*processor_) (token&, const lexer&);

    std::stack<lexer_mode> mode_;
    char pair_separator_;
    bool sep_; // True if we skipped spaces in peek().
  };
}

#endif // BUILD_LEXER