aboutsummaryrefslogtreecommitdiff
path: root/build2/lexer
blob: 827d141ff5945ee7a02b1c547df57a6c1c4a58ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// file      : build2/lexer -*- C++ -*-
// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#ifndef BUILD2_LEXER
#define BUILD2_LEXER

#include <stack>

#include <butl/char-scanner>

#include <build2/types>
#include <build2/utility>

#include <build2/token>
#include <build2/diagnostics>

namespace build2
{
  // Context-dependent lexing mode.
  //
  // normal   - the mode the lexer starts in.
  // variable - certain characters (e.g., '/') are restricted from appearing
  //            in the name. Reset after the name token.
  // value    - certain characters (e.g., '+', '=') are not treated as special
  //            so that they can be used in variable values, e.g.,
  //            'foo = g++'. Automatically reset after the end of the line.
  // eval     - used in the evaluation context. Reset after the closing ')'.
  // quoted   - internal mode that should not be set explicitly.
  //
  // Note that the normal, value, and eval modes split names separated by the
  // pair character (to disable pairs one can pass '\0' as a pair character).
  //
  // The alternative modes must be set manually.
  //
  enum class lexer_mode
  {
    normal,
    variable,
    value,
    eval,
    quoted
  };

  class lexer: protected butl::char_scanner
  {
  public:
    // If escape is not NULL then only escape sequences with characters from
    // this string are considered "effective escapes" with all others passed
    // through as is. Note that the escape string is not copied.
    //
    lexer (istream& is,
           const path& name,
           const char* escapes = nullptr,
           void (*processor) (token&, const lexer&) = nullptr)
        : char_scanner (is),
          fail (name),
          escapes_ (escapes),
          processor_ (processor),
          sep_ (false)
    {
      mode (lexer_mode::normal);
    }

    const path&
    name () const {return fail.name_;}

    // Note: sets mode for the next token. For the value mode the second
    // argument can be used to specify an alternative separator character.
    //
    void
    mode (lexer_mode m, char pair_separator = '@')
    {
      state_.push (state{m, pair_separator});
    }

    // Expire the current mode early.
    //
    void
    expire_mode () {state_.pop ();}

    lexer_mode
    mode () const {return state_.top ().mode;}

    char
    pair_separator () const {return state_.top ().pair_separator;}

    // Scanner. Note that it is ok to call next() again after getting eos.
    //
    token
    next ();

    // Peek at the first character of the next token. Return the character
    // or '\0' if the next token will be eos. Also return an indicator of
    // whether the next token will be separated.
    //
    pair<char, bool>
    peek_char ();

  private:
    token
    next_impl ();

    token
    next_eval ();

    token
    next_quoted ();

    token
    name (bool separated);

    // Return true if we have seen any spaces. Skipped empty lines
    // don't count. In other words, we are only interested in spaces
    // that are on the same line as the following non-space character.
    //
    bool
    skip_spaces ();

    // Diagnostics.
    //
  private:
    struct fail_mark_base: build2::fail_mark_base<failed>
    {
      fail_mark_base (const path& n): name_ (n) {}

      location_prologue
      operator() (const xchar&) const;

      path name_;
    };
    typedef diag_mark<fail_mark_base> fail_mark;

  private:
    fail_mark fail;

    const char* escapes_;
    void (*processor_) (token&, const lexer&);


    struct state
    {
      lexer_mode mode;
      char pair_separator;
    };
    std::stack<state> state_;

    bool sep_; // True if we skipped spaces in peek().
  };
}

#endif // BUILD2_LEXER