1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
|
// file : libbuild2/script/parser.hxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
#ifndef LIBBUILD2_SCRIPT_PARSER_HXX
#define LIBBUILD2_SCRIPT_PARSER_HXX
#include <libbuild2/types.hxx>
#include <libbuild2/forward.hxx>
#include <libbuild2/utility.hxx>
#include <libbuild2/parser.hxx>
#include <libbuild2/diagnostics.hxx>
#include <libbuild2/script/token.hxx>
#include <libbuild2/script/lexer.hxx> // redirect_aliases
#include <libbuild2/script/script.hxx>
namespace build2
{
namespace script
{
// Forward declarations of the script lexer types that are referenced by the
// parser interface below (lexer by pointer, lexer_mode as a function
// parameter type).
//
class lexer;
struct lexer_mode;
// Script parser built on top of build2::parser (inherited with protected
// access). It provides helpers, commonly used parsing functions, the script
// line execution entry point (exec_lines()), and customization hooks (see
// parse_program()) that derived parsers can override.
//
class parser: protected build2::parser
{
public:
  parser (context& c): build2::parser (c) {}

  // Helpers.
  //

  // Parse attribute string and perform attribute-guided assignment.
  // Issue diagnostics and throw failed in case of an error.
  //
  void
  apply_value_attributes (const variable*, // Optional.
                          value& lhs,
                          value&& rhs,
                          const string& attributes,
                          token_type assign_kind,
                          const path_name&); // For diagnostics.

  // Keep the base class overloads visible alongside the one declared above.
  //
  using build2::parser::apply_value_attributes;

  // Return true if a command line element needs to be re-lexed.
  //
  // Specifically, it needs to be re-lexed if it contains any of the
  // special characters (|<>&), quotes ("') or effective escape sequences
  // (\", \', \\).
  //
  static bool
  need_cmdline_relex (const string&);

  // Commonly used parsing functions. Issue diagnostics and throw failed
  // in case of an error.
  //
  // Usually (but not always) parse functions receive the token/type
  // from which it should start consuming and in return the token/type
  // should contain the first token that has not been consumed.
  //
  // Functions that are called parse_*() rather than pre_parse_*() can be
  // used for both stages.
  //
protected:
  value
  parse_variable_line (token&, token_type&);

  // Ordered sequence of here-document redirects that we can expect to
  // see after the command line.
  //
  struct here_redirect
  {
    size_t expr; // Index in command_expr.
    size_t pipe; // Index in command_pipe.
    int fd;      // Redirect fd (0 - in, 1 - out, 2 - err).
  };

  struct here_doc
  {
    // Redirects that share here_doc. Most of the time we will have no
    // more than 2 (2 - for the roundtrip cases). Doesn't refer to
    // overridden redirects and thus can be empty.
    //
    small_vector<here_redirect, 2> redirects;

    string end;
    bool literal; // Literal (single-quote).
    string modifiers;

    // Regex introducer ('\0' if not a regex, so can be used as bool).
    //
    char regex;

    // Regex global flags. Meaningful if regex != '\0'.
    //
    string regex_flags;
  };
  using here_docs = vector<here_doc>;

  pair<command_expr, here_docs>
  parse_command_expr (token&, token_type&, const redirect_aliases&);

  command_exit
  parse_command_exit (token&, token_type&);

  void
  parse_here_documents (token&, token_type&,
                        pair<command_expr, here_docs>&);

  // Result of parsing a single here-document: either a literal string or
  // regex lines (discriminated by the re member) plus the location of the
  // end marker.
  //
  struct parsed_doc
  {
    union
    {
      string str;        // Here-document literal.
      regex_lines regex; // Here-document regex.
    };

    bool re;             // True if regex.
    uint64_t end_line;   // Here-document end marker location.
    uint64_t end_column;

    parsed_doc (string, uint64_t line, uint64_t column);
    parsed_doc (regex_lines&&, uint64_t line, uint64_t column);
    parsed_doc (parsed_doc&&); // Note: move-constructible-only type.
    ~parsed_doc ();
  };

  parsed_doc
  parse_here_document (token&, token_type&,
                       const string&,
                       const string& mode,
                       char re_intro); // '\0' if not a regex.

  // Start pre-parsing a script line returning its type, detected based on
  // the first two tokens. Use the specified lexer mode to peek the second
  // token.
  //
  line_type
  pre_parse_line_start (token&, token_type&, lexer_mode);

  // Parse the env pseudo-builtin arguments up to the program name. Return
  // the program execution timeout, CWD, the list of the variables that
  // should be unset ("name") and/or set ("name=value") in the command
  // environment, and the token/type that starts the program name. Note
  // that the variable unsets come first, if present.
  //
  struct parsed_env
  {
    optional<duration> timeout;
    optional<dir_path> cwd;
    environment_vars variables;
  };

  parsed_env
  parse_env_builtin (token&, token_type&);

  // Execute.
  //
protected:
  // Signatures of the callbacks that are passed to exec_lines() (wrapped
  // in function<>).
  //
  using exec_set_function = void (const variable&,
                                  token&, token_type&,
                                  const location&);

  using exec_cmd_function = void (token&, token_type&,
                                  const iteration_index*, size_t li,
                                  bool single,
                                  const location&);

  using exec_cond_function = bool (token&, token_type&,
                                   const iteration_index*, size_t li,
                                   const location&);

  // Return false if the execution of the script should be terminated with
  // the success status (e.g., as a result of encountering the exit
  // builtin). For unsuccessful termination the failed exception is thrown.
  //
  // If a parser implementation doesn't pre-enter variables into a pool
  // during the pre-parsing phase, then they are entered during the
  // execution phase and so the variable pool must be provided. Note that
  // in this case the variable pool insertions are not MT-safe.
  //
  bool
  exec_lines (lines::const_iterator b, lines::const_iterator e,
              const function<exec_set_function>&,
              const function<exec_cmd_function>&,
              const function<exec_cond_function>&,
              const iteration_index*, size_t& li,
              variable_pool* = nullptr);

  // Customization hooks.
  //
protected:
  // Parse the command's leading name chunk. The argument first is true if
  // this is the first command in the line. The argument env is true if
  // the command is executed via the env pseudo-builtin.
  //
  // During the execution phase try to parse and translate the leading
  // names into the process path and return nullopt if it chooses not to
  // do so, leaving it to the parser to handle. Also return in the last
  // two arguments uninterpreted names, if any.
  //
  // The default implementation always returns nullopt. The derived parser
  // can provide an override that can, for example, handle process path
  // values, executable targets, etc.
  //
  // Note that normally it makes sense to leave simple unpaired names for
  // the parser to handle, unless there is a good reason not to (e.g.,
  // it's a special builtin or some such). Such names may contain
  // something that requires re-lexing, for example `foo|bar`, which won't
  // be easy to translate but which are handled by the parser.
  //
  // Note that the chunk could be of the special cmdline type in which
  // case the names may need to be "preprocessed" (at least unquoted or
  // potentially fully re-lexed) before being analyzed/consumed. Note also
  // that in this case any names left unconsumed must remain of the
  // cmdline type.
  //
  // During the pre-parsing phase the returned process path and names
  // (that must still be parsed) are discarded. The main purpose of the
  // call is to allow implementations to perform static script analysis,
  // recognize and execute certain directives, or some such.
  //
  virtual optional<process_path>
  parse_program (token&, token_type&,
                 bool first, bool env,
                 names&, parse_names_result&);

  // Set lexer pointers for both the current and the base classes.
  //
protected:
  void
  set_lexer (lexer*);

  // Number of quoted tokens since last reset. Note that this includes the
  // peeked token, if any.
  //
protected:
  size_t
  quoted () const;

  void
  reset_quoted (token& current);

  // Bookkeeping for quoted()/reset_quoted() (presumably the replay
  // position as of the last reset; confirm against the implementation).
  // Initialized so that it never holds an indeterminate value.
  //
  size_t replay_quoted_ = 0;

protected:
  // This class's lexer pointer (see set_lexer()); null until set.
  //
  lexer* lexer_ = nullptr;
};
}
}
#endif // LIBBUILD2_SCRIPT_PARSER_HXX
|