From 00df206af5c80aba31bf7d180bdf03d617071e94 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 8 May 2023 10:43:40 +0200 Subject: Add support for dumping build system state in JSON format (GH issue #182) Specifically: 1. New --dump-format option. Valid values are `buildfile` and `json-v0.1`. 2. The --dump option now recognizes two additional values: `match-pre` and `match-post` to dump the state of pre/post-operations. The `match` value now only triggers dumping of the main operation. --- libbuild2/dump.cxx | 1035 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 970 insertions(+), 65 deletions(-) (limited to 'libbuild2/dump.cxx') diff --git a/libbuild2/dump.cxx b/libbuild2/dump.cxx index e00d1b9..ca7254a 100644 --- a/libbuild2/dump.cxx +++ b/libbuild2/dump.cxx @@ -3,6 +3,11 @@ #include +#ifndef BUILD2_BOOTSTRAP +# include // cout +# include +#endif + #include #include #include @@ -11,6 +16,7 @@ #include using namespace std; +using namespace butl; namespace build2 { @@ -53,6 +59,317 @@ namespace build2 } } +#ifndef BUILD2_BOOTSTRAP + + static string + quoted_target_name (const names_view& ns, bool rel) + { + ostringstream os; + stream_verb (os, stream_verbosity (rel ? 0 : 1, 0)); + to_stream (os, ns, quote_mode::effective, '@'); + return os.str (); + } + + static void + dump_quoted_target_name (json::stream_serializer& j, + const names_view& ns, + bool rel) + { + j.value (quoted_target_name (ns, rel)); + } + + static string + quoted_target_name (const target& t, bool rel) + { + names ns (t.as_name ()); // Note: potentially adds an extension. + + // Don't print target names relative if the target is in src and out!=src. + // Failed that, we will end up with pointless ../../../... paths. + // + // It may also seem that we can omit @-qualification in this case, since + // it is implied by the containing scope. However, keep in mind that the + // target may not be directly in this scope. We could make it relative, + // though. + // + if (rel && !t.out.empty ()) + { + // Make the out relative ourselves and then disable relative for src. + // + dir_path& o (ns.back ().dir); + o = relative (o); // Note: may return empty path. + if (o.empty ()) + o = dir_path ("."); + + rel = false; + } + + return quoted_target_name (ns, rel); + } + + void + dump_quoted_target_name (json::stream_serializer& j, + const target& t, + bool rel) + { + j.value (quoted_target_name (t, rel)); + } + + using target_name_cache = unordered_map; + + static void + dump_quoted_target_name (json::stream_serializer& j, + const target& t, + target_name_cache& tc) + { + auto i (tc.find (&t)); + if (i == tc.end ()) + i = tc.emplace (&t, quoted_target_name (t, false /* relative */)).first; + + j.value (i->second); + } + + void + dump_display_target_name (json::stream_serializer& j, + const target& t, + bool rel) + { + // Note: see the quoted version above for details. + + target_key tk (t.key ()); + + dir_path o; + if (rel && !tk.out->empty ()) + { + o = relative (*tk.out); + if (o.empty ()) + o = dir_path ("."); + tk.out = &o; + + rel = false; + } + + // Change the stream verbosity to print relative if requested and omit + // extension. + // + ostringstream os; + stream_verb (os, stream_verbosity (rel ? 0 : 1, 0)); + os << tk; + j.value (os.str ()); + } + + static void + dump_value (json::stream_serializer& j, const value& v) + { + // Hints. + // + // Note that the pair hint should only be used for simple names. + // + optional h_array; + optional h_pair; // true/false - second/first is optional. + + if (v.null) + { + j.value (nullptr); + return; + } + else if (v.type != nullptr) + { + const value_type& t (*v.type); + + auto s_array = [&j] (const auto& vs) + { + j.begin_array (); + for (const auto& v: vs) j.value (v); + j.end_array (); + }; + + auto s_array_string = [&j] (const auto& vs) + { + j.begin_array (); + for (const auto& v: vs) j.value (v.string ()); + j.end_array (); + }; + + // Note: check in the derived-first order. + // + if (t.is_a ()) j.value (v.as ()); + else if (t.is_a ()) j.value (v.as ()); + else if (t.is_a ()) j.value (v.as ()); + else if (t.is_a ()) j.value (v.as ()); + else if (t.is_a ()) j.value (v.as ().string ()); + else if (t.is_a ()) j.value (v.as ().string ()); + else if (t.is_a ()) j.value (v.as ().string ()); + else if (t.is_a ()) j.value (v.as ().string ()); + else if (t.is_a ()) s_array (v.as ()); + else if (t.is_a ()) s_array (v.as ()); + else if (t.is_a ()) s_array (v.as ()); + else if (t.is_a ()) s_array_string (v.as ()); + else if (t.is_a ()) s_array_string (v.as ()); + else + { + // Note: check in the derived-first order. + // + if (t.is_a ()) h_array = false; + else if (t.is_a ()) + { + h_array = false; + h_pair = true; + } + else if (t.is_a ()) + { + // Decide on array dynamically. + h_pair = true; + } + else if (t.is_a ()) + { + h_array = false; + h_pair = true; + } + else if (t.is_a () || + t.is_a> ()) + { + h_array = true; + } + else if (t.is_a>> () || + t.is_a>>> () || + t.is_a>>> () || + t.is_a> () || + t.is_a>> () || + t.is_a>> () || + t.is_a> ()) + { + h_array = true; + h_pair = true; + } + else if (t.is_a, string>> () || + t.is_a, string>>> ()) + { + h_array = true; + h_pair = false; + } + + goto fall_through; + } + + return; + + fall_through: + ; + } + + names storage; + names_view ns (reverse (v, storage, true /* reduce */)); + + if (ns.empty ()) + { + // When it comes to representing an empty value, our options are: empty + // array ([]), empty object ({}), or an absent member. The latter feels + // closer to null than empty, so that's out. After some experimentation, + // it feels the best choice is to use array unless we know for sure it + // is not, in which case we use an object if it's a pair and empty + // string otherwise (the empty string makes sense because we serialize + // complex names as target names; see below). + // + if (!h_array || *h_array) + { + j.begin_array (); + j.end_array (); + } + else + { + if (h_pair) + { + j.begin_object (); + j.end_object (); + } + else + j.value (""); + } + } + else + { + if (!h_array) + h_array = ns.size () > 2 || (ns.size () == 2 && !ns.front ().pair); + + if (*h_array) + j.begin_array (); + + // While it may be tempting to try to provide a heterogeneous array + // (i.e., all strings, all objects, all pairs), in case of pairs we + // actually don't know whether a non-pair element is first or second + // (it's up to interpretation; though we do hint which one is optional + // for typed values above). So we serialize each name in its most + // appropriate form. + // + auto simple = [] (const name& n) + { + return n.simple () || n.directory () || n.file (); + }; + + auto s_simple = [&j] (const name& n) + { + if (n.simple ()) + j.value (n.value); + else if (n.directory ()) + j.value (n.dir.string ()); + else if (n.file ()) + { + // Note: both must be present due to earlier checks. + // + j.value ((n.dir / n.value).string ()); + } + else + return false; + + return true; + }; + + for (auto i (ns.begin ()), e (ns.end ()); i != e; ) + { + const name& l (*i++); + const name* r (l.pair ? &*i++ : nullptr); + + optional hp (h_pair); + + if (!hp && r != nullptr && simple (l) && simple (*r)) + hp = true; + + if (hp) + { + // Pair of simple names. + // + j.begin_object (); + + if (r != nullptr) + { + j.member_name ("first"); s_simple (l); + j.member_name ("second"); s_simple (*r); + } + else + { + j.member_name (*hp ? "first" : "second"); s_simple (l); + } + + j.end_object (); + } + else if (r == nullptr && s_simple (l)) + ; + else + { + // If complex name (or pair thereof), then assume a target name. + // + dump_quoted_target_name (j, + names_view (&l, r != nullptr ? 2 : 1), + false /* relative */); + } + } + + if (*h_array) + j.end_array (); + } + } +#endif + enum class variable_kind {scope, tt_pat, target, rule, prerequisite}; static void @@ -127,6 +444,68 @@ namespace build2 } } +#ifndef BUILD2_BOOTSTRAP + static void + dump_variable (json::stream_serializer& j, + const variable_map& vm, + const variable_map::const_iterator& vi, + const scope& s, + variable_kind k) + { + // Note: see the buildfile version above for comments. + + assert (k != variable_kind::tt_pat); // TODO + + const auto& p (*vi); + const variable& var (p.first); + const value& v (p.second); + + lookup l (v, var, vm); + if (k != variable_kind::prerequisite) + { + if (var.override ()) + return; // Ignore. + + if (var.overrides != nullptr) + { + l = s.lookup_override ( + var, + make_pair (l, 1), + k == variable_kind::target || k == variable_kind::rule, + k == variable_kind::rule).first; + + assert (l.defined ()); // We at least have the original. + } + } + + // Note that we do not distinguish between variable/value type. + // + // An empty value of a non-array type is represented as an empty object + // ({}). + // +#if 0 + struct variable + { + string name; + optional type; + json_value value; // string|number|boolean|null|object|array + }; +#endif + + j.begin_object (); + + j.member ("name", var.name); + + if (l->type != nullptr) + j.member ("type", l->type->name); + + j.member_name ("value"); + dump_value (j, *l); + + j.end_object (); + } +#endif + static void dump_variables (ostream& os, string& ind, @@ -143,6 +522,20 @@ namespace build2 } } +#ifndef BUILD2_BOOTSTRAP + static void + dump_variables (json::stream_serializer& j, + const variable_map& vars, + const scope& s, + variable_kind k) + { + for (auto i (vars.begin ()), e (vars.end ()); i != e; ++i) + { + dump_variable (j, vars, i, s, k); + } + } +#endif + // Dump target type/pattern-specific variables. // static void @@ -248,10 +641,27 @@ namespace build2 } } + // Similar to target::matched() but for the load phase. + // + static inline bool + matched (const target& t, action a) + { + // Note: running serial and task_count is 0 before any operation has + // started. + // + if (size_t c = t[a].task_count.load (memory_order_relaxed)) + { + if (c == t.ctx.count_applied () || c == t.ctx.count_executed ()) + return true; + } + + return false; + } + static void - dump_target (optional a, - ostream& os, + dump_target (ostream& os, string& ind, + optional a, const target& t, const scope& s, bool rel) @@ -260,6 +670,9 @@ namespace build2 // scope. To achieve this we are going to temporarily lower the stream // path verbosity to level 0. // + // @@ Not if in src and out != src? Otherwise end up with ../../../... + // See JSON version for the state of the art. + // stream_verbosity osv, nsv; if (rel) { @@ -321,32 +734,26 @@ namespace build2 // If the target has been matched to a rule, we also print resolved // prerequisite targets. // - // Note: running serial and task_count is 0 before any operation has - // started. - // const prerequisite_targets* pts (nullptr); { action inner; // @@ Only for the inner part of the action currently. - if (size_t c = t[inner].task_count.load (memory_order_relaxed)) + if (matched (t, inner)) { - if (c == t.ctx.count_applied () || c == t.ctx.count_executed ()) - { - pts = &t.prerequisite_targets[inner]; + pts = &t.prerequisite_targets[inner]; - bool f (false); - for (const target* pt: *pts) + bool f (false); + for (const target* pt: *pts) + { + if (pt != nullptr) { - if (pt != nullptr) - { - f = true; - break; - } + f = true; + break; } - - if (!f) - pts = nullptr; } + + if (!f) + pts = nullptr; } } @@ -510,10 +917,318 @@ namespace build2 stream_verb (os, osv); } +#ifndef BUILD2_BOOTSTRAP + static void + dump_target (json::stream_serializer& j, + optional a, + const target& t, + const scope& s, + bool rel, + target_name_cache& tcache) + { + // Note: see the buildfile version above for comments. + + // Note that the target name (and display_name) are relative to the + // containing scope (if any). + // +#if 0 + struct prerequisite + { + string name; // Quoted/qualified name. + string type; + vector variables; // Prerequisite variables. + }; + + struct loaded_target + { + string name; // Quoted/qualified name. + string display_name; + string type; + //string declaration; + optional group; // Quoted/qualified group target name. + + vector variables; // Target variables. + + vector prerequisites; + }; + + // @@ TODO: target attributes (rule_hint) + + struct prerequisite_target + { + string name; // Target name (always absolute). + string type; + bool adhoc; + }; + + struct operation_state + { + string rule; // null if direct recipe match + + optional state; // unchanged|changed|group + + vector variables; // Rule variables. + + vector prerequisite_targets; + }; + + struct matched_target + { + string name; + string display_name; + string type; + //string declaration; + optional group; + + optional path; // Absent of not path-based target, not assigned. + + vector variables; + + optional outer_operation; // null if not matched. + operation_state inner_operation; // null if not matched. + }; +#endif + + j.begin_object (); + + j.member_name ("name"); + dump_quoted_target_name (j, t, rel /* relative */); + + j.member_name ("display_name"); + dump_display_target_name (j, t, rel /* relative */); + + j.member ("type", t.type ().name); + + // @@ This value currently doesn't make much sense: + // + // - why are all the system headers prereq-new? + // + // - why is synthesized obje{} prereq-new? + // +#if 0 + { + const char* v (nullptr); + switch (t.decl) + { + case target_decl::prereq_new: v = "prerequisite-new"; break; + case target_decl::prereq_file: v = "prerequisite-file"; break; + case target_decl::implied: v = "implied"; break; + case target_decl::real: v = "real"; break; + } + j.member ("declaration", v); + } +#endif + + if (t.group != nullptr) + { + j.member_name ("group"); + dump_quoted_target_name (j, *t.group, tcache); + } + + if (a) + { + const string* v (nullptr); + + if (t.is_a () || t.is_a ()) + { + v = &t.dir.string (); + } + else if (const auto* pt = t.is_a ()) + { + const path& p (pt->path ()); + + if (!p.empty ()) + v = &p.string (); + } + + if (v != nullptr) + j.member ("path", *v); + } + + // Target variables. + // + if (!t.vars.empty ()) + { + j.member_begin_array ("variables"); + dump_variables (j, t.vars, s, variable_kind::target); + j.end_array (); + } + + // Prerequisites. + // + if (!a) + { + const prerequisites& ps (t.prerequisites ()); + + if (!ps.empty ()) + { + j.member_begin_array ("prerequisites"); + + for (const prerequisite& p: ps) + { + j.begin_object (); + + { + // Cobble together an equivalent of dump_quoted_target_name(). + // + prerequisite_key pk (p.key ()); + target_key& tk (pk.tk); + + // It's possible that the containing scope differs from + // prerequisite's. This, for example, happens when we copy the + // prerequisite for a synthesized obj{} dependency that happens to + // be in a subdirectory, as in exe{foo}:src/cxx{foo}. In this + // case, we need to rebase relative paths to the containing scope. + // + dir_path d, o; + if (p.scope != s) + { + if (tk.out->empty ()) + { + if (tk.dir->relative ()) + { + d = (p.scope.out_path () / *tk.dir).relative (s.out_path ()); + tk.dir = &d; + } + } + else + { + if (tk.dir->relative ()) + { + d = (p.scope.src_path () / *tk.dir).relative (s.src_path ()); + tk.dir = &d; + } + + if (tk.out->relative ()) + { + o = (p.scope.out_path () / *tk.out).relative (s.out_path ()); + if (o.empty ()) + o = dir_path ("."); + tk.out = &o; + } + } + } + + // If prerequisite paths are absolute, keep them absolute. + // + ostringstream os; + stream_verb (os, stream_verbosity (1, 0)); + + if (pk.proj) + os << *pk.proj << '%'; + + to_stream (os, pk.tk.as_name (), quote_mode::effective, '@'); + + j.member ("name", os.str ()); + } + + j.member ("type", p.type.name); + + if (!p.vars.empty ()) + { + j.member_begin_array ("variables"); + dump_variables (j, p.vars, s, variable_kind::prerequisite); + j.end_array (); + } + + j.end_object (); + } + + j.end_array (); + } + } + else + { + // Matched rules and their state (prerequisite_targets, vars, etc). + // + auto dump_opstate = [&tcache, &j, &s, &t] (action a) + { + const target::opstate& o (t[a]); + + j.begin_object (); + + j.member ("rule", o.rule != nullptr ? o.rule->first.c_str () : nullptr); + + // It feels natural to omit the unknown state, as if it corresponded + // to absent in optional. + // + if (o.state != target_state::unknown) + { + assert (o.state == target_state::unchanged || + o.state == target_state::changed || + o.state == target_state::group); + + j.member ("state", to_string (o.state)); + } + + if (!o.vars.empty ()) + { + j.member_begin_array ("variables"); + dump_variables (j, o.vars, s, variable_kind::rule); + j.end_array (); + } + + { + bool first (true); + for (const prerequisite_target& pt: t.prerequisite_targets[a]) + { + if (pt.target == nullptr) + continue; + + if (first) + { + j.member_begin_array ("prerequisite_targets"); + first = false; + } + + j.begin_object (); + + j.member_name ("name"); + dump_quoted_target_name (j, *pt.target, tcache); + + j.member ("type", pt.target->type ().name); + + if (pt.adhoc ()) + j.member ("adhoc", true); + + j.end_object (); + } + + if (!first) + j.end_array (); + } + + j.end_object (); + }; + + if (a->outer ()) + { + j.member_name ("outer_operation"); + if (matched (t, *a)) + dump_opstate (*a); + else + j.value (nullptr); + } + + { + action ia (a->inner_action ()); + + j.member_name ("inner_operation"); + if (matched (t, ia)) + dump_opstate (ia); + else + j.value (nullptr); + } + } + + j.end_object (); + } +#endif + static void - dump_scope (optional a, - ostream& os, + dump_scope (ostream& os, string& ind, + optional a, scope_map::const_iterator& i, bool rel) { @@ -588,21 +1303,25 @@ namespace build2 // disabled amalgamation will be printed directly inside the global // scope). // - for (auto e (p.ctx.scopes.end ()); - (i != e && - i->second.front () != nullptr && - i->second.front ()->parent_scope () == &p); ) + for (auto e (p.ctx.scopes.end ()); i != e; ) { - if (vb || rb || sb) + if (i->second.front () == nullptr) + ++i; // Skip over src paths. + else if (i->second.front ()->parent_scope () != &p) + break; // Moved past our parent. + else { - os << endl; - vb = rb = false; - } + if (vb || rb || sb) + { + os << endl; + vb = rb = false; + } - os << endl; // Extra newline between scope blocks. + os << endl; // Extra newline between scope blocks. - dump_scope (a, os, ind, i, true /* relative */); - sb = true; + dump_scope (os, ind, a, i, true /* relative */); + sb = true; + } } // Targets. @@ -624,7 +1343,7 @@ namespace build2 } os << endl; // Extra newline between targets. - dump_target (a, os, ind, t, p, true /* relative */); + dump_target (os, ind, a, t, p, true /* relative */); tb = true; } @@ -635,59 +1354,245 @@ namespace build2 << ind << '}'; } +#ifndef BUILD2_BOOTSTRAP + static void + dump_scope (json::stream_serializer& j, + optional a, + scope_map::const_iterator& i, + bool rel, + target_name_cache& tcache) + { + // Note: see the buildfile version above for additional comments. + + const scope& p (*i->second.front ()); + const dir_path& d (i->first); + ++i; + +#if 0 + struct scope + { + // The out_path member is relative to parent scopes. It is empty for the + // special global scope. The src_path member is absent if the same as + // out_path (in-source build or scope outside of project). + // + string out_path; + optional src_path; + + vector variables; // Non-type/pattern scope variables. + + vector scopes; // Immediate children. + + vector targets; + }; +#endif + + j.begin_object (); + + if (d.empty ()) + j.member ("out_path", ""); // Global scope. + else + { + const dir_path& rd (rel ? relative (d) : d); + j.member ("out_path", rd.empty () ? string (".") : rd.string ()); + + if (!p.out_eq_src ()) + j.member ("src_path", p.src_path ().string ()); + } + + const dir_path* orb (relative_base); + relative_base = &d; + + // Scope variables. + // + if (!p.vars.empty ()) + { + j.member_begin_array ("variables"); + dump_variables (j, p.vars, p, variable_kind::scope); + j.end_array (); + } + + // Nested scopes of which we are an immediate parent. + // + { + bool first (true); + for (auto e (p.ctx.scopes.end ()); i != e; ) + { + if (i->second.front () == nullptr) + ++i; + else if (i->second.front ()->parent_scope () != &p) + break; + else + { + if (first) + { + j.member_begin_array ("scopes"); + first = false; + } + + dump_scope (j, a, i, true /* relative */, tcache); + } + } + + if (!first) + j.end_array (); + } + + // Targets. + // + { + bool first (true); + for (const auto& pt: p.ctx.targets) + { + const target& t (*pt); + + if (&p != &t.base_scope ()) // @@ PERF + continue; + + // Skip targets that haven't been matched for this action. + // + if (a) + { + if (!(matched (t, a->inner_action ()) || + (a->outer () && matched (t, *a)))) + continue; + } + + if (first) + { + j.member_begin_array ("targets"); + first = false; + } + + dump_target (j, a, t, p, true /* relative */, tcache); + } + + if (!first) + j.end_array (); + } + + relative_base = orb; + j.end_object (); + } +#endif + void - dump (const context& c, optional a) + dump (const context& c, optional a, dump_format fmt) { auto i (c.scopes.begin ()); assert (i->second.front () == &c.global_scope); - // We don't lock diag_stream here as dump() is supposed to be called from - // the main thread prior/after to any other threads being spawned. - // - string ind; - ostream& os (*diag_stream); - dump_scope (a, os, ind, i, false /* relative */); - os << endl; + switch (fmt) + { + case dump_format::buildfile: + { + // We don't lock diag_stream here as dump() is supposed to be called + // from the main thread prior/after to any other threads being + // spawned. + // + string ind; + ostream& os (*diag_stream); + dump_scope (os, ind, a, i, false /* relative */); + os << endl; + break; + } + case dump_format::json: + { +#ifndef BUILD2_BOOTSTRAP + target_name_cache tc; + json::stream_serializer j (cout, 0 /* indent */); + dump_scope (j, a, i, false /* relative */, tc); + cout << endl; +#else + assert (false); +#endif + break; + } + } } void - dump (const scope* s, optional a, const char* cind) + dump (const scope* s, optional a, dump_format fmt, const char* cind) { - string ind (cind); - ostream& os (*diag_stream); - + scope_map::const_iterator i; if (s != nullptr) { const scope_map& m (s->ctx.scopes); - auto i (m.find_exact (s->out_path ())); + i = m.find_exact (s->out_path ()); assert (i != m.end () && i->second.front () == s); - - dump_scope (a, os, ind, i, false /* relative */); } - else - os << ind << ""; - os << endl; + switch (fmt) + { + case dump_format::buildfile: + { + string ind (cind); + ostream& os (*diag_stream); + + if (s != nullptr) + dump_scope (os, ind, a, i, false /* relative */); + else + os << ind << ""; + + os << endl; + break; + } + case dump_format::json: + { +#ifndef BUILD2_BOOTSTRAP + target_name_cache tc; + json::stream_serializer j (cout, 0 /* indent */); + + if (s != nullptr) + dump_scope (j, a, i, false /* relative */, tc); + else + j.value (nullptr); + + cout << endl; +#else + assert (false); +#endif + break; + } + } } void - dump (const target* t, optional a, const char* cind) + dump (const target* t, optional a, dump_format fmt, const char* cind) { - string ind (cind); - ostream& os (*diag_stream); + const scope* bs (t != nullptr ? &t->base_scope () : nullptr); - if (t != nullptr) + switch (fmt) { - dump_target (a, - os, - ind, - *t, - t->base_scope (), - false /* relative */); - } - else - os << ind << ""; + case dump_format::buildfile: + { + string ind (cind); + ostream& os (*diag_stream); - os << endl; + if (t != nullptr) + dump_target (os, ind, a, *t, *bs, false /* relative */); + else + os << ind << ""; + + os << endl; + break; + } + case dump_format::json: + { +#ifndef BUILD2_BOOTSTRAP + target_name_cache tc; + json::stream_serializer j (cout, 0 /* indent */); + + if (t != nullptr) + dump_target (j, a, *t, *bs, false /* relative */, tc); + else + j.value (nullptr); + + cout << endl; +#else + assert (false); +#endif + break; + } + } } } -- cgit v1.1