diff options
Diffstat (limited to 'bpkg/fetch-git.cxx')
-rw-r--r-- | bpkg/fetch-git.cxx | 783 |
1 files changed, 610 insertions, 173 deletions
diff --git a/bpkg/fetch-git.cxx b/bpkg/fetch-git.cxx index 69eb5a0..ff53318 100644 --- a/bpkg/fetch-git.cxx +++ b/bpkg/fetch-git.cxx @@ -5,12 +5,12 @@ #include <bpkg/fetch.hxx> #include <map> -#include <algorithm> // find(), find_if(), replace(), sort() +#include <algorithm> // find_if(), replace(), sort() #include <libbutl/git.mxx> #include <libbutl/utility.mxx> // digit(), xdigit() #include <libbutl/process.mxx> -#include <libbutl/filesystem.mxx> // path_match() +#include <libbutl/filesystem.mxx> // path_match(), path_entry() #include <libbutl/semantic-version.mxx> #include <libbutl/standard-version.mxx> // parse_standard_version() @@ -1490,65 +1490,50 @@ namespace bpkg return sort (move (r)); } - // Checkout the repository submodules (see git_checkout_submodules() - // description for details). + // Print diagnostics, optionally attributing it to a submodule with the + // specified (non-empty) directory prefix, and fail. // - static void - checkout_submodules (const common_options& co, - const dir_path& dir, - const dir_path& git_dir, - const dir_path& prefix) + [[noreturn]] static void + submodule_failure (const string& desc, + const dir_path& prefix, + const exception* e = nullptr) { - tracer trace ("checkout_submodules"); - - path mf (dir / path (".gitmodules")); - - if (!exists (mf)) - return; - - auto failure = [&prefix] (const char* desc) - { - diag_record dr (fail); - dr << desc; + diag_record dr (fail); + dr << desc; - if (!prefix.empty ()) - // Strips the trailing slash. - // - dr << " for submodule '" << prefix.string () << "'"; + if (!prefix.empty ()) + // Strips the trailing slash. + // + dr << " for submodule '" << prefix.string () << "'"; - dr << endg; - }; + if (e != nullptr) + dr << ": " << *e; - // Initialize submodules. - // - if (!run_git ( - co, - co.git_option (), - "-C", dir, + dr << endg; + } - // Note that older git versions don't recognize the --super-prefix - // option but seem to behave correctly without any additional - // efforts when it is omitted. - // - !prefix.empty () && git_ver >= semantic_version {2, 14, 0} - ? strings ({"--super-prefix", prefix.posix_representation ()}) - : strings (), + // Find submodules for a top repository or submodule directory. The prefix + // is only used for diagnostics (see submodule_failure() for details). + // + struct submodule + { + dir_path path; // Relative to the containing repository. + string commit; + }; + using submodules = vector<submodule>; - "submodule--helper", "init", - verb < 2 ? "-q" : nullptr)) - failure ("unable to initialize submodules"); + static submodules + find_submodules (const common_options& co, + const dir_path& dir, + const dir_path& prefix) + { + tracer trace ("find_submodules"); - repository_url orig_url (origin_url (co, dir)); + auto failure = [&prefix] (const string& d, const exception* e = nullptr) + { + submodule_failure (d, prefix, e); + }; - // Iterate over the registered submodules initializing/fetching them and - // recursively checking them out. - // - // Note that we don't expect submodules nesting be too deep and so recurse - // while reading the git process output. - // - // Also note that we don't catch the failed exception here, relying on the - // fact that the process destructor will wait for the process completion. - // fdpipe pipe (open_pipe ()); process pr (start_git (co, @@ -1563,6 +1548,7 @@ namespace bpkg try { + submodules r; ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); for (string l; !eof (getline (is, l)); ) @@ -1578,156 +1564,220 @@ namespace bpkg l4 ([&]{trace << "submodule: " << l;}); if (!(l.size () > 50 && l[48] == '0' && l[49] == '\t')) - failure ("invalid submodule description"); - - string commit (l.substr (7, 40)); + throw runtime_error ("invalid submodule description '" + l + "'"); - // Submodule directory path, relative to the containing project. + // Submodule directory path is relative to the containing repository. // - dir_path sdir (l.substr (50)); + r.push_back (submodule {dir_path (string (l, 50)) /* path */, + string (l, 7, 40) /* commit */}); + } - // Submodule directory path, relative to the top project. - // - dir_path psdir (prefix / sdir); - string psd (psdir.posix_string ()); // For use in the diagnostics. + is.close (); - string nm (git_line (co, "submodule name", - co.git_option (), - "-C", dir, - "submodule--helper", "name", - sdir)); + if (pr.wait ()) + return r; - string uo ("submodule." + nm + ".url"); - string uv (config_get (co, dir, uo, "submodule URL")); + // Fall through. + } + catch (const invalid_path& e) + { + if (pr.wait ()) + failure ("invalid submodule path '" + e.path + "'"); - l4 ([&]{trace << "name: " << nm << ", URL: " << uv;}); + // Fall through. + } + catch (const io_error& e) + { + if (pr.wait ()) + failure ("unable to read submodules list", &e); - dir_path fsdir (dir / sdir); - bool initialized (git_repository (fsdir)); + // Fall through. + } + // Note that the io_error class inherits from the runtime_error class, so + // this catch-clause must go last. + // + catch (const runtime_error& e) + { + if (pr.wait ()) + failure (e.what ()); - // If the submodule is already initialized and its commit didn't - // change then we skip it. - // - if (initialized && git_line (co, "submodule commit", - co.git_option (), - "-C", fsdir, - "rev-parse", - "--verify", - "HEAD") == commit) - continue; - - // Note that the "submodule--helper init" command (see above) doesn't - // sync the submodule URL in .git/config file with the one in - // .gitmodules file, that is a primary URL source. Thus, we always - // calculate the URL using .gitmodules and update it in .git/config, if - // necessary. - // - repository_url url; + // Fall through. + } - try - { - url = from_git_url ( - config_get (co, mf, uo, "submodule original URL")); + // We should only get here if the child exited with an error status. + // + assert (!pr.wait ()); - // Complete the relative submodule URL against the containing - // repository origin URL. - // - if (url.scheme == repository_protocol::file && url.path->relative ()) - { - repository_url u (orig_url); - *u.path /= *url.path; + submodule_failure ("unable to list submodules", prefix); + } - // Note that we need to collapse 'example.com/a/..' to - // 'example.com/', rather than to 'example.com/.'. - // - u.path->normalize ( - false /* actual */, - orig_url.scheme != repository_protocol::file /* cur_empty */); + // Checkout the repository submodules (see git_checkout_submodules() + // description for details). + // + static void + checkout_submodules (const common_options& co, + const dir_path& dir, + const dir_path& git_dir, + const dir_path& prefix) + { + tracer trace ("checkout_submodules"); - url = move (u); - } + auto failure = [&prefix] (const string& d, const exception* e = nullptr) + { + submodule_failure (d, prefix, e); + }; - // Fix-up submodule URL in .git/config file, if required. + path mf (dir / path (".gitmodules")); + + if (!exists (mf)) + return; + + // Initialize submodules. + // + if (!run_git ( + co, + co.git_option (), + "-C", dir, + + // Note that older git versions don't recognize the --super-prefix + // option but seem to behave correctly without any additional + // efforts when it is omitted. // - if (url != from_git_url (move (uv))) - { - config_set (co, dir, uo, to_git_url (url)); + !prefix.empty () && git_ver >= semantic_version {2, 14, 0} + ? strings ({"--super-prefix", prefix.posix_representation ()}) + : strings (), - // We also need to fix-up submodule's origin URL, if its - // repository is already initialized. - // - if (initialized) - origin_url (co, fsdir, url); - } - } - catch (const invalid_path& e) - { - fail << "invalid repository path for submodule '" << psd << "': " - << e << endg; - } - catch (const invalid_argument& e) - { - fail << "invalid repository URL for submodule '" << psd << "': " - << e << endg; - } + "submodule--helper", "init", + verb < 2 ? "-q" : nullptr)) + failure ("unable to initialize submodules"); - // Initialize the submodule repository. - // - // Note that we initialize the submodule repository git directory out - // of the working tree, the same way as "submodule--helper clone" - // does. This prevents us from loosing the fetched data when switching - // the containing repository between revisions, that potentially - // contain different sets of submodules. - // - dir_path gdir (git_dir / dir_path ("modules") / sdir); + repository_url orig_url (origin_url (co, dir)); - if (!initialized) - { - mk_p (gdir); - init (co, fsdir, url, gdir); - } + // Iterate over the registered submodules initializing/fetching them and + // recursively checking them out. + // + for (const submodule& sm: find_submodules (co, dir, prefix)) + { + // Submodule directory path, relative to the top repository. + // + dir_path psdir (prefix / sm.path); + string psd (psdir.posix_string ()); // For use in the diagnostics. - // Fetch and checkout the submodule. - // - git_ref_filters rfs { - git_ref_filter {nullopt, commit, false /* exclusion */}}; + string nm (git_line (co, "submodule name", + co.git_option (), + "-C", dir, + "submodule--helper", "name", + sm.path)); + + string uo ("submodule." + nm + ".url"); + string uv (config_get (co, dir, uo, "submodule URL")); + + l4 ([&]{trace << "name: " << nm << ", URL: " << uv;}); + + dir_path fsdir (dir / sm.path); + bool initialized (git_repository (fsdir)); - fetch (co, fsdir, psdir, rfs); + // If the submodule is already initialized and its commit didn't + // change then we skip it. + // + if (initialized && git_line (co, "submodule commit", + co.git_option (), + "-C", fsdir, + "rev-parse", + "--verify", + "HEAD") == sm.commit) + continue; + + // Note that the "submodule--helper init" command (see above) doesn't + // sync the submodule URL in .git/config file with the one in + // .gitmodules file, that is a primary URL source. Thus, we always + // calculate the URL using .gitmodules and update it in .git/config, if + // necessary. + // + repository_url url; - git_checkout (co, fsdir, commit); + try + { + url = from_git_url ( + config_get (co, mf, uo, "submodule original URL")); - // Let's make the message match the git-submodule script output - // (again, except for capitalization). + // Complete the relative submodule URL against the containing + // repository origin URL. // - if (verb && !co.no_progress ()) - text << "submodule path '" << psd << "': checked out '" << commit - << "'"; + if (url.scheme == repository_protocol::file && url.path->relative ()) + { + repository_url u (orig_url); + *u.path /= *url.path; + + // Note that we need to collapse 'example.com/a/..' to + // 'example.com/', rather than to 'example.com/.'. + // + u.path->normalize ( + false /* actual */, + orig_url.scheme != repository_protocol::file /* cur_empty */); + + url = move (u); + } - // Check out the submodule submodules, recursively. + // Fix-up submodule URL in .git/config file, if required. // - checkout_submodules (co, fsdir, gdir, psdir); + if (url != from_git_url (move (uv))) + { + config_set (co, dir, uo, to_git_url (url)); + + // We also need to fix-up submodule's origin URL, if its + // repository is already initialized. + // + if (initialized) + origin_url (co, fsdir, url); + } + } + catch (const invalid_path& e) + { + failure ( + "invalid submodule '" + nm + "' repository path '" + e.path + "'"); + } + catch (const invalid_argument& e) + { + failure ("invalid submodule '" + nm + "' repository URL", &e); } - is.close (); + // Initialize the submodule repository. + // + // Note that we initialize the submodule repository git directory out of + // the working tree, the same way as "submodule--helper clone" does. + // This prevents us from loosing the fetched data when switching the + // containing repository between revisions, that potentially contain + // different sets of submodules. + // + dir_path gdir (git_dir / dir_path ("modules") / sm.path); - if (pr.wait ()) - return; + if (!initialized) + { + mk_p (gdir); + init (co, fsdir, url, gdir); + } - // Fall through. - } - catch (const io_error&) - { - if (pr.wait ()) - failure ("unable to read submodules list"); + // Fetch and checkout the submodule. + // + git_ref_filters rfs { + git_ref_filter {nullopt, sm.commit, false /* exclusion */}}; - // Fall through. - } + fetch (co, fsdir, psdir, rfs); - // We should only get here if the child exited with an error status. - // - assert (!pr.wait ()); + git_checkout (co, fsdir, sm.commit); - failure ("unable to list submodules"); + // Let's make the message match the git-submodule script output (again, + // except for capitalization). + // + if (verb && !co.no_progress ()) + text << "submodule path '" << psd << "': checked out '" << sm.commit + << "'"; + + // Check out the submodule submodules, recursively. + // + checkout_submodules (co, fsdir, gdir, psdir); + } } void @@ -1805,8 +1855,8 @@ namespace bpkg const string& commit) { // For some (probably valid) reason the hard reset command doesn't remove - // a submodule directory that is not plugged into the project anymore. It - // also prints the non-suppressible warning like this: + // a submodule directory that is not plugged into the repository anymore. + // It also prints the non-suppressible warning like this: // // warning: unable to rmdir libbar: Directory not empty // @@ -1851,4 +1901,391 @@ namespace bpkg dir / dir_path (".git"), dir_path () /* prefix */); } + +#ifndef _WIN32 + + // Noop on POSIX. + // + bool + git_fixup_worktree (const common_options&, const dir_path&, bool) + { + return false; + } + +#else + + // Find symlinks in the repository (non-recursive submodule-wise). + // + static paths + find_symlinks (const common_options& co, + const dir_path& dir, + const dir_path& prefix) + { + tracer trace ("find_symlinks"); + + auto failure = [&prefix] (const string& d, const exception* e = nullptr) + { + submodule_failure (d, prefix, e); + }; + + fdpipe pipe (open_pipe ()); + + // Note: -z tells git to print file paths literally (without escaping) and + // terminate lines with NUL character. + // + process pr (start_git (co, + pipe, 2 /* stderr */, + co.git_option (), + "-C", dir, + "ls-files", + "--stage", + "-z")); + + // Shouldn't throw, unless something is severely damaged. + // + pipe.out.close (); + + try + { + paths r; + ifdstream is (move (pipe.in), fdstream_mode::skip, ifdstream::badbit); + + for (string l; !eof (getline (is, l, '\0')); ) + { + // The line describing a file is NUL-terminated and has the following + // form: + // + // <mode><SPACE><object><SPACE><stage><TAB><path> + // + // The mode is a 6-digit octal representation of the file type and + // permission bits mask. For example: + // + // 100644 165b42ec7a10fb6dd4a60b756fa1966c1065ef85 0 README + // + l4 ([&]{trace << "file: " << l;}); + + if (!(l.size () > 50 && l[48] == '0' && l[49] == '\t')) + throw runtime_error ("invalid file description '" + l + "'"); + + // For symlinks permission bits are always zero, so we can match the + // mode as a string. + // + if (l.compare (0, 6, "120000") == 0) + r.push_back (path (string (l, 50))); + } + + is.close (); + + if (pr.wait ()) + return r; + + // Fall through. + } + catch (const invalid_path& e) + { + if (pr.wait ()) + failure ("invalid repository symlink path '" + e.path + "'"); + + // Fall through. + } + catch (const io_error& e) + { + if (pr.wait ()) + failure ("unable to read repository file list", &e); + + // Fall through. + } + // Note that the io_error class inherits from the runtime_error class, + // so this catch-clause must go last. + // + catch (const runtime_error& e) + { + if (pr.wait ()) + failure (e.what ()); + + // Fall through. + } + + // We should only get here if the child exited with an error status. + // + assert (!pr.wait ()); + + // Show the noreturn attribute to the compiler to avoid the 'end of + // non-void function' warning. + // + submodule_failure ("unable to list repository files", prefix); + } + + // Fix up or revert the previously made fixes in a working tree of a top + // repository or submodule (see git_fixup_worktree() description for + // details). Return nullopt if no changes are required (because real symlink + // are being used). + // + static optional<bool> + fixup_worktree (const common_options& co, + const dir_path& dir, + bool revert, + const dir_path& prefix) + { + bool r (false); + + auto failure = [&prefix] (const string& d, const exception* e = nullptr) + { + submodule_failure (d, prefix, e); + }; + + if (!revert) + { + // Fix up symlinks depth-first, so link targets in submodules exist by + // the time we potentially reference them from the containing + // repository. + // + for (const submodule& sm: find_submodules (co, dir, prefix)) + { + optional<bool> fixed ( + fixup_worktree (co, dir / sm.path, revert, prefix / sm.path)); + + // If no further fix up is required, then the repository contains a + // real symlink. If that's the case, bailout or fail if git's + // filesystem-agnostic symlinks are also present in the repository. + // + if (!fixed) + { + // Note that the error message is not precise as path for the + // symlink in question is no longer available. However, the case + // feels unusual, so let's not complicate things for now. + // + if (r) + failure ("unexpected real symlink in submodule '" + + sm.path.string () + "'"); + + return nullopt; + } + + if (*fixed) + r = true; + } + + // Note that the target belonging to the current repository can be + // unavailable at the time we create a link to it because its path may + // contain a not yet created link components. Also, an existing target + // can be a not yet replaced filesystem-agnostic symlink. + // + // First, we cache link/target paths and remove the filesystem-agnostic + // links from the filesystem in order not to end up hard-linking them as + // targets. Then, we create links (hardlinks and junctions) iteratively, + // skipping those with not-yet-existing target, unless no links were + // created at the previous run, in which case we fail. + // + paths ls (find_symlinks (co, dir, prefix)); + vector<pair<path, path>> links; // List of the link/target path pairs. + + // Cache/remove filesystem-agnostic symlinks. + // + for (auto& l: ls) + { + path lp (dir / l); // Absolute or relative to the current directory. + + // Check the symlink type to see if we need to replace it or can bail + // out/fail (see above). + // + // @@ Note that things are broken here if running in the Windows + // "elevated console mode": + // + // - file symlinks are currently not supported (see + // libbutl/filesystem.mxx for details). + // + // - git creates symlinks to directories, rather than junctions. This + // makes things to fall apart as Windows API seems to be unable to + // see through such directory symlinks. More research is required. + // + try + { + pair<bool, entry_stat> e (path_entry (lp)); + + if (!e.first) + failure ("symlink '" + l.string () + "' does not exist"); + + if (e.second.type == entry_type::symlink) + { + if (r) + failure ("unexpected real symlink '" + l.string () + "'"); + + return nullopt; + } + } + catch (const system_error& e) + { + failure ("unable to stat symlink '" + l.string () + "'", &e); + } + + // Read the symlink target path. + // + path t; + + try + { + ifdstream fs (lp); + t = path (fs.read_text ()); + } + catch (const invalid_path& e) + { + failure ("invalid target path '" + e.path + "' for symlink '" + + l.string () + "'", + &e); + } + catch (const io_error& e) + { + failure ("unable to read target path for symlink '" + l.string () + + "'", + &e); + } + + // Mark the symlink as unchanged and remove it. + // + if (!run_git (co, + co.git_option (), + "-C", dir, + "update-index", + "--assume-unchanged", + l)) + failure ("unable to mark symlink '" + l.string () + + "' as unchanged"); + + links.emplace_back (move (l), move (t)); + + rm (lp); + r = true; + } + + // Create real links (hardlinks and junctions). + // + while (!links.empty ()) + { + size_t n (links.size ()); + + for (auto i (links.cbegin ()); i != links.cend (); ) + { + const path& l (i->first); + const path& t (i->second); + + // Absolute or relative to the current directory. + // + path lp (dir / l); + path tp (lp.directory () / t); + + bool dir_target; + + try + { + pair<bool, entry_stat> pe (path_entry (tp)); + + // Skip the symlink that references a not-yet-existing target. + // + if (!pe.first) + { + ++i; + continue; + } + + dir_target = pe.second.type == entry_type::directory; + } + catch (const system_error& e) + { + failure ("unable to stat target '" + t.string () + + "' for symlink '" + l.string () + "'", + &e); + } + + // Create the hardlink for a file target and junction for a + // directory target. + // + try + { + if (dir_target) + mksymlink (tp, lp, true /* dir */); + else + mkhardlink (tp, lp); + } + catch (const system_error& e) + { + failure (string ("unable to create ") + + (dir_target ? "junction" : "hardlink") + " '" + + l.string () + "' with target '" + t.string () + "'", + &e); + } + + i = links.erase (i); + } + + // Fail if no links were created on this run. + // + if (links.size () == n) + { + assert (!links.empty ()); + + failure ("target '" + links[0].first.string () + "' for symlink '" + + links[0].second.string () + "' does not exist"); + } + } + } + else + { + // Revert the fixes we've made previously in the opposite, depth-last, + // order. + // + // For the directory junctions the git-checkout command (see below) + // removes the target directory content, rather then the junction + // filesystem entry. To prevent this, we remove all hardlinks/junctions + // ourselves first. + // + for (const path& l: find_symlinks (co, dir, prefix)) + { + try + { + try_rmfile (dir / l); + } + catch (const system_error& e) + { + failure ("unable to remove hardlink or junction '" + l.string () + + "'", + &e); + } + } + + if (!run_git (co, + co.git_option (), + "-C", dir, + "checkout", + "--", + "./")) + failure ("unable to revert '" + dir.string () + '"'); + + // Revert fixes in submodules. + // + for (const submodule& sm: find_submodules (co, dir, prefix)) + fixup_worktree (co, dir / sm.path, revert, prefix / sm.path); + + // Let's not complicate things detecting if we have reverted anything + // and always return true, assuming there wouldn't be a reason to revert + // if no fixes were made previously. + // + r = true; + } + + return r; + } + + bool + git_fixup_worktree (const common_options& co, + const dir_path& dir, + bool revert) + { + optional<bool> r ( + fixup_worktree (co, dir, revert, dir_path () /* prefix */)); + + return r ? *r : false; + } + +#endif } |