From 8e866579cb459c5104c532d5e41d562d45236ea5 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 30 Jun 2015 19:20:16 +0200 Subject: Implement loader --- loader/.gitignore | 3 + loader/buildfile | 20 ++ loader/loader.cxx | 577 +++++++++++++++++++++++++++++++++++++++++++++++++++++ loader/options.cli | 24 +++ 4 files changed, 624 insertions(+) create mode 100644 loader/.gitignore create mode 100644 loader/buildfile create mode 100644 loader/loader.cxx create mode 100644 loader/options.cli (limited to 'loader') diff --git a/loader/.gitignore b/loader/.gitignore new file mode 100644 index 0000000..820b183 --- /dev/null +++ b/loader/.gitignore @@ -0,0 +1,3 @@ +options +options.?xx +brep-loader diff --git a/loader/buildfile b/loader/buildfile new file mode 100644 index 0000000..d4e4678 --- /dev/null +++ b/loader/buildfile @@ -0,0 +1,20 @@ +# file : loader/buildfile +# copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +using cli + +import libs += libbpkg%lib{bpkg} +import libs += libbutl%lib{butl} +import libs += libodb-pgsql%lib{odb-pgsql} +import libs += libodb%lib{odb} + +include ../brep/ + +loader = cxx{loader} cli.cxx{options} +exe{brep-loader}: $loader ../brep/libso{brep} $libs + +cli.options += -I $src_root --include-with-brackets --include-prefix loader \ +--guard-prefix LOADER + +cli.cxx{options}: cli{options} diff --git a/loader/loader.cxx b/loader/loader.cxx new file mode 100644 index 0000000..f13349d --- /dev/null +++ b/loader/loader.cxx @@ -0,0 +1,577 @@ +// file : loader/loader.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include // shared_ptr, make_shared() +#include +#include // move() +#include // uint64_t +#include +#include +#include +#include // runtime_error, invalid_argument + +#include +#include +#include + +#include +#include +#include +#include + +#include // timestamp_nonexistent +#include + 
+#include // manifest_parsing + +#include +#include + +#include + +using namespace std; +using namespace odb::core; +using namespace butl; +using namespace bpkg; +using namespace brep; + +namespace pgsql = odb::pgsql; + +static void +usage () +{ + cout << "Usage: brep-loader [options] " << endl + << "File lists internal repositories." << endl + << "Options:" << endl; + + options::print_usage (cout); +} + +static inline bool +space (char c) noexcept +{ + return c == ' ' || c == '\t'; +} + +struct internal_repository +{ + repository_location location; + string display_name; + dir_path local_path; + + path + packages_path () const {return local_path / path ("packages");} +}; + +using internal_repositories = vector; + +static internal_repositories +load_repositories (path p) +{ + internal_repositories repos; + + if (p.relative ()) + p.complete (); + + ifstream ifs (p.string ()); + if (!ifs.is_open ()) + throw ifstream::failure (p.string () + ": unable to open"); + + ifs.exceptions (ifstream::badbit); + + try + { + string s; + for (uint64_t l (1); getline (ifs, s); ++l) + { + auto b (s.cbegin ()); + auto i (b); + auto e (s.cend ()); + + // Skip until first non-space (true) or space (false). + // + auto skip ([&i, &e](bool s = true) -> decltype (i) { + for (; i != e && space (*i) == s; ++i); return i;}); + + skip (); // Skip leading spaces. + + if (i == e || *i == '#') // Empty line or comment. + continue; + + // From now on pb will track the beginning of the next part + // while i -- the end. + // + auto pb (i); // Location begin. + skip (false); // Find end of location.
+ + auto bad_line ([&p, l, &pb, &b](const string& d) { + ostringstream os; + os << p << ':' << l << ':' << pb - b + 1 << ": error: " << d; + throw runtime_error (os.str ()); + }); + + repository_location location; + + try + { + location = repository_location (string (pb, i)); + } + catch (const invalid_argument& e) + { + bad_line (e.what ()); + } + + if (location.local ()) + bad_line ("local repository location"); + + for (const auto& r: repos) + if (r.location.canonical_name () == location.canonical_name ()) + bad_line ("duplicate canonical name"); + + pb = skip (); // Find begin of display name. + + if (pb == e) + bad_line ("no display name found"); + + skip (false); // Find end of display name. + + string name (pb, i); + pb = skip (); // Find begin of filesystem path. + + if (pb == e) // For now filesystem path is mandatory. + bad_line ("no filesystem path found"); + + skip (false); // Find end of filesystem path (no spaces allowed). + + internal_repository r { + move (location), + move (name), + dir_path (string (pb, i))}; + + // If the internal repository local path is relative, then + // calculate its absolute local path. Such path is considered to be + // relative to configuration file directory path so result is + // independent from whichever directory is current for the loader + // process. + // + if (r.local_path.relative ()) + { + r.local_path = p.directory () / r.local_path; + } + + try + { + r.local_path.normalize (); + } + catch (const invalid_path&) + { + bad_line ("can't normalize local path"); + } + + if (!file_exists (r.packages_path ())) + bad_line ("'packages' file does not exist"); + + repos.emplace_back (move (r)); + + // Check that there is no non-whitespace junk at the end. + // + if (skip () != e) + bad_line ("junk after filesystem path"); + } + } + catch (const ifstream::failure&) + { + throw ifstream::failure (p.string () + ": io failure"); + } + + return repos; +} + +// Check if repositories persistent state is outdated. 
If any repository +// differs from its persistent state or there is a persistent repository +// which is not listed in configuration file then the whole persistent +// state will be recreated. Will consider optimization later when the +// package model, including search related objects, settles down. +// +static bool +changed (const internal_repositories& repos, database& db) +{ + strings names; + for (auto& r: repos) + { + shared_ptr pr ( + db.find (r.location.canonical_name ())); + + if (pr == nullptr || r.location.string () != pr->location.string () || + r.display_name != pr->display_name || r.local_path != pr->local_path || + file_mtime (r.packages_path ()) != pr->timestamp || !pr->internal) + return true; + + names.emplace_back (r.location.canonical_name ()); + } + + using query = query; + + // Check if there is an internal repository not being listed in the + // configuration file. + // + auto rs ( + db.query (query::internal && + !query::id.canonical_name.in_range (names.begin (), + names.end ()))); + + return !rs.empty (); +} + +// Load the repository state (including that of its prerequisite repositories) +// from the 'packages' file. +// +static void +load_repository (const shared_ptr& rp, database& db) +{ + if (rp->timestamp != timestamp_nonexistent) + return; // The repository is already loaded. + + // Only locally accessible repositories allowed until package manager API is + // ready. + // + assert (!rp->local_path.empty ()); + + path p (rp->local_path / path ("packages")); + + ifstream ifs (p.string ()); + if (!ifs.is_open ()) + throw ifstream::failure (p.string () + ": unable to open"); + ifs.exceptions (ifstream::badbit | ifstream::failbit); + + // Mark as loaded. This is important in case we try to load this + // repository again recursively.
+ // + rp->timestamp = file_mtime (p); + + manifest_parser mp (ifs, p.string ()); + manifests ms (mp); + + // Close to avoid unpredictable number of files being simultaneously + // opened due to load_repository() recursive calls. + // + ifs.close (); + + // Don't add prerequisite repositories for external repositories. + // + if (rp->internal) + { + for (auto& rm: ms.repositories) + { + if (rm.location.empty ()) + continue; // Ignore entry for this repository. + + repository_location rl; + + auto bad_location ( + [&rp, &rm]() + { + ostringstream o; + o << "invalid location '" << rm.location.string () + << "' of the prerequisite repository for internal " + "repository '" << rp->location.string () << "'"; + + throw runtime_error (o.str ()); + }); + + try + { + // Absolute path location makes no sense for the web interface. + // + if (rm.location.absolute ()) + bad_location (); + + // Convert the relative repository location to remote one, leave remote + // location unchanged. + // + rl = repository_location (rm.location.string (), rp->location); + } + catch (const invalid_argument&) + { + bad_location (); + } + + shared_ptr pr (db.find (rl.canonical_name ())); + + if (pr == nullptr) + { + pr = make_shared (move (rl)); + + // If the prerequisite repository location is a relative path, then + // calculate its absolute local path.
+ // + if (rm.location.relative ()) + { + dir_path& lp (pr->local_path); + lp = rp->local_path / rm.location.path (); + + try + { + lp.normalize (); + } + catch (const invalid_path&) + { + ostringstream o; + o << "can't normalize local path'" << lp.string () + << "' of the prerequisite repository for internal " + "repository '" << rp->location.string () << "'"; + + throw runtime_error (o.str ()); + } + } + + db.persist (pr); + } + + load_repository (pr, db); + + rp->prerequisite_repositories.emplace_back (pr); + } + } + + // Temporarily reset ODB session for the current thread while persisting + // package and package_version objects to decrease memory consumption. + // + session& s (session::current ()); + session::reset_current (); + + for (auto& pm: ms.packages) + { + max_package_version mv; + + // If there are no package_version objects persisted yet for this + // package, then query_one() will leave mv unchanged in which case + // the version member remains empty. The empty version value is + // less than any non-empty one so the condition below evaluates + // to true and the package object gets persisted. + // + db.query_one ( + query::id.data.package == pm.name, mv); + + if (mv.version < pm.version) + { + // Create the package object. + // + brep::optional desc; // Ambiguity with butl::optional. + + // Don't add description for external repository packages. + // + if (rp->internal && pm.description) + { + if (pm.description->file) + { + // @@ Pull description from the file when package manager API + // is ready. + } + else + desc = move (*pm.description); + } + + package p (pm.name, + move (pm.summary), + move (pm.tags), + move (desc), + move (pm.url), + move (pm.package_url), + move (pm.email), + move (pm.package_email)); + + if (mv.version.empty ()) + db.persist (p); + else + db.update (p); + } + + // Create package version object.
+ // + dependencies dep; + requirements req; + string chn; + + // Don't add dependencies, requirements and changes for external + // repository packages. + // + if (rp->internal) + { + dep = move (pm.dependencies); + req = move (pm.requirements); + + for (auto& c: pm.changes) + { + if (c.file) + { + // @@ Pull change notes from the file when package manager + // API is ready. + } + else + { + if (chn.empty ()) + chn = move (c); + else + chn += "\n" + c; + } + } + } + + package_version pv (rp, + lazy_shared_ptr (db, pm.name), + move (pm.version), + pm.priority ? move (*pm.priority) : priority (), + move (pm.license_alternatives), + move (chn), + move (dep), + move (req)); + + db.persist (pv); + } + + session::current (s); // Restore current session. + + db.update (rp); // Save the repository state. +} + +int +main (int argc, char* argv[]) +{ + try + { + cli::argv_scanner scan (argc, argv, true); + options ops (scan); + + // Version. + // + if (ops.version ()) + { + cout << "brep-loader 0.0.0" << endl + << "Copyright (c) 2014-2015 Code Synthesis Ltd" << endl + << "MIT; see accompanying LICENSE file" << endl; + + return 0; + } + + // Help. + // + if (ops.help ()) + { + usage (); + return 0; + } + + if (argc < 2) + { + cout << " argument not provided" << endl; + usage (); + return 1; + } + + if (argc > 2) + { + cout << "unexpected argument encountered" << endl; + usage (); + return 1; + } + + pgsql::database db ("", "", "brep", ops.db_host (), ops.db_port ()); + + // Prevent several loader instances from updating DB simultaneously. + // + { + transaction t (db.begin ()); + db.execute ("CREATE TABLE IF NOT EXISTS loader_mutex ()"); + t.commit (); + } + + pgsql::connection_ptr synch_c (db.connection ()); + + // Don't make current. 
+ // + pgsql::transaction synch_t (synch_c->begin (), false); + + try + { + synch_c->execute ("LOCK TABLE loader_mutex NOWAIT"); + } + catch (const pgsql::database_exception& e) + { + if (e.sqlstate () == "55P03") + return 2; // Other loader instance acquired the mutex. + + throw; + } + + // Load the description of all the internal repositories from + // the configuration file. + // + internal_repositories irs (load_repositories (path (argv[1]))); + + transaction t (db.begin ()); + + if (changed (irs, db)) + { + // Rebuild repositories persistent state from scratch. + // + db.erase_query (); + db.erase_query (); + db.erase_query (); + + // We use repository object timestamp as a flag to signal that + // we have already loaded this repo. The easiest way to make + // it work in case of cycles is to use a session. This way, + // the repository object on which we updated the timestamp + // will be the same as the one we may check down the call + // stack. + // + session s; + + // On the first pass over the internal repositories list we + // persist empty repository objects, setting the internal flag + // to true and timestamp to non-existent. The idea is to + // establish the "final" list of internal repositories. + // + for (auto& ir: irs) + { + shared_ptr r ( + make_shared (ir.location, + move (ir.display_name), + move (ir.local_path))); + + db.persist (r); + } + + // On the second pass over the internal repositories we + // load them and all their (not yet loaded) prerequisite + // repositories. + // + for (const auto& ir: irs) + { + shared_ptr r ( + db.load (ir.location.canonical_name ())); + + load_repository (r, db); + } + } + + t.commit (); + synch_t.commit (); // Release the mutex. + } + catch (const cli::exception& e) + { + cerr << e << endl; + usage (); + return 1; + } + // Fully qualified to avoid ambiguity with odb exception.
+ // + catch (const std::exception& e) + { + cerr << e.what () << endl; + return 1; + } +} diff --git a/loader/options.cli b/loader/options.cli new file mode 100644 index 0000000..9e96585 --- /dev/null +++ b/loader/options.cli @@ -0,0 +1,24 @@ +// file : loader/options.cli +// copyright : Copyright (c) 2014-2015 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +include ; +include ; + +class options +{ + bool --help {"Print usage information and exit."}; + bool --version {"Print version and exit."}; + + std::string --db-host = "localhost" + { + "", + "Connect database server using specified host or socket directory." + }; + + std::uint16_t --db-port = 5432 + { + "", + "Connect database server using specified port." + }; +}; -- cgit v1.1